From 8bf20dd1ca8b6b773ff0a773c09a845a9be42a96 Mon Sep 17 00:00:00 2001 From: qiaojbao Date: Thu, 8 Aug 2024 14:16:23 +0800 Subject: [PATCH] Update xgl from commit 3def0891 Update Khronos Vulkan Headers to 1.3.292 [CMake] Use FindPython3 Add missing post-command debug barrier on PipelineBarrier Reserve userdata nodes to raytracing for empty layout Fix compiler warning in Graphics Pipeline code. Eliminate BindNullPipeline Fix failure in VKD3D tests - test_renderpass_rendering Add app profile for SeriousSam4 NV_DGC: GPU Memory Allocation [Graphics Pipeline] Fix a typo for vertex input interface library Delete old settings codegen template NV_DGC: Custom Index Type Use const reference for getting Pal::DeviceProperties Support RRA UserMarker Fix the wrong trace ray addr Added new setting option expandHiZRangeForResummarize. Added new app profile, setting and detection for RainbowSixExtraction Fix component format VK_KHR_maintenance7 - Driver Implementation Report present support only on compute capable queues Correct indenting in OverrideProfiledSettings in Settings.cpp Use 0 as not ready for TimestampQuery Added support for SRV Fix TimestampQuery initialize value Change continuations mode heuristic Handle repeated #if in app_profile.h Fix GetImageFormatProperties for FORMAT_NOT_SUPPORTED split Ray Tracing task Make DisablePatchInterpMode exposed Enable vertex offset mode for LLPC Support dynamic primitive topology for LLPC Remove option UseShaderLibraryForPipelineLibraryFastLink Fix vkQueueSubmit2 does not respect VkCommandBufferSubmitInfo::deviceMask Update LLPC version to 74 Adding options to wait for vkDeviceWaitIdle() infinitely Remove calling convention workaround for Doom Eternal Workaround for Starfield game hang Fix 7 Days to Die corruption Update PAL Version to 887 ImageTilingPreference3dGpuWritable: Add a check for sparse images Clean up ray tracing mode settings NV_DGC: Token Offset Fix textures missing in Starfield --- CMakeLists.txt | 14 +- 
cmake/XglHelper.cmake | 1 + cmake/XglVersions.cmake | 4 +- icd/CMakeLists.txt | 1 + icd/Loader/LunarG/Lnx/amd-icd.json | 4 +- icd/api/app_profile.cpp | 52 +- .../Navi31/RainbowSixExtraction/profile.json | 29 + icd/api/appopt/split_raytracing_layer.cpp | 203 ++ icd/api/appopt/split_raytracing_layer.h | 65 + icd/api/compiler_solution.cpp | 1 + icd/api/compiler_solution_llpc.cpp | 31 +- icd/api/devmode/devmode_mgr.h | 11 + icd/api/devmode/devmode_rgp.cpp | 102 +- icd/api/devmode/devmode_rgp.h | 26 + icd/api/devmode/devmode_ubertrace.cpp | 113 +- icd/api/devmode/devmode_ubertrace.h | 16 + icd/api/entry.cpp | 7 + icd/api/graphics_pipeline_common.cpp | 104 +- icd/api/include/app_profile.h | 5 +- icd/api/include/color_space_helper.h | 2 +- icd/api/include/compiler_solution.h | 8 +- icd/api/include/compiler_solution_llpc.h | 6 - icd/api/include/graphics_pipeline_common.h | 28 +- .../khronos/sdk-1.3/vulkan/vulkan_core.h | 182 +- icd/api/include/pipeline_compiler.h | 5 - icd/api/include/vk_cmdbuffer.h | 37 +- icd/api/include/vk_compute_pipeline.h | 2 - icd/api/include/vk_conv.h | 10 +- icd/api/include/vk_descriptor_set_layout.h | 12 - icd/api/include/vk_event.h | 6 +- icd/api/include/vk_extensions.h | 4 + icd/api/include/vk_graphics_pipeline.h | 42 +- .../include/vk_graphics_pipeline_library.h | 15 + icd/api/include/vk_indirect_commands_layout.h | 38 +- icd/api/include/vk_physical_device.h | 3 - icd/api/include/vk_pipeline.h | 10 +- icd/api/include/vk_pipeline_layout.h | 4 +- icd/api/include/vk_query.h | 2 + icd/api/include/vk_render_pass.h | 5 - icd/api/pipeline_compiler.cpp | 82 +- icd/api/raytrace/ray_tracing_device.cpp | 42 +- icd/api/raytrace/ray_tracing_device.h | 6 + icd/api/raytrace/vk_ray_tracing_pipeline.cpp | 290 ++- icd/api/raytrace/vk_ray_tracing_pipeline.h | 24 +- icd/api/renderpass/renderpass_builder.cpp | 5 +- icd/api/sqtt/sqtt_layer.cpp | 85 +- icd/api/sqtt/sqtt_layer.h | 17 + icd/api/strings/entry_points.txt | 1 + icd/api/strings/extensions.txt | 2 + 
icd/api/vk_cmdbuffer.cpp | 491 +++-- icd/api/vk_cmdbuffer_transfer.cpp | 25 +- icd/api/vk_compute_pipeline.cpp | 15 - icd/api/vk_conv.cpp | 46 +- icd/api/vk_descriptor_pool.cpp | 65 +- icd/api/vk_descriptor_set_layout.cpp | 5 - icd/api/vk_device.cpp | 43 +- icd/api/vk_dispatch.cpp | 1 + icd/api/vk_event.cpp | 10 +- icd/api/vk_graphics_pipeline.cpp | 521 +---- icd/api/vk_graphics_pipeline_library.cpp | 235 +- icd/api/vk_image.cpp | 10 +- icd/api/vk_indirect_commands_layout.cpp | 297 ++- icd/api/vk_instance.cpp | 4 +- icd/api/vk_physical_device.cpp | 61 +- icd/api/vk_physical_device_manager.cpp | 4 +- icd/api/vk_pipeline.cpp | 4 +- icd/api/vk_pipeline_cache.cpp | 2 + icd/api/vk_pipeline_layout.cpp | 126 +- icd/api/vk_query.cpp | 33 +- icd/api/vk_queue.cpp | 33 +- icd/api/vk_sampler.cpp | 3 +- icd/api/vk_semaphore.cpp | 1 - icd/api/vk_shader.cpp | 5 - icd/res/ver.h | 4 +- icd/settings/settings.cpp | 1903 +++++++++-------- icd/settings/settings_xgl.json | 197 +- icd/tools/generate/genSettingsCode.py | 120 -- icd/tools/generate/genShaderProfile.py | 9 +- .../generate/vulkanSettingsCodeTemplates.py | 213 -- 79 files changed, 3283 insertions(+), 2967 deletions(-) create mode 100644 icd/api/appopt/shader_profiles/llpc/gfxIp11_0/Navi31/RainbowSixExtraction/profile.json create mode 100644 icd/api/appopt/split_raytracing_layer.cpp create mode 100644 icd/api/appopt/split_raytracing_layer.h delete mode 100644 icd/tools/generate/genSettingsCode.py delete mode 100644 icd/tools/generate/vulkanSettingsCodeTemplates.py diff --git a/CMakeLists.txt b/CMakeLists.txt index adc8c7bd..40cd61cf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -210,19 +210,13 @@ string(APPEND CMAKE_CXX_FLAGS "${ICD_GCOV_COMPILE_FLAGS}") ### Generator Dependencies ############################################################################################ # Python3 -find_package(PythonInterp 3) -if(NOT PYTHONINTERP_FOUND) +find_package(Python3 3.6) +if(NOT Python3_FOUND) if(UNIX) - message(FATAL_ERROR 
"Python 3 is needed to generate some source files.") - endif() -else() - if(UNIX) - if(${PYTHON_VERSION_MINOR} LESS "6") - message(FATAL_ERROR "Python 3.6(CPython) or higher is needed to generate some source files.") - endif() + message(FATAL_ERROR "Python 3.6 or higher is needed to generate some source files.") endif() endif() -set(PYTHON_CMD ${PYTHON_EXECUTABLE}) +set(PYTHON_CMD ${Python3_EXECUTABLE}) # Perl find_package(Perl 5) diff --git a/cmake/XglHelper.cmake b/cmake/XglHelper.cmake index aaba5869..d8afa3f4 100644 --- a/cmake/XglHelper.cmake +++ b/cmake/XglHelper.cmake @@ -66,3 +66,4 @@ macro(xgl_append_gcov_coverage_flags) message(FATAL_ERROR "Unknown compiler ID: ${CMAKE_CXX_COMPILER_ID}") endif() endmacro() + diff --git a/cmake/XglVersions.cmake b/cmake/XglVersions.cmake index 2f92310a..f1276f4c 100644 --- a/cmake/XglVersions.cmake +++ b/cmake/XglVersions.cmake @@ -28,7 +28,7 @@ include_guard() # This will become the value of PAL_CLIENT_INTERFACE_MAJOR_VERSION. It describes the version of the PAL interface # that the ICD supports. PAL uses this value to enable backwards-compatibility for older interface versions. # It must be updated on each PAL promotion after handling all of the interface changes described in palLib.h. -set(ICD_PAL_CLIENT_MAJOR_VERSION "880") +set(ICD_PAL_CLIENT_MAJOR_VERSION "887") # This will become the value of GPUOPEN_CLIENT_INTERFACE_MAJOR_VERSION if ICD_GPUOPEN_DEVMODE_BUILD=1. # It describes the interface version of the gpuopen shared module (part of PAL) that the ICD supports. @@ -42,4 +42,4 @@ set(ICD_GPURT_CLIENT_MAJOR_VERSION "46") # This will become the value of LLPC_CLIENT_INTERFACE_MAJOR_VERSION if ICD_BUILD_LLPC=1. # It describes the version of the interface version of LLPC that the ICD supports. 
-set(ICD_LLPC_CLIENT_MAJOR_VERSION "71") +set(ICD_LLPC_CLIENT_MAJOR_VERSION "74") diff --git a/icd/CMakeLists.txt b/icd/CMakeLists.txt index 3eefab43..52531fd9 100644 --- a/icd/CMakeLists.txt +++ b/icd/CMakeLists.txt @@ -170,6 +170,7 @@ if (VKI_RAY_TRACING) api/raytrace/ray_tracing_device.cpp api/vk_deferred_operation.cpp api/appopt/bvh_batch_layer.cpp + api/appopt/split_raytracing_layer.cpp ) endif() #endif diff --git a/icd/Loader/LunarG/Lnx/amd-icd.json b/icd/Loader/LunarG/Lnx/amd-icd.json index 6eb99348..2ae28859 100644 --- a/icd/Loader/LunarG/Lnx/amd-icd.json +++ b/icd/Loader/LunarG/Lnx/amd-icd.json @@ -2,13 +2,13 @@ "file_format_version": "1.0.0", "ICD": { "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so", - "api_version": "1.3.287" + "api_version": "1.3.292" }, "layer": { "name": "VK_LAYER_AMD_switchable_graphics_@ISABITS@", "type": "GLOBAL", "library_path": "@AMDVLK_INSTALL_PATH@/amdvlk@ISABITS@.so", - "api_version": "1.3.287", + "api_version": "1.3.292", "implementation_version": "1", "description": "AMD switchable graphics layer", "functions": { diff --git a/icd/api/app_profile.cpp b/icd/api/app_profile.cpp index c3013a5f..a5984c29 100644 --- a/icd/api/app_profile.cpp +++ b/icd/api/app_profile.cpp @@ -462,10 +462,10 @@ constexpr AppProfilePatternEntry AppNameRainbowSixSiege = "rainbow six siege" }; -constexpr AppProfilePatternEntry AppNameHyperscape = +constexpr AppProfilePatternEntry AppNameRainbowSixExtraction = { PatternAppNameLower, - "hyperscape" + "rainbow six extraction" }; constexpr AppProfilePatternEntry AppEngineScimitar = @@ -546,12 +546,6 @@ constexpr AppProfilePatternEntry AppEngineUnity = "unity" }; -constexpr AppProfilePatternEntry AppEngineAngle = -{ - PatternEngineNameLower, - "angle" -}; - constexpr AppProfilePatternEntry AppNameValheim = { PatternExeNameLower, @@ -612,6 +606,12 @@ constexpr AppProfilePatternEntry AppNameHaloInfiniteLauncher "haloinfinite.exe" }; +constexpr AppProfilePatternEntry AppNameStarfield +{ + 
PatternAppNameLower, + "starfield.exe" +}; + constexpr AppProfilePatternEntry AppNameTf2Win64 { PatternAppNameLower, @@ -1204,6 +1204,16 @@ AppProfilePattern AppPatternTable[] = PatternEnd } }, + + { + AppProfile::RainbowSixExtraction, + { + AppNameRainbowSixExtraction, + AppEngineScimitar, + PatternEnd + } + }, + { AppProfile::KnockoutCity, { @@ -1219,15 +1229,6 @@ AppProfilePattern AppPatternTable[] = } }, - { - AppProfile::Hyperscape, - { - AppNameHyperscape, - AppEngineScimitar, - PatternEnd - } - }, - { AppProfile::ScimitarEngine, { @@ -1360,14 +1361,6 @@ AppProfilePattern AppPatternTable[] = }, #endif - { - AppProfile::AngleEngine, - { - AppEngineAngle, - PatternEnd - } - }, - { AppProfile::CSGO, { @@ -1427,6 +1420,15 @@ AppProfilePattern AppPatternTable[] = } }, + { + AppProfile::Starfield, + { + AppNameStarfield, + AppEngineVKD3D, + PatternEnd + } + }, + { AppProfile::DxvkTf2, { diff --git a/icd/api/appopt/shader_profiles/llpc/gfxIp11_0/Navi31/RainbowSixExtraction/profile.json b/icd/api/appopt/shader_profiles/llpc/gfxIp11_0/Navi31/RainbowSixExtraction/profile.json new file mode 100644 index 00000000..b608ced8 --- /dev/null +++ b/icd/api/appopt/shader_profiles/llpc/gfxIp11_0/Navi31/RainbowSixExtraction/profile.json @@ -0,0 +1,29 @@ +{ + "entries": [ + { + "pattern": { + "ps": { + "codeHash": "0xdf44ae88f263605d 6d21f3936125b78b" + } + }, + "action": { + "ps": { + "waveSize": 32 + } + } + }, + { + "pattern": { + "ps": { + "codeHash": "0x0d4d93d110132347 fc0524f24ac8b128" + } + }, + "action": { + "ps": { + "useSiScheduler": true, + "nsaThreshold": 2 + } + } + } + ] +} \ No newline at end of file diff --git a/icd/api/appopt/split_raytracing_layer.cpp b/icd/api/appopt/split_raytracing_layer.cpp new file mode 100644 index 00000000..0578124a --- /dev/null +++ b/icd/api/appopt/split_raytracing_layer.cpp @@ -0,0 +1,203 @@ +/* + *********************************************************************************************************************** + * + * Copyright 
(c) 2014-2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + **********************************************************************************************************************/ +#if VKI_RAY_TRACING +#include "split_raytracing_layer.h" + +#include "include/vk_cmdbuffer.h" +#include "include/vk_device.h" +#include "include/vk_conv.h" +#include "raytrace/ray_tracing_device.h" +#include "raytrace/vk_ray_tracing_pipeline.h" + +namespace vk +{ + +// ===================================================================================================================== +// The method TraceRaysDispatchPerDevice is used to split a dispatch into multiple smaller ones, it helps prevent TDR +// for some specified scenarios and allows the Windows GUI to operate without stuttering. 
+// The limitations of this method: +// 1) It cannot prevent TDR when the IB needs more than 5 ~ 6 seconds to be executed on a Windows platform. +// 2) It cannot prevent TDR when no preemption request arrives in 2 seconds. +void SplitRaytracingLayer::TraceRaysDispatchPerDevice( + CmdBuffer* pCmdBuffer, + uint32_t deviceIdx, + uint32_t width, + uint32_t height, + uint32_t depth) +{ + const RuntimeSettings& settings = pCmdBuffer->VkDevice()->GetRuntimeSettings(); + const RayTracingPipeline* pPipeline = pCmdBuffer->RenderState()->pRayTracingPipeline; + + const uint32_t splitX = settings.rtDispatchSplitX; + const uint32_t splitY = settings.rtDispatchSplitY; + const uint32_t splitZ = settings.rtDispatchSplitZ; + + const uint32_t blockW = (width + splitX - 1) / splitX; + const uint32_t blockH = (height + splitY - 1) / splitY; + const uint32_t blockD = (depth + splitZ - 1) / splitZ; + + uint32_t dispatchSizeX = 0; + uint32_t dispatchSizeY = 0; + uint32_t dispatchSizeZ = 0; + + pPipeline->GetDispatchSize(&dispatchSizeX, &dispatchSizeY, &dispatchSizeZ, blockW, blockH, blockD); + + for (uint32_t z = 0; z < splitZ; z++) + { + uint32_t zOffset = z * blockD; + for (uint32_t x = 0; x < splitX; x++) + { + uint32_t xOffset = x * blockW; + for (uint32_t y = 0; y < splitY; y++) + { + uint32_t yOffset = y * blockH; + + uint32_t dispatchOffsetX = 0; + uint32_t dispatchOffsetY = 0; + uint32_t dispatchOffsetZ = 0; + + pPipeline->GetDispatchSize(&dispatchOffsetX, + &dispatchOffsetY, + &dispatchOffsetZ, + xOffset, + yOffset, + zOffset); + + pCmdBuffer->PalCmdBuffer(deviceIdx)->CmdDispatchOffset( + { dispatchOffsetX, dispatchOffsetY, dispatchOffsetZ }, + { dispatchSizeX, dispatchSizeY, dispatchSizeZ }, + { dispatchSizeX, dispatchSizeY, dispatchSizeZ }); + + // To avoid TDR, the large dispatch is split into multiple smaller sub-dispatches. 
However, + // when a MCBP event arrives, PFP may have already processed all dispatch commands, so mulitple + // smaller sub-dispatches cannot be interrupted by MCBP in this case. + // The Barrier below is used to stall the PFP and allow MCBP to happen between dispatches. + Pal::BarrierTransition transition = {}; + transition.srcCacheMask = Pal::CoherShaderRead; + transition.dstCacheMask = Pal::CoherShaderRead; + const Pal::HwPipePoint postCs = Pal::HwPipePostCs; + Pal::BarrierInfo barrierInfo = {}; + barrierInfo.pipePointWaitCount = 1; + barrierInfo.pPipePoints = &postCs; + barrierInfo.waitPoint = Pal::HwPipeTop; + pCmdBuffer->PalCmdBuffer(deviceIdx)->CmdBarrier(barrierInfo); + + } + } + } +} + +// ===================================================================================================================== +VkResult SplitRaytracingLayer::CreateLayer( + Device* pDevice, + SplitRaytracingLayer** ppLayer) +{ + VkResult result = VK_SUCCESS; + SplitRaytracingLayer* pLayer = nullptr; + const RuntimeSettings& settings = pDevice->GetRuntimeSettings(); + + if (settings.splitRayTracingDispatch) + { + void* pMem = pDevice->VkInstance()->AllocMem(sizeof(SplitRaytracingLayer), VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); + + if (pMem != nullptr) + { + pLayer = VK_PLACEMENT_NEW(pMem) SplitRaytracingLayer(pDevice); + *ppLayer = pLayer; + } + else + { + result = VK_ERROR_OUT_OF_HOST_MEMORY; + } + } + + return result; +} + +// ===================================================================================================================== +SplitRaytracingLayer::SplitRaytracingLayer(Device* pDevice) + : + m_pInstance(pDevice->VkInstance()) +{ +} + +// ===================================================================================================================== +void SplitRaytracingLayer::DestroyLayer() +{ + Util::Destructor(this); + m_pInstance->FreeMem(this); +} + +namespace entry +{ + +namespace splitRaytracingLayer +{ +// 
===================================================================================================================== +VKAPI_ATTR void VKAPI_CALL vkCmdTraceRaysKHR( + VkCommandBuffer commandBuffer, + const VkStridedDeviceAddressRegionKHR* pRaygenShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pMissShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pHitShaderBindingTable, + const VkStridedDeviceAddressRegionKHR* pCallableShaderBindingTable, + uint32_t width, + uint32_t height, + uint32_t depth) +{ + CmdBuffer* pCmdBuffer = ApiCmdBuffer::ObjectFromHandle(commandBuffer); + pCmdBuffer->SetTraceRaysDispatchPerDevice(SplitRaytracingLayer::TraceRaysDispatchPerDevice); + + SplitRaytracingLayer* pLayer = pCmdBuffer->VkDevice()->RayTrace()->GetSplitRaytracingLayer(); + pLayer->GetNextLayer()->GetEntryPoints().vkCmdTraceRaysKHR( + commandBuffer, + pRaygenShaderBindingTable, + pMissShaderBindingTable, + pHitShaderBindingTable, + pCallableShaderBindingTable, + width, + height, + depth); +} +} // splitRaytracingLayer entry +} // namespace entry + +/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +#define SPLIT_RAYTRACING_OVERRIDE_ALIAS(entry_name, func_name) \ + pDispatchTable->OverrideEntryPoints()->entry_name = vk::entry::splitRaytracingLayer::func_name + +#define SPLIT_RAYTRACING_OVERRIDE_ENTRY(entry_name) SPLIT_RAYTRACING_OVERRIDE_ALIAS(entry_name, entry_name) + +// ===================================================================================================================== +void SplitRaytracingLayer::OverrideDispatchTable( + DispatchTable* pDispatchTable) +{ + // Save current device dispatch table to use as the next layer. 
+ m_nextLayer = *pDispatchTable; + + SPLIT_RAYTRACING_OVERRIDE_ENTRY(vkCmdTraceRaysKHR); +} + +} // namespace vk +#endif diff --git a/icd/api/appopt/split_raytracing_layer.h b/icd/api/appopt/split_raytracing_layer.h new file mode 100644 index 00000000..7edeaac6 --- /dev/null +++ b/icd/api/appopt/split_raytracing_layer.h @@ -0,0 +1,65 @@ +/* + *********************************************************************************************************************** + * + * Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + **********************************************************************************************************************/ +#if VKI_RAY_TRACING +#ifndef __SPLIT_RAYTRACING_LAYER_H__ +#define __SPLIT_RAYTRACING_LAYER_H__ + +#pragma once + +#include "opt_layer.h" +#include "vk_cmdbuffer.h" + +namespace vk +{ +// ===================================================================================================================== +// Class for the Split Raytracing Layer to simplify calls to the overridden dispatch table from the layer's entrypoints +class SplitRaytracingLayer final : public OptLayer +{ +public: + SplitRaytracingLayer(Device*); + virtual ~SplitRaytracingLayer() {} + + virtual void OverrideDispatchTable(DispatchTable* pDispatchTable) override; + + static VkResult CreateLayer(Device* pDevice, SplitRaytracingLayer** ppLayer); + void DestroyLayer(); + + Instance* VkInstance() { return m_pInstance; } + static void TraceRaysDispatchPerDevice( + CmdBuffer* pCmdBuffer, + uint32_t deviceIdx, + uint32_t width, + uint32_t height, + uint32_t depth); + +private: + Instance* m_pInstance; + PAL_DISALLOW_COPY_AND_ASSIGN(SplitRaytracingLayer); +}; + +}; // namespace vk + +#endif /* __SPLIT_RAYTRACING_LAYER_H__ */ +#endif /* VKI_RAY_TRACING */ diff --git a/icd/api/compiler_solution.cpp b/icd/api/compiler_solution.cpp index 3942a4a1..eb43bcdd 100644 --- a/icd/api/compiler_solution.cpp +++ b/icd/api/compiler_solution.cpp @@ -368,4 +368,5 @@ uint32_t CompilerSolution::GetRayTracingVgprLimit( return vgprLimit; } #endif + } diff --git a/icd/api/compiler_solution_llpc.cpp b/icd/api/compiler_solution_llpc.cpp index 462c7b0c..c9928c39 100644 --- a/icd/api/compiler_solution_llpc.cpp +++ b/icd/api/compiler_solution_llpc.cpp @@ -454,14 +454,18 @@ VkResult CompilerSolutionLlpc::CreateGraphicsShaderBinary( bool elfReplace = false; if (pCreateInfo->earlyElfPackage[gplType].pCode == nullptr) { - Util::MetroHash128 hasher; - Llpc::GraphicsPipelineBuildOut pipelineOut = {}; - int64_t 
startTime = Util::GetPerfCpuTime(); + Llpc::GraphicsPipelineBuildOut pipelineOut = {}; + int64_t startTime = Util::GetPerfCpuTime(); + bool binaryProvided = false; - hasher.Update(pCreateInfo->libraryHash[gplType]); - hasher.Update(PipelineCompilerTypeLlpc); - hasher.Update(m_pPhysicalDevice->GetSettingsLoader()->GetSettingsHash()); - hasher.Finalize(cacheId.bytes); + if (binaryProvided == false) + { + Util::MetroHash128 hasher; + hasher.Update(pCreateInfo->libraryHash[gplType]); + hasher.Update(PipelineCompilerTypeLlpc); + hasher.Update(m_pPhysicalDevice->GetSettingsLoader()->GetSettingsHash()); + hasher.Finalize(cacheId.bytes); + } Vkgc::BinaryData finalBinary = {}; if ((pDevice->GetRuntimeSettings().shaderReplaceMode == ShaderReplacePipelineBinaryHash) || @@ -493,7 +497,12 @@ VkResult CompilerSolutionLlpc::CreateGraphicsShaderBinary( &pCreateInfo->pipelineInfo.mesh, &pCreateInfo->pipelineInfo.fs }; - LoadShaderBinaryFromCache(pPipelineCache, &cacheId, &shaderLibraryBinary, &hitCache, &hitAppCache); + + if (binaryProvided == false) + { + LoadShaderBinaryFromCache(pPipelineCache, &cacheId, &shaderLibraryBinary, &hitCache, &hitAppCache); + } + if (pPipelineCache != nullptr) { // Update the shader feedback @@ -925,6 +934,7 @@ VkResult CompilerSolutionLlpc::CreateRayTracingPipelineBinary( void* pOutShaderGroup = static_cast(pipelineOut.shaderGroupHandle.shaderHandles); pPipelineBinary->librarySummary = pipelineOut.librarySummary; + pPipelineBinary->isCps = pipelineOut.isCps; } *pCompileTime = Util::GetPerfCpuTime() - startTime; @@ -1152,8 +1162,6 @@ VkResult CompilerSolutionLlpc::CreateLlpcCompiler( void CompilerSolutionLlpc::BuildPipelineInternalBufferData( const PipelineCompiler* pCompiler, const uint32_t uberFetchConstBufRegBase, - const uint32_t specConstBufVertexRegBase, - const uint32_t specConstBufFragmentRegBase, bool needCache, GraphicsPipelineBinaryCreateInfo* pCreateInfo) { @@ -1168,7 +1176,7 @@ void CompilerSolutionLlpc::BuildPipelineInternalBufferData( 
const VkPipelineVertexInputStateCreateInfo* pVertexInput = nullptr; bool needUberFetchShaderBuffer = false; - if (pCreateInfo->pipelineInfo.enableUberFetchShader || pCreateInfo->pipelineInfo.enableEarlyCompile) + if (pCreateInfo->pipelineInfo.enableUberFetchShader) { pVertexInput = pCreateInfo->pipelineInfo.pVertexInput; // For monolithic pipeline (needCache = true), we need save internal buffer data to cache, so we always need @@ -1193,7 +1201,6 @@ void CompilerSolutionLlpc::BuildPipelineInternalBufferData( VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); if (pInternalBufferInfo->pData == nullptr) { - pCreateInfo->pipelineInfo.enableEarlyCompile = false; pCreateInfo->pipelineInfo.enableUberFetchShader = false; pInternalBufferInfo->dataSize = 0; needUberFetchShaderBuffer = false; diff --git a/icd/api/devmode/devmode_mgr.h b/icd/api/devmode/devmode_mgr.h index 61d7ce80..c806ea9f 100644 --- a/icd/api/devmode/devmode_mgr.h +++ b/icd/api/devmode/devmode_mgr.h @@ -123,6 +123,7 @@ class IDevMode Pal::IQueueSemaphore* pQueueSemaphore) = 0; virtual bool IsQueueTimingActive(const Device* pDevice) const = 0; + virtual bool IsTraceRunning() const { return false; } virtual bool GetTraceFrameBeginTag(uint64_t* pTag) const = 0; virtual bool GetTraceFrameEndTag(uint64_t* pTag) const = 0; @@ -132,6 +133,16 @@ class IDevMode virtual void DeregisterPipelineCache( PipelineBinaryCache* pPipelineCache) = 0; + + virtual void ProcessMarkerTable( + uint32 sqttCbId, + uint32 numOps, + const uint32* pUserMarkerOpHistory, + uint32 numMarkerStrings, + const uint32* pMarkerStringOffsets, + uint32 markerStringDataSize, + const char* pMarkerStringData) {} + #endif }; diff --git a/icd/api/devmode/devmode_rgp.cpp b/icd/api/devmode/devmode_rgp.cpp index 1006a647..8693d306 100644 --- a/icd/api/devmode/devmode_rgp.cpp +++ b/icd/api/devmode/devmode_rgp.cpp @@ -52,6 +52,8 @@ #include "palHashBaseImpl.h" #include "palListImpl.h" #include "palVectorImpl.h" +#include "palStringTableTraceSource.h" +#include 
"palUserMarkerHistoryTraceSource.h" // gpuopen headers #include "devDriverServer.h" @@ -337,7 +339,10 @@ DevModeRgp::DevModeRgp( m_staticVmidActive(false), m_crashAnalysisEnabled(false), m_perfCounterIds(pInstance->Allocator()), - m_pipelineCaches(pInstance->Allocator()) + m_pipelineCaches(pInstance->Allocator()), + m_stringTableId(0), + m_pStringTableTraceSource(nullptr), + m_pUserMarkerHistoryTraceSource(nullptr) { memset(&m_trace, 0, sizeof(m_trace)); } @@ -345,6 +350,7 @@ DevModeRgp::DevModeRgp( // ===================================================================================================================== DevModeRgp::~DevModeRgp() { + DestroyUserMarkerTraceSources(); DestroyRGPTracing(&m_trace); } @@ -2199,8 +2205,8 @@ Pal::Result DevModeRgp::InitRGPTracing( pState->pGpaSession = VK_PLACEMENT_NEW(pStorage) GpuUtil::GpaSession( m_pInstance->PalPlatform(), pPalDevice, - VK_VERSION_MAJOR(apiVersion), - VK_VERSION_MINOR(apiVersion), + VK_API_VERSION_MAJOR(apiVersion), + VK_API_VERSION_MINOR(apiVersion), GpuUtil::ApiType::Vulkan, RgpSqttInstrumentationSpecVersion, RgpSqttInstrumentationApiVersion); @@ -2251,6 +2257,69 @@ Pal::Result DevModeRgp::InitRGPTracing( return result; } +// ===================================================================================================================== +// Init trace sources for exporting UserMarker to RRA (and RGP in the future) +Pal::Result DevModeRgp::InitUserMarkerTraceSources() +{ + Pal::Result result = Pal::Result::ErrorOutOfMemory; + + GpuUtil::TraceSession* pTraceSession = m_pInstance->PalPlatform()->GetTraceSession(); + + const size_t traceSourcesAllocSize = sizeof(GpuUtil::StringTableTraceSource) + + sizeof(GpuUtil::UserMarkerHistoryTraceSource); + + void* pStorage = m_pInstance->AllocMem(traceSourcesAllocSize, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + + if (pStorage != nullptr) + { + void* pObjStorage = pStorage; + + m_pStringTableTraceSource = VK_PLACEMENT_NEW(pObjStorage) + 
GpuUtil::StringTableTraceSource(m_pInstance->PalPlatform()); + + pObjStorage = VoidPtrInc(pObjStorage, sizeof(GpuUtil::StringTableTraceSource)); + + m_pUserMarkerHistoryTraceSource = VK_PLACEMENT_NEW(pObjStorage) + GpuUtil::UserMarkerHistoryTraceSource(m_pInstance->PalPlatform()); + + result = pTraceSession->RegisterSource(m_pStringTableTraceSource); + + if (result == Pal::Result::Success) + { + result = pTraceSession->RegisterSource(m_pUserMarkerHistoryTraceSource); + } + + if (result != Pal::Result::Success) + { + DestroyUserMarkerTraceSources(); + } + } + + return result; +} + +// ===================================================================================================================== +// Clean up resources related to UserMarker trace sources +void DevModeRgp::DestroyUserMarkerTraceSources() +{ + GpuUtil::TraceSession* pTraceSession = m_pInstance->PalPlatform()->GetTraceSession(); + + if (m_pUserMarkerHistoryTraceSource != nullptr) + { + pTraceSession->UnregisterSource(m_pUserMarkerHistoryTraceSource); + m_pUserMarkerHistoryTraceSource = nullptr; + } + if (m_pStringTableTraceSource != nullptr) + { + pTraceSession->UnregisterSource(m_pStringTableTraceSource); + m_pStringTableTraceSource = nullptr; + } + + // The 2 trace sources are allocated in a single memory allocation + m_pInstance->FreeMem(m_pStringTableTraceSource); +} + // ===================================================================================================================== // Called when a new device is created. This will preallocate reusable RGP trace resources for that device. 
void DevModeRgp::PostDeviceCreate(Device* pDevice) @@ -2260,6 +2329,8 @@ void DevModeRgp::PostDeviceCreate(Device* pDevice) // Pre-allocate trace resources for this device CheckTraceDeviceChanged(&m_trace, pDevice); + InitUserMarkerTraceSources(); + auto* pDriverControlServer = m_pDevDriverServer->GetDriverControlServer(); VK_ASSERT(pDriverControlServer != nullptr); @@ -2776,6 +2847,31 @@ bool DevModeRgp::GetTraceFrameEndTag( return active; } +// ===================================================================================================================== +bool DevModeRgp::IsTraceRunning() const +{ + const GpuUtil::TraceSession* pTraceSession = m_pInstance->PalPlatform()->GetTraceSession(); + return pTraceSession->GetTraceSessionState() == GpuUtil::TraceSessionState::Running; +} + +// ===================================================================================================================== +void DevModeRgp::ProcessMarkerTable( + uint32 sqttCbId, + uint32 numOps, + const uint32* pUserMarkerOpHistory, + uint32 numMarkerStrings, + const uint32* pMarkerStringOffsets, + uint32 markerStringDataSize, + const char* pMarkerStringData) +{ + uint32_t tableId = ++m_stringTableId; + + m_pStringTableTraceSource->AddStringTable(tableId, + numMarkerStrings, pMarkerStringOffsets, + pMarkerStringData, markerStringDataSize); + m_pUserMarkerHistoryTraceSource->AddUserMarkerHistory(sqttCbId, tableId, numOps, pUserMarkerOpHistory); +} + }; // namespace vk #endif diff --git a/icd/api/devmode/devmode_rgp.h b/icd/api/devmode/devmode_rgp.h index d24eb82e..0c72e663 100644 --- a/icd/api/devmode/devmode_rgp.h +++ b/icd/api/devmode/devmode_rgp.h @@ -46,6 +46,8 @@ #include "gpuopen.h" #endif +#include + // PAL forward declarations namespace Pal { @@ -66,6 +68,13 @@ class RGPServer; } } +// GpuUtil forward declarations +namespace GpuUtil +{ +class StringTableTraceSource; +class UserMarkerHistoryTraceSource; +} + namespace vk { @@ -135,6 +144,7 @@ class DevModeRgp final : public 
IDevMode virtual bool IsQueueTimingActive(const Device* pDevice) const override; virtual bool GetTraceFrameBeginTag(uint64_t* pTag) const override; virtual bool GetTraceFrameEndTag(uint64_t* pTag) const override; + virtual bool IsTraceRunning() const override; virtual Util::Result RegisterPipelineCache( PipelineBinaryCache* pPipelineCache, @@ -143,6 +153,15 @@ class DevModeRgp final : public IDevMode virtual void DeregisterPipelineCache( PipelineBinaryCache* pPipelineCache) override; + virtual void ProcessMarkerTable( + uint32 sqttCbId, + uint32 numOps, + const uint32* pUserMarkerOpHistory, + uint32 numMarkerStrings, + const uint32* pMarkerStringOffsets, + uint32 markerStringDataSize, + const char* pMarkerStringData) override; + Util::ListIterator GetPipelineCacheListIterator() { return m_pipelineCaches.Begin(); } @@ -252,6 +271,9 @@ class DevModeRgp final : public IDevMode Pal::Result InitRGPTracing(TraceState* pState, Device* pDevice); void DestroyRGPTracing(TraceState* pState); + Pal::Result InitUserMarkerTraceSources(); + void DestroyUserMarkerTraceSources(); + Pal::Result InitTraceQueueResources(TraceState* pState, bool* pHasDebugVmid, const Queue* pQueue, bool auxQueue); Pal::Result InitTraceQueueResourcesForDevice(TraceState* pState, bool* pHasDebugVmid); Pal::Result InitTraceQueueFamilyResources(TraceState* pTraceState, TraceQueueFamilyState* pFamilyState); @@ -312,6 +334,10 @@ class DevModeRgp final : public IDevMode PipelineCacheList m_pipelineCaches; Util::RWLock m_pipelineReinjectionLock; + + std::atomic m_stringTableId; + GpuUtil::StringTableTraceSource* m_pStringTableTraceSource; + GpuUtil::UserMarkerHistoryTraceSource* m_pUserMarkerHistoryTraceSource; #endif }; diff --git a/icd/api/devmode/devmode_ubertrace.cpp b/icd/api/devmode/devmode_ubertrace.cpp index 0c55ce31..aa94258d 100644 --- a/icd/api/devmode/devmode_ubertrace.cpp +++ b/icd/api/devmode/devmode_ubertrace.cpp @@ -48,6 +48,8 @@ #include "pal.h" #include "palCodeObjectTraceSource.h" #include 
"palQueueTimingsTraceSource.h" +#include "palStringTableTraceSource.h" +#include "palUserMarkerHistoryTraceSource.h" // gpuopen headers #include "devDriverServer.h" @@ -75,7 +77,10 @@ DevModeUberTrace::DevModeUberTrace( m_globalFrameIndex(1), // Must start from 1 according to RGP spec m_pTraceSession(pInstance->PalPlatform()->GetTraceSession()), m_pCodeObjectTraceSource(nullptr), - m_pQueueTimingsTraceSource(nullptr) + m_pQueueTimingsTraceSource(nullptr), + m_pStringTableTraceSource(nullptr), + m_pUserMarkerHistoryTraceSource(nullptr), + m_stringTableId(0) { } @@ -509,67 +514,119 @@ bool DevModeUberTrace::IsQueueTimingActive( return (m_pQueueTimingsTraceSource != nullptr) ? m_pQueueTimingsTraceSource->IsTimingInProgress() : false; } +// ===================================================================================================================== +bool DevModeUberTrace::IsTraceRunning() const +{ + return m_pTraceSession->GetTraceSessionState() == GpuUtil::TraceSessionState::Running; +} + // ===================================================================================================================== Pal::Result DevModeUberTrace::InitUberTraceResources( Pal::IDevice* pPalDevice) { Pal::Result result = Pal::Result::ErrorOutOfMemory; - void* pStorage = m_pInstance->AllocMem(sizeof(GpuUtil::CodeObjectTraceSource), VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + const size_t traceSourcesAllocSize = sizeof(GpuUtil::CodeObjectTraceSource) + + sizeof(GpuUtil::QueueTimingsTraceSource) + + sizeof(GpuUtil::StringTableTraceSource) + + sizeof(GpuUtil::UserMarkerHistoryTraceSource); + + void* pStorage = m_pInstance->AllocMem(traceSourcesAllocSize, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); if (pStorage != nullptr) { - m_pCodeObjectTraceSource = VK_PLACEMENT_NEW(pStorage) + void* pObjStorage = pStorage; + + m_pCodeObjectTraceSource = VK_PLACEMENT_NEW(pObjStorage) GpuUtil::CodeObjectTraceSource(m_pInstance->PalPlatform()); - result = 
m_pTraceSession->RegisterSource(m_pCodeObjectTraceSource); - } + pObjStorage = VoidPtrInc(pObjStorage, sizeof(GpuUtil::CodeObjectTraceSource)); - if (result == Pal::Result::Success) - { - pStorage = m_pInstance->AllocMem(sizeof(GpuUtil::QueueTimingsTraceSource), VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + m_pQueueTimingsTraceSource = VK_PLACEMENT_NEW(pObjStorage) + GpuUtil::QueueTimingsTraceSource(m_pInstance->PalPlatform()); - if (pStorage != nullptr) - { - m_pQueueTimingsTraceSource = VK_PLACEMENT_NEW(pStorage) - GpuUtil::QueueTimingsTraceSource(m_pInstance->PalPlatform()); + pObjStorage = VoidPtrInc(pObjStorage, sizeof(GpuUtil::QueueTimingsTraceSource)); + + m_pStringTableTraceSource = VK_PLACEMENT_NEW(pObjStorage) + GpuUtil::StringTableTraceSource(m_pInstance->PalPlatform()); + + pObjStorage = VoidPtrInc(pObjStorage, sizeof(GpuUtil::StringTableTraceSource)); + m_pUserMarkerHistoryTraceSource = VK_PLACEMENT_NEW(pObjStorage) + GpuUtil::UserMarkerHistoryTraceSource(m_pInstance->PalPlatform()); + + result = m_pTraceSession->RegisterSource(m_pCodeObjectTraceSource); + + if (result == Pal::Result::Success) + { + result = m_pQueueTimingsTraceSource->Init(pPalDevice); + } + if (result == Pal::Result::Success) + { result = m_pTraceSession->RegisterSource(m_pQueueTimingsTraceSource); } - else + if (result == Pal::Result::Success) { - result = Pal::Result::ErrorOutOfMemory; + result = m_pTraceSession->RegisterSource(m_pStringTableTraceSource); + } + if (result == Pal::Result::Success) + { + result = m_pTraceSession->RegisterSource(m_pUserMarkerHistoryTraceSource); } - } - if (result == Pal::Result::Success) - { - result = m_pQueueTimingsTraceSource->Init(pPalDevice); + if (result != Pal::Result::Success) + { + DestroyUberTraceResources(); + } } - if (result != Pal::Result::Success) - { - DestroyUberTraceResources(); - } return result; } // ===================================================================================================================== void 
DevModeUberTrace::DestroyUberTraceResources() { - if (m_pCodeObjectTraceSource != nullptr) + if (m_pUserMarkerHistoryTraceSource != nullptr) { - m_pTraceSession->UnregisterSource(m_pCodeObjectTraceSource); - m_pInstance->FreeMem(m_pCodeObjectTraceSource); - m_pCodeObjectTraceSource = nullptr; + m_pTraceSession->UnregisterSource(m_pUserMarkerHistoryTraceSource); + m_pUserMarkerHistoryTraceSource = nullptr; + } + if (m_pStringTableTraceSource != nullptr) + { + m_pTraceSession->UnregisterSource(m_pStringTableTraceSource); + m_pStringTableTraceSource = nullptr; } - if (m_pQueueTimingsTraceSource != nullptr) { m_pTraceSession->UnregisterSource(m_pQueueTimingsTraceSource); - m_pInstance->FreeMem(m_pQueueTimingsTraceSource); m_pQueueTimingsTraceSource = nullptr; } + if (m_pCodeObjectTraceSource != nullptr) + { + m_pTraceSession->UnregisterSource(m_pCodeObjectTraceSource); + m_pCodeObjectTraceSource = nullptr; + } + + // The 4 trace sources are allocated in a single memory allocation + m_pInstance->FreeMem(m_pCodeObjectTraceSource); +} + +// ===================================================================================================================== +void DevModeUberTrace::ProcessMarkerTable( + uint32 sqttCbId, + uint32 numOps, + const uint32* pUserMarkerOpHistory, + uint32 numMarkerStrings, + const uint32* pMarkerStringOffsets, + uint32 markerStringDataSize, + const char* pMarkerStringData) +{ + uint32_t tableId = ++m_stringTableId; + + m_pStringTableTraceSource->AddStringTable(tableId, + numMarkerStrings, pMarkerStringOffsets, + pMarkerStringData, markerStringDataSize); + m_pUserMarkerHistoryTraceSource->AddUserMarkerHistory(sqttCbId, tableId, numOps, pUserMarkerOpHistory); } } // namespace vk diff --git a/icd/api/devmode/devmode_ubertrace.h b/icd/api/devmode/devmode_ubertrace.h index 4710ca17..56cbaf60 100644 --- a/icd/api/devmode/devmode_ubertrace.h +++ b/icd/api/devmode/devmode_ubertrace.h @@ -34,6 +34,7 @@ #pragma once +#include #include 
"devmode/devmode_mgr.h" #include "palTraceSession.h" @@ -47,6 +48,8 @@ namespace GpuUtil { class CodeObjectTraceSource; class QueueTimingsTraceSource; +class StringTableTraceSource; +class UserMarkerHistoryTraceSource; } namespace vk @@ -85,6 +88,7 @@ class DevModeUberTrace final : public IDevMode virtual bool IsTracingEnabled() const override; virtual bool IsCrashAnalysisEnabled() const override { return m_crashAnalysisEnabled; } virtual bool IsQueueTimingActive(const Device* pDevice) const override; + virtual bool IsTraceRunning() const override; virtual Pal::Result TimedQueueSubmit( uint32_t deviceIdx, @@ -123,6 +127,15 @@ class DevModeUberTrace final : public IDevMode virtual void DeregisterPipelineCache( PipelineBinaryCache* pPipelineCache) override { }; + virtual void ProcessMarkerTable( + uint32 sqttCbId, + uint32 numOps, + const uint32* pUserMarkerOpHistory, + uint32 numMarkerStrings, + const uint32* pMarkerStringOffsets, + uint32 markerStringDataSize, + const char* pMarkerStringData); + private: DevModeUberTrace(Instance* pInstance); @@ -140,6 +153,9 @@ class DevModeUberTrace final : public IDevMode GpuUtil::TraceSession* m_pTraceSession; GpuUtil::CodeObjectTraceSource* m_pCodeObjectTraceSource; GpuUtil::QueueTimingsTraceSource* m_pQueueTimingsTraceSource; + GpuUtil::StringTableTraceSource* m_pStringTableTraceSource; + GpuUtil::UserMarkerHistoryTraceSource* m_pUserMarkerHistoryTraceSource; + std::atomic m_stringTableId; #endif }; diff --git a/icd/api/entry.cpp b/icd/api/entry.cpp index 23728f08..17de2a14 100644 --- a/icd/api/entry.cpp +++ b/icd/api/entry.cpp @@ -1856,6 +1856,13 @@ VKAPI_ATTR void VKAPI_CALL vkCmdSetRenderingInputAttachmentIndicesKHR( ApiCmdBuffer::ObjectFromHandle(commandBuffer)->SetRenderingInputAttachmentIndices(pLocationInfo); } +// ===================================================================================================================== +VKAPI_ATTR void VKAPI_CALL vkCmdSetDepthBias2EXT( + VkCommandBuffer commandBuffer, + 
const VkDepthBiasInfoEXT* pDepthBiasInfo) +{ +} + } // namespace entry } // namespace vk diff --git a/icd/api/graphics_pipeline_common.cpp b/icd/api/graphics_pipeline_common.cpp index 4b5ae8f4..72b7dbdf 100644 --- a/icd/api/graphics_pipeline_common.cpp +++ b/icd/api/graphics_pipeline_common.cpp @@ -782,8 +782,8 @@ void GraphicsPipelineCommon::ExtractLibraryInfo( } if ((pLibInfo->flags.isLibrary == false) && - (pLibInfo->pPreRasterizationShaderLib != nullptr) || - (pLibInfo->pFragmentShaderLib != nullptr)) + ((pLibInfo->pPreRasterizationShaderLib != nullptr) || + (pLibInfo->pFragmentShaderLib != nullptr))) { uint64_t preRasterHash = 0; uint64_t fragmentHash = 0; @@ -869,19 +869,12 @@ VkResult GraphicsPipelineCommon::Create( { VkResult result; + UpdatePipelineCreateFlags(pDevice, &flags); + GraphicsPipelineExtStructs extStructs = {}; HandleExtensionStructs(pCreateInfo, &extStructs); - if (pDevice->GetRuntimeSettings().pipelineLinkOptimizationMode == PipelineLinkOptimizationNeverOptimized) - { - flags &= ~VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT; - } - else if (pDevice->GetRuntimeSettings().pipelineLinkOptimizationMode == PipelineLinkOptimizationAlwaysOptimized) - { - flags |= VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT; - } - if ((flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR) != 0) { uint32_t compilerMask = pDevice->GetCompiler(DefaultDeviceIndex)->GetCompilerCollectionMask(); @@ -899,6 +892,77 @@ VkResult GraphicsPipelineCommon::Create( return result; } +// ===================================================================================================================== +VkResult GraphicsPipelineCommon::CreateCacheId( + const Device* pDevice, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const GraphicsPipelineExtStructs& extStructs, + const GraphicsPipelineLibraryInfo& libInfo, + VkPipelineCreateFlags2KHR flags, + GraphicsPipelineShaderStageInfo* pShaderStageInfo, + GraphicsPipelineBinaryCreateInfo* pBinaryCreateInfo, + ShaderOptimizerKey* 
pShaderOptimizerKeys, + PipelineOptimizerKey* pPipelineOptimizerKey, + uint64_t* pApiPsoHash, + ShaderModuleHandle* pTempModules, + Util::MetroHash::Hash* pCacheIds) +{ + VkResult result; + + UpdatePipelineCreateFlags(pDevice, &flags); + + if ((flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR) != 0) + { + result = GraphicsPipelineLibrary::CreateApiPsoHashAndElfHash( + pDevice, + pCreateInfo, + extStructs, + libInfo, + flags, + pShaderStageInfo, + pBinaryCreateInfo, + pShaderOptimizerKeys, + pPipelineOptimizerKey, + pApiPsoHash, + pTempModules, + pCacheIds); + } + else + { + result = GraphicsPipeline::CreateCacheId( + pDevice, + pCreateInfo, + extStructs, + libInfo, + flags, + pShaderStageInfo, + pBinaryCreateInfo, + pShaderOptimizerKeys, + pPipelineOptimizerKey, + pApiPsoHash, + pTempModules, + pCacheIds); + } + + return result; +} + +// ===================================================================================================================== +void GraphicsPipelineCommon::UpdatePipelineCreateFlags( + const Device* pDevice, + VkPipelineCreateFlags2KHR* pFlags) +{ + + if (pDevice->GetRuntimeSettings().pipelineLinkOptimizationMode == PipelineLinkOptimizationNeverOptimized) + { + *pFlags &= ~VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT; + } + else if (pDevice->GetRuntimeSettings().pipelineLinkOptimizationMode == PipelineLinkOptimizationAlwaysOptimized) + { + *pFlags |= VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT; + } +} + // ===================================================================================================================== static void CopyVertexInputInterfaceState( const GraphicsPipelineLibrary* pLibrary, @@ -1024,6 +1088,8 @@ static void CopyFragmentOutputInterfaceState( pInfo->immedInfo.msaaCreateInfo.occlusionQuerySamples = libInfo.immedInfo.msaaCreateInfo.occlusionQuerySamples; pInfo->immedInfo.msaaCreateInfo.flags.enable1xMsaaSampleLocations = libInfo.immedInfo.msaaCreateInfo.flags.enable1xMsaaSampleLocations; + 
pInfo->immedInfo.msaaCreateInfo.flags.forceSampleRateShading = + libInfo.immedInfo.msaaCreateInfo.flags.forceSampleRateShading; pInfo->immedInfo.samplePattern = libInfo.immedInfo.samplePattern; pInfo->immedInfo.minSampleShading = libInfo.immedInfo.minSampleShading; @@ -1385,6 +1451,7 @@ static void BuildVrsRateParams( // ===================================================================================================================== static void BuildMultisampleState( + const Device* pDevice, const VkPipelineMultisampleStateCreateInfo* pMs, const RenderPass* pRenderPass, const uint32_t subpass, @@ -1433,14 +1500,17 @@ static void BuildMultisampleState( if (pMs->sampleShadingEnable && (pMs->minSampleShading > 0.0f)) { - pInfo->immedInfo.msaaCreateInfo.pixelShaderSamples = - Pow2Pad(static_cast(ceil(subpassColorSampleCount * pMs->minSampleShading))); - pInfo->immedInfo.minSampleShading = pMs->minSampleShading; + const uint32_t pixelShaderSamples = + static_cast(ceil(subpassColorSampleCount * pMs->minSampleShading)); + pInfo->immedInfo.msaaCreateInfo.pixelShaderSamples = Pow2Pad(pixelShaderSamples); + pInfo->immedInfo.msaaCreateInfo.flags.forceSampleRateShading = (pixelShaderSamples > 1) ? 
1 : 0; + pInfo->immedInfo.minSampleShading = pMs->minSampleShading; } else { - pInfo->immedInfo.msaaCreateInfo.pixelShaderSamples = 1; - pInfo->immedInfo.minSampleShading = 0.0f; + pInfo->immedInfo.msaaCreateInfo.pixelShaderSamples = 1; + pInfo->immedInfo.msaaCreateInfo.flags.forceSampleRateShading = 0; + pInfo->immedInfo.minSampleShading = 0.0f; } pInfo->immedInfo.msaaCreateInfo.depthStencilSamples = subpassDepthSampleCount; @@ -1993,7 +2063,7 @@ static void BuildFragmentOutputInterfaceState( const uint32_t subpass = pIn->subpass; // Build states via VkPipelineMultisampleStateCreateInfo - BuildMultisampleState(pIn->pMultisampleState, pRenderPass, subpass, dynamicStateFlags, pInfo); + BuildMultisampleState(pDevice, pIn->pMultisampleState, pRenderPass, subpass, dynamicStateFlags, pInfo); auto pPipelineRenderingCreateInfoKHR = extStructs.pPipelineRenderingCreateInfo; diff --git a/icd/api/include/app_profile.h b/icd/api/include/app_profile.h index 27496109..c00a3159 100644 --- a/icd/api/include/app_profile.h +++ b/icd/api/include/app_profile.h @@ -77,6 +77,7 @@ enum class AppProfile : uint32_t DiRT4, // DiRT4 by Feral3D Rage2, // Rage2 by Avalanche Studios RainbowSixSiege, // Tom Clancy's Rainbow Six Siege: Operation Phantom by Ubisoft + RainbowSixExtraction, // Tom Clancy's Rainbow Six Extraction by Ubisoft WolfensteinYoungblood, // Wolfenstein Youngblood by Machine Games RedDeadRedemption2, // Red Dead Redemption 2 by Rockstar DoomEternal, // Doom Eternal by id Software @@ -106,7 +107,6 @@ enum class AppProfile : uint32_t XSystemEngine, // XSystem Engine by Laminar Research UnityEngine, // Unity Engine by Unity Technologies (Default) SaschaWillemsExamples, // Vulkan Examples by Sascha Willems - Hyperscape, // Hyperscape by Ubisoft SeriousSam4, // Serious Sam 4 by Croteam SniperElite5, // Sniper Elite 5 by Rebellion SeriousSamVrTheLastHope, // Serious Sam VR The Last Hope by Croteam @@ -118,14 +118,13 @@ enum class AppProfile : uint32_t ControlDX12, // VKD3D Control 
Ultimate Edition RayTracingWeekends, // RayTracingInVulkan demo #endif - Maxon, // Maxon - AngleEngine, // Angle Engine CSGO, // Counter-Strike: Global Offensive DxvkGodOfWar, // DXVK God of War ELEX2, // ELEX II X4Foundations, // X4: Foundations by Egosoft DxvkHaloInfiniteLauncher,// DXVK Halo Infinite Launcher (Don't Confuse it with VKD3D // Halo Infinite Game) + Starfield, // VKD3D Starfield DxvkTf2, // DXVK Team Fortress 2 MetalGearSolid5, // Metal Gear Solid5 : The Phantom Pain MetalGearSolid5Online, // Metal Gear Solid5 : The Phantom Pain Online diff --git a/icd/api/include/color_space_helper.h b/icd/api/include/color_space_helper.h index dbc222e1..68f1048f 100644 --- a/icd/api/include/color_space_helper.h +++ b/icd/api/include/color_space_helper.h @@ -98,7 +98,7 @@ class ColorSpaceHelper static bool IsColorSpaceHdr(VkColorSpaceKHR colorSpace) { - return (colorSpace != VK_COLORSPACE_SRGB_NONLINEAR_KHR); + return (colorSpace != VK_COLOR_SPACE_SRGB_NONLINEAR_KHR); } private: diff --git a/icd/api/include/compiler_solution.h b/icd/api/include/compiler_solution.h index 377401cc..5493be7c 100644 --- a/icd/api/include/compiler_solution.h +++ b/icd/api/include/compiler_solution.h @@ -148,7 +148,6 @@ struct PipelineMetadata bool pointSizeUsed; bool dualSrcBlendingUsed; bool shadingRateUsedInShader; - bool enableEarlyCompile; bool enableUberFetchShader; bool postDepthCoverageEnable; uint32_t psOnlyPointCoordEnable; @@ -256,6 +255,7 @@ struct RayTracingPipelineBinary { uint32_t maxFunctionCallDepth; bool hasTraceRay; + bool isCps; uint32_t pipelineBinCount; Vkgc::BinaryData* pPipelineBins; Vkgc::RayTracingShaderGroupHandle shaderGroupHandle; @@ -285,10 +285,6 @@ class CompilerSolution ShaderModuleHandle* pShaderModule, const PipelineOptimizerKey& profileKey) = 0; - virtual void TryEarlyCompileShaderModule( - const Device* pDevice, - ShaderModuleHandle* pShaderModule) = 0; - virtual void FreeShaderModule(ShaderModuleHandle* pShaderModule) = 0; virtual VkResult 
CreateGraphicsPipelineBinary( @@ -346,8 +342,6 @@ class CompilerSolution virtual void BuildPipelineInternalBufferData( const PipelineCompiler* pCompiler, const uint32_t uberFetchConstBufRegBase, - const uint32_t specConstBufVertexRegBase, - const uint32_t specConstBufFragmentRegBase, bool needCache, GraphicsPipelineBinaryCreateInfo* pCreateInfo) = 0; diff --git a/icd/api/include/compiler_solution_llpc.h b/icd/api/include/compiler_solution_llpc.h index bd888a61..9e49dae7 100644 --- a/icd/api/include/compiler_solution_llpc.h +++ b/icd/api/include/compiler_solution_llpc.h @@ -92,10 +92,6 @@ class CompilerSolutionLlpc final : public CompilerSolution ShaderModuleHandle* pShaderModule, const PipelineOptimizerKey& profileKey) override; - virtual void TryEarlyCompileShaderModule( - const Device* pDevice, - ShaderModuleHandle* pModule) override { } - virtual void FreeShaderModule(ShaderModuleHandle* pShaderModule) override; virtual VkResult CreateGraphicsPipelineBinary( @@ -159,8 +155,6 @@ class CompilerSolutionLlpc final : public CompilerSolution virtual void BuildPipelineInternalBufferData( const PipelineCompiler* pCompiler, const uint32_t uberFetchConstBufRegBase, - const uint32_t specConstBufVertexRegBase, - const uint32_t specConstBufFragmentRegBase, bool needCache, GraphicsPipelineBinaryCreateInfo* pCreateInfo) override; diff --git a/icd/api/include/graphics_pipeline_common.h b/icd/api/include/graphics_pipeline_common.h index 2ea9e2ad..bc7cab99 100644 --- a/icd/api/include/graphics_pipeline_common.h +++ b/icd/api/include/graphics_pipeline_common.h @@ -76,7 +76,6 @@ struct GraphicsPipelineObjectImmedInfo Pal::MsaaStateCreateInfo msaaCreateInfo; Pal::ColorBlendStateCreateInfo blendCreateInfo; bool rasterizerDiscardEnable; - bool checkDeferCompilePipeline; float minSampleShading; uint32_t colorWriteEnable; uint32_t colorWriteMask; @@ -218,6 +217,20 @@ class GraphicsPipelineCommon : public Pipeline const VkAllocationCallbacks* pAllocator, VkPipeline* pPipeline); + static 
VkResult CreateCacheId( + const Device* pDevice, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const GraphicsPipelineExtStructs& extStructs, + const GraphicsPipelineLibraryInfo& libInfo, + VkPipelineCreateFlags2KHR flags, + GraphicsPipelineShaderStageInfo* pShaderStageInfo, + GraphicsPipelineBinaryCreateInfo* pBinaryCreateInfo, + ShaderOptimizerKey* pShaderOptimizerKeys, + PipelineOptimizerKey* pPipelineOptimizerKey, + uint64_t* pApiPsoHash, + ShaderModuleHandle* pTempModules, + Util::MetroHash::Hash* pCacheIds); + // Get the active shader stages through API info static VkShaderStageFlagBits GetActiveShaderStages( const VkGraphicsPipelineCreateInfo* pGraphicsPipelineCreateInfo, @@ -266,6 +279,11 @@ class GraphicsPipelineCommon : public Pipeline VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT | VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT; + // Extracts extension structs from VkGraphicsPipelineCreateInfo + static void HandleExtensionStructs( + const VkGraphicsPipelineCreateInfo* pCreateInfo, + GraphicsPipelineExtStructs* pExtStructs); + protected: // Convert API information into internal create info used to create internal pipeline object static void BuildPipelineObjectCreateInfo( @@ -335,10 +353,10 @@ class GraphicsPipelineCommon : public Pipeline const GraphicsPipelineLibraryInfo* pLibInfo, uint64_t dynamicStateFlags); - // Extracts extension structs from VkGraphicsPipelineCreateInfo - static void HandleExtensionStructs( - const VkGraphicsPipelineCreateInfo* pCreateInfo, - GraphicsPipelineExtStructs* pExtStructs); + // Updates pipeline create flags prior to pipeline creation + static void UpdatePipelineCreateFlags( + const Device* pDevice, + VkPipelineCreateFlags2KHR* pFlags); // Constructor of GraphicsPipelineCommon GraphicsPipelineCommon( diff --git a/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h b/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h index 663697a9..caa55424 100644 --- 
a/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h +++ b/icd/api/include/khronos/sdk-1.3/vulkan/vulkan_core.h @@ -69,21 +69,25 @@ extern "C" { #define VK_API_VERSION_1_0 VK_MAKE_API_VERSION(0, 1, 0, 0)// Patch version should always be set to 0 // Version of this file -#define VK_HEADER_VERSION 287 +#define VK_HEADER_VERSION 292 // Complete version of this file #define VK_HEADER_VERSION_COMPLETE VK_MAKE_API_VERSION(0, 1, 3, VK_HEADER_VERSION) +// VK_MAKE_VERSION is deprecated, but no reason was given in the API XML // DEPRECATED: This define is deprecated. VK_MAKE_API_VERSION should be used instead. #define VK_MAKE_VERSION(major, minor, patch) \ ((((uint32_t)(major)) << 22U) | (((uint32_t)(minor)) << 12U) | ((uint32_t)(patch))) +// VK_VERSION_MAJOR is deprecated, but no reason was given in the API XML // DEPRECATED: This define is deprecated. VK_API_VERSION_MAJOR should be used instead. #define VK_VERSION_MAJOR(version) ((uint32_t)(version) >> 22U) +// VK_VERSION_MINOR is deprecated, but no reason was given in the API XML // DEPRECATED: This define is deprecated. VK_API_VERSION_MINOR should be used instead. #define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12U) & 0x3FFU) +// VK_VERSION_PATCH is deprecated, but no reason was given in the API XML // DEPRECATED: This define is deprecated. VK_API_VERSION_PATCH should be used instead. 
#define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xFFFU) @@ -193,6 +197,7 @@ typedef enum VkResult { VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS_KHR = VK_ERROR_INVALID_OPAQUE_CAPTURE_ADDRESS, VK_PIPELINE_COMPILE_REQUIRED_EXT = VK_PIPELINE_COMPILE_REQUIRED, VK_ERROR_PIPELINE_COMPILE_REQUIRED_EXT = VK_PIPELINE_COMPILE_REQUIRED, + // VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT is a deprecated alias VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT = VK_INCOMPATIBLE_SHADER_BINARY_EXT, VK_RESULT_MAX_ENUM = 0x7FFFFFFF } VkResult; @@ -1031,6 +1036,9 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_IMAGE_SUBRESOURCE_2_KHR = 1000338003, VK_STRUCTURE_TYPE_PIPELINE_CREATE_FLAGS_2_CREATE_INFO_KHR = 1000470005, VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR = 1000470006, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ANTI_LAG_FEATURES_AMD = 1000476000, + VK_STRUCTURE_TYPE_ANTI_LAG_DATA_AMD = 1000476001, + VK_STRUCTURE_TYPE_ANTI_LAG_PRESENTATION_INFO_AMD = 1000476002, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_POSITION_FETCH_FEATURES_KHR = 1000481000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_FEATURES_EXT = 1000482000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_OBJECT_PROPERTIES_EXT = 1000482001, @@ -1112,6 +1120,12 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_BIND_DESCRIPTOR_BUFFER_EMBEDDED_SAMPLERS_INFO_EXT = 1000545008, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_POOL_OVERALLOCATION_FEATURES_NV = 1000546000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAW_ACCESS_CHAINS_FEATURES_NV = 1000555000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_RELAXED_EXTENDED_INSTRUCTION_FEATURES_KHR = 1000558000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_7_FEATURES_KHR = 1000562000, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_7_PROPERTIES_KHR = 1000562001, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LAYERED_API_PROPERTIES_LIST_KHR = 1000562002, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LAYERED_API_PROPERTIES_KHR = 1000562003, + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LAYERED_API_VULKAN_PROPERTIES_KHR = 
1000562004, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT16_VECTOR_FEATURES_NV = 1000563000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_REPLICATED_COMPOSITES_FEATURES_EXT = 1000564000, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_VALIDATION_FEATURES_NV = 1000568000, @@ -1120,6 +1134,7 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_IMAGE_ALIGNMENT_CONTROL_CREATE_INFO_MESA = 1000575002, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETER_FEATURES = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES, + // VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT is a deprecated alias VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT, VK_STRUCTURE_TYPE_RENDERING_INFO_KHR = VK_STRUCTURE_TYPE_RENDERING_INFO, VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO_KHR = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO, @@ -1164,6 +1179,7 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES, VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO, + // VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES2_EXT is a deprecated alias VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES2_EXT = VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_EXT, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES, VK_STRUCTURE_TYPE_FRAMEBUFFER_ATTACHMENTS_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_FRAMEBUFFER_ATTACHMENTS_CREATE_INFO, @@ -1233,6 +1249,7 @@ typedef enum VkStructureType { VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO, 
VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO, VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO_KHR = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO, + // VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO_INTEL is a deprecated alias VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO_INTEL = VK_STRUCTURE_TYPE_QUERY_POOL_PERFORMANCE_QUERY_CREATE_INFO_INTEL, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES, VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_TERMINATE_INVOCATION_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_TERMINATE_INVOCATION_FEATURES, @@ -1740,6 +1757,7 @@ typedef enum VkFormat { VK_FORMAT_G16_B16R16_2PLANE_444_UNORM_EXT = VK_FORMAT_G16_B16R16_2PLANE_444_UNORM, VK_FORMAT_A4R4G4B4_UNORM_PACK16_EXT = VK_FORMAT_A4R4G4B4_UNORM_PACK16, VK_FORMAT_A4B4G4R4_UNORM_PACK16_EXT = VK_FORMAT_A4B4G4R4_UNORM_PACK16, + // VK_FORMAT_R16G16_S10_5_NV is a deprecated alias VK_FORMAT_R16G16_S10_5_NV = VK_FORMAT_R16G16_SFIXED5_NV, VK_FORMAT_MAX_ENUM = 0x7FFFFFFF } VkFormat; @@ -2091,6 +2109,7 @@ typedef enum VkSamplerAddressMode { VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE = 2, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER = 3, VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE = 4, + // VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR is a deprecated alias VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE_KHR = VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE, VK_SAMPLER_ADDRESS_MODE_MAX_ENUM = 0x7FFFFFFF } VkSamplerAddressMode; @@ -2174,7 +2193,8 @@ typedef enum VkIndexType { typedef enum VkSubpassContents { VK_SUBPASS_CONTENTS_INLINE = 0, VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS = 1, - VK_SUBPASS_CONTENTS_INLINE_AND_SECONDARY_COMMAND_BUFFERS_EXT = 1000451000, + VK_SUBPASS_CONTENTS_INLINE_AND_SECONDARY_COMMAND_BUFFERS_KHR = 1000451000, + VK_SUBPASS_CONTENTS_INLINE_AND_SECONDARY_COMMAND_BUFFERS_EXT = VK_SUBPASS_CONTENTS_INLINE_AND_SECONDARY_COMMAND_BUFFERS_KHR, VK_SUBPASS_CONTENTS_MAX_ENUM = 0x7FFFFFFF } 
VkSubpassContents; @@ -2615,7 +2635,9 @@ typedef enum VkPipelineCreateFlagBits { VK_PIPELINE_CREATE_NO_PROTECTED_ACCESS_BIT_EXT = 0x08000000, VK_PIPELINE_CREATE_PROTECTED_ACCESS_ONLY_BIT_EXT = 0x40000000, VK_PIPELINE_CREATE_DISPATCH_BASE = VK_PIPELINE_CREATE_DISPATCH_BASE_BIT, + // VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR is a deprecated alias VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR = VK_PIPELINE_CREATE_RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR, + // VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT is a deprecated alias VK_PIPELINE_RASTERIZATION_STATE_CREATE_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT = VK_PIPELINE_CREATE_RENDERING_FRAGMENT_DENSITY_MAP_ATTACHMENT_BIT_EXT, VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT_KHR = VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT, VK_PIPELINE_CREATE_DISPATCH_BASE_KHR = VK_PIPELINE_CREATE_DISPATCH_BASE, @@ -2824,6 +2846,7 @@ typedef enum VkStencilFaceFlagBits { VK_STENCIL_FACE_FRONT_BIT = 0x00000001, VK_STENCIL_FACE_BACK_BIT = 0x00000002, VK_STENCIL_FACE_FRONT_AND_BACK = 0x00000003, + // VK_STENCIL_FRONT_AND_BACK is a deprecated alias VK_STENCIL_FRONT_AND_BACK = VK_STENCIL_FACE_FRONT_AND_BACK, VK_STENCIL_FACE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkStencilFaceFlagBits; @@ -3234,7 +3257,9 @@ typedef struct VkDeviceCreateInfo { VkDeviceCreateFlags flags; uint32_t queueCreateInfoCount; const VkDeviceQueueCreateInfo* pQueueCreateInfos; + // enabledLayerCount is deprecated and should not be used uint32_t enabledLayerCount; + // ppEnabledLayerNames is deprecated and should not be used const char* const* ppEnabledLayerNames; uint32_t enabledExtensionCount; const char* const* ppEnabledExtensionNames; @@ -6626,6 +6651,7 @@ static const VkPipelineStageFlagBits2 VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_NV = 0 static const VkPipelineStageFlagBits2 VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_EXT = 0x00080000ULL; static const 
VkPipelineStageFlagBits2 VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_EXT = 0x00100000ULL; static const VkPipelineStageFlagBits2 VK_PIPELINE_STAGE_2_SUBPASS_SHADER_BIT_HUAWEI = 0x8000000000ULL; +// VK_PIPELINE_STAGE_2_SUBPASS_SHADING_BIT_HUAWEI is a deprecated alias static const VkPipelineStageFlagBits2 VK_PIPELINE_STAGE_2_SUBPASS_SHADING_BIT_HUAWEI = 0x8000000000ULL; static const VkPipelineStageFlagBits2 VK_PIPELINE_STAGE_2_INVOCATION_MASK_BIT_HUAWEI = 0x10000000000ULL; static const VkPipelineStageFlagBits2 VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR = 0x10000000ULL; @@ -6717,11 +6743,12 @@ typedef enum VkRenderingFlagBits { VK_RENDERING_CONTENTS_SECONDARY_COMMAND_BUFFERS_BIT = 0x00000001, VK_RENDERING_SUSPENDING_BIT = 0x00000002, VK_RENDERING_RESUMING_BIT = 0x00000004, - VK_RENDERING_CONTENTS_INLINE_BIT_EXT = 0x00000010, VK_RENDERING_ENABLE_LEGACY_DITHERING_BIT_EXT = 0x00000008, + VK_RENDERING_CONTENTS_INLINE_BIT_KHR = 0x00000010, VK_RENDERING_CONTENTS_SECONDARY_COMMAND_BUFFERS_BIT_KHR = VK_RENDERING_CONTENTS_SECONDARY_COMMAND_BUFFERS_BIT, VK_RENDERING_SUSPENDING_BIT_KHR = VK_RENDERING_SUSPENDING_BIT, VK_RENDERING_RESUMING_BIT_KHR = VK_RENDERING_RESUMING_BIT, + VK_RENDERING_CONTENTS_INLINE_BIT_EXT = VK_RENDERING_CONTENTS_INLINE_BIT_KHR, VK_RENDERING_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF } VkRenderingFlagBits; typedef VkFlags VkRenderingFlags; @@ -7585,7 +7612,9 @@ typedef enum VkColorSpaceKHR { VK_COLOR_SPACE_PASS_THROUGH_EXT = 1000104013, VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT = 1000104014, VK_COLOR_SPACE_DISPLAY_NATIVE_AMD = 1000213000, + // VK_COLORSPACE_SRGB_NONLINEAR_KHR is a deprecated alias VK_COLORSPACE_SRGB_NONLINEAR_KHR = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR, + // VK_COLOR_SPACE_DCI_P3_LINEAR_EXT is a deprecated alias VK_COLOR_SPACE_DCI_P3_LINEAR_EXT = VK_COLOR_SPACE_DISPLAY_P3_LINEAR_EXT, VK_COLOR_SPACE_MAX_ENUM_KHR = 0x7FFFFFFF } VkColorSpaceKHR; @@ -9016,7 +9045,9 @@ VKAPI_ATTR void VKAPI_CALL vkCmdDispatchBaseKHR( #define VK_KHR_maintenance1 1 #define 
VK_KHR_MAINTENANCE_1_SPEC_VERSION 2 #define VK_KHR_MAINTENANCE_1_EXTENSION_NAME "VK_KHR_maintenance1" +// VK_KHR_MAINTENANCE1_SPEC_VERSION is a deprecated alias #define VK_KHR_MAINTENANCE1_SPEC_VERSION VK_KHR_MAINTENANCE_1_SPEC_VERSION +// VK_KHR_MAINTENANCE1_EXTENSION_NAME is a deprecated alias #define VK_KHR_MAINTENANCE1_EXTENSION_NAME VK_KHR_MAINTENANCE_1_EXTENSION_NAME typedef VkCommandPoolTrimFlags VkCommandPoolTrimFlagsKHR; @@ -9498,8 +9529,11 @@ typedef enum VkPerformanceCounterScopeKHR { VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_BUFFER_KHR = 0, VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR = 1, VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_KHR = 2, + // VK_QUERY_SCOPE_COMMAND_BUFFER_KHR is a deprecated alias VK_QUERY_SCOPE_COMMAND_BUFFER_KHR = VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_BUFFER_KHR, + // VK_QUERY_SCOPE_RENDER_PASS_KHR is a deprecated alias VK_QUERY_SCOPE_RENDER_PASS_KHR = VK_PERFORMANCE_COUNTER_SCOPE_RENDER_PASS_KHR, + // VK_QUERY_SCOPE_COMMAND_KHR is a deprecated alias VK_QUERY_SCOPE_COMMAND_KHR = VK_PERFORMANCE_COUNTER_SCOPE_COMMAND_KHR, VK_PERFORMANCE_COUNTER_SCOPE_MAX_ENUM_KHR = 0x7FFFFFFF } VkPerformanceCounterScopeKHR; @@ -9517,7 +9551,9 @@ typedef enum VkPerformanceCounterStorageKHR { typedef enum VkPerformanceCounterDescriptionFlagBitsKHR { VK_PERFORMANCE_COUNTER_DESCRIPTION_PERFORMANCE_IMPACTING_BIT_KHR = 0x00000001, VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_BIT_KHR = 0x00000002, + // VK_PERFORMANCE_COUNTER_DESCRIPTION_PERFORMANCE_IMPACTING_KHR is a deprecated alias VK_PERFORMANCE_COUNTER_DESCRIPTION_PERFORMANCE_IMPACTING_KHR = VK_PERFORMANCE_COUNTER_DESCRIPTION_PERFORMANCE_IMPACTING_BIT_KHR, + // VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_KHR is a deprecated alias VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_KHR = VK_PERFORMANCE_COUNTER_DESCRIPTION_CONCURRENTLY_IMPACTED_BIT_KHR, VK_PERFORMANCE_COUNTER_DESCRIPTION_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF } VkPerformanceCounterDescriptionFlagBitsKHR; @@ -9619,7 +9655,9 
@@ VKAPI_ATTR void VKAPI_CALL vkReleaseProfilingLockKHR( #define VK_KHR_maintenance2 1 #define VK_KHR_MAINTENANCE_2_SPEC_VERSION 1 #define VK_KHR_MAINTENANCE_2_EXTENSION_NAME "VK_KHR_maintenance2" +// VK_KHR_MAINTENANCE2_SPEC_VERSION is a deprecated alias #define VK_KHR_MAINTENANCE2_SPEC_VERSION VK_KHR_MAINTENANCE_2_SPEC_VERSION +// VK_KHR_MAINTENANCE2_EXTENSION_NAME is a deprecated alias #define VK_KHR_MAINTENANCE2_EXTENSION_NAME VK_KHR_MAINTENANCE_2_EXTENSION_NAME typedef VkPointClippingBehavior VkPointClippingBehaviorKHR; @@ -9886,7 +9924,9 @@ VKAPI_ATTR VkResult VKAPI_CALL vkBindImageMemory2KHR( #define VK_KHR_maintenance3 1 #define VK_KHR_MAINTENANCE_3_SPEC_VERSION 1 #define VK_KHR_MAINTENANCE_3_EXTENSION_NAME "VK_KHR_maintenance3" +// VK_KHR_MAINTENANCE3_SPEC_VERSION is a deprecated alias #define VK_KHR_MAINTENANCE3_SPEC_VERSION VK_KHR_MAINTENANCE_3_SPEC_VERSION +// VK_KHR_MAINTENANCE3_EXTENSION_NAME is a deprecated alias #define VK_KHR_MAINTENANCE3_EXTENSION_NAME VK_KHR_MAINTENANCE_3_EXTENSION_NAME typedef VkPhysicalDeviceMaintenance3Properties VkPhysicalDeviceMaintenance3PropertiesKHR; @@ -10614,10 +10654,6 @@ typedef enum VkVideoEncodeTuningModeKHR { VK_VIDEO_ENCODE_TUNING_MODE_LOSSLESS_KHR = 4, VK_VIDEO_ENCODE_TUNING_MODE_MAX_ENUM_KHR = 0x7FFFFFFF } VkVideoEncodeTuningModeKHR; - -typedef enum VkVideoEncodeFlagBitsKHR { - VK_VIDEO_ENCODE_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF -} VkVideoEncodeFlagBitsKHR; typedef VkFlags VkVideoEncodeFlagsKHR; typedef enum VkVideoEncodeCapabilityFlagBitsKHR { @@ -11709,6 +11745,74 @@ VKAPI_ATTR void VKAPI_CALL vkCmdBindDescriptorBufferEmbeddedSamplers2EXT( #endif +// VK_KHR_shader_relaxed_extended_instruction is a preprocessor guard. Do not pass it to API calls. 
+#define VK_KHR_shader_relaxed_extended_instruction 1 +#define VK_KHR_SHADER_RELAXED_EXTENDED_INSTRUCTION_SPEC_VERSION 1 +#define VK_KHR_SHADER_RELAXED_EXTENDED_INSTRUCTION_EXTENSION_NAME "VK_KHR_shader_relaxed_extended_instruction" +typedef struct VkPhysicalDeviceShaderRelaxedExtendedInstructionFeaturesKHR { + VkStructureType sType; + void* pNext; + VkBool32 shaderRelaxedExtendedInstruction; +} VkPhysicalDeviceShaderRelaxedExtendedInstructionFeaturesKHR; + + + +// VK_KHR_maintenance7 is a preprocessor guard. Do not pass it to API calls. +#define VK_KHR_maintenance7 1 +#define VK_KHR_MAINTENANCE_7_SPEC_VERSION 1 +#define VK_KHR_MAINTENANCE_7_EXTENSION_NAME "VK_KHR_maintenance7" + +typedef enum VkPhysicalDeviceLayeredApiKHR { + VK_PHYSICAL_DEVICE_LAYERED_API_VULKAN_KHR = 0, + VK_PHYSICAL_DEVICE_LAYERED_API_D3D12_KHR = 1, + VK_PHYSICAL_DEVICE_LAYERED_API_METAL_KHR = 2, + VK_PHYSICAL_DEVICE_LAYERED_API_OPENGL_KHR = 3, + VK_PHYSICAL_DEVICE_LAYERED_API_OPENGLES_KHR = 4, + VK_PHYSICAL_DEVICE_LAYERED_API_MAX_ENUM_KHR = 0x7FFFFFFF +} VkPhysicalDeviceLayeredApiKHR; +typedef struct VkPhysicalDeviceMaintenance7FeaturesKHR { + VkStructureType sType; + void* pNext; + VkBool32 maintenance7; +} VkPhysicalDeviceMaintenance7FeaturesKHR; + +typedef struct VkPhysicalDeviceMaintenance7PropertiesKHR { + VkStructureType sType; + void* pNext; + VkBool32 robustFragmentShadingRateAttachmentAccess; + VkBool32 separateDepthStencilAttachmentAccess; + uint32_t maxDescriptorSetTotalUniformBuffersDynamic; + uint32_t maxDescriptorSetTotalStorageBuffersDynamic; + uint32_t maxDescriptorSetTotalBuffersDynamic; + uint32_t maxDescriptorSetUpdateAfterBindTotalUniformBuffersDynamic; + uint32_t maxDescriptorSetUpdateAfterBindTotalStorageBuffersDynamic; + uint32_t maxDescriptorSetUpdateAfterBindTotalBuffersDynamic; +} VkPhysicalDeviceMaintenance7PropertiesKHR; + +typedef struct VkPhysicalDeviceLayeredApiPropertiesKHR { + VkStructureType sType; + void* pNext; + uint32_t vendorID; + uint32_t deviceID; + 
VkPhysicalDeviceLayeredApiKHR layeredAPI; + char deviceName[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE]; +} VkPhysicalDeviceLayeredApiPropertiesKHR; + +typedef struct VkPhysicalDeviceLayeredApiPropertiesListKHR { + VkStructureType sType; + void* pNext; + uint32_t layeredApiCount; + VkPhysicalDeviceLayeredApiPropertiesKHR* pLayeredApis; +} VkPhysicalDeviceLayeredApiPropertiesListKHR; + +typedef struct VkPhysicalDeviceLayeredApiVulkanPropertiesKHR { + VkStructureType sType; + void* pNext; + VkPhysicalDeviceProperties2 properties; +} VkPhysicalDeviceLayeredApiVulkanPropertiesKHR; + + + // VK_EXT_debug_report is a preprocessor guard. Do not pass it to API calls. #define VK_EXT_debug_report 1 VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDebugReportCallbackEXT) @@ -11757,7 +11861,9 @@ typedef enum VkDebugReportObjectTypeEXT { VK_DEBUG_REPORT_OBJECT_TYPE_CUDA_MODULE_NV_EXT = 1000307000, VK_DEBUG_REPORT_OBJECT_TYPE_CUDA_FUNCTION_NV_EXT = 1000307001, VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_COLLECTION_FUCHSIA_EXT = 1000366000, + // VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT is a deprecated alias VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT, + // VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT is a deprecated alias VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT, VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_KHR_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_EXT, VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_KHR_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_EXT, @@ -12529,6 +12635,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkReleaseDisplayEXT( typedef enum VkSurfaceCounterFlagBitsEXT { VK_SURFACE_COUNTER_VBLANK_BIT_EXT = 0x00000001, + // VK_SURFACE_COUNTER_VBLANK_EXT is a deprecated alias VK_SURFACE_COUNTER_VBLANK_EXT = VK_SURFACE_COUNTER_VBLANK_BIT_EXT, VK_SURFACE_COUNTER_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF } VkSurfaceCounterFlagBitsEXT; @@ 
-12697,7 +12804,9 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetPastPresentationTimingGOOGLE( #define VK_NV_viewport_array2 1 #define VK_NV_VIEWPORT_ARRAY_2_SPEC_VERSION 1 #define VK_NV_VIEWPORT_ARRAY_2_EXTENSION_NAME "VK_NV_viewport_array2" +// VK_NV_VIEWPORT_ARRAY2_SPEC_VERSION is a deprecated alias #define VK_NV_VIEWPORT_ARRAY2_SPEC_VERSION VK_NV_VIEWPORT_ARRAY_2_SPEC_VERSION +// VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME is a deprecated alias #define VK_NV_VIEWPORT_ARRAY2_EXTENSION_NAME VK_NV_VIEWPORT_ARRAY_2_EXTENSION_NAME @@ -12852,13 +12961,13 @@ typedef struct VkPipelineRasterizationDepthClipStateCreateInfoEXT { // VK_EXT_swapchain_colorspace is a preprocessor guard. Do not pass it to API calls. #define VK_EXT_swapchain_colorspace 1 -#define VK_EXT_SWAPCHAIN_COLOR_SPACE_SPEC_VERSION 4 +#define VK_EXT_SWAPCHAIN_COLOR_SPACE_SPEC_VERSION 5 #define VK_EXT_SWAPCHAIN_COLOR_SPACE_EXTENSION_NAME "VK_EXT_swapchain_colorspace" // VK_EXT_hdr_metadata is a preprocessor guard. Do not pass it to API calls. 
#define VK_EXT_hdr_metadata 1 -#define VK_EXT_HDR_METADATA_SPEC_VERSION 2 +#define VK_EXT_HDR_METADATA_SPEC_VERSION 3 #define VK_EXT_HDR_METADATA_EXTENSION_NAME "VK_EXT_hdr_metadata" typedef struct VkXYColorEXT { float x; @@ -14545,7 +14654,9 @@ typedef VkPhysicalDeviceScalarBlockLayoutFeatures VkPhysicalDeviceScalarBlockLay #define VK_GOOGLE_hlsl_functionality1 1 #define VK_GOOGLE_HLSL_FUNCTIONALITY_1_SPEC_VERSION 1 #define VK_GOOGLE_HLSL_FUNCTIONALITY_1_EXTENSION_NAME "VK_GOOGLE_hlsl_functionality1" +// VK_GOOGLE_HLSL_FUNCTIONALITY1_SPEC_VERSION is a deprecated alias #define VK_GOOGLE_HLSL_FUNCTIONALITY1_SPEC_VERSION VK_GOOGLE_HLSL_FUNCTIONALITY_1_SPEC_VERSION +// VK_GOOGLE_HLSL_FUNCTIONALITY1_EXTENSION_NAME is a deprecated alias #define VK_GOOGLE_HLSL_FUNCTIONALITY1_EXTENSION_NAME VK_GOOGLE_HLSL_FUNCTIONALITY_1_EXTENSION_NAME @@ -16062,14 +16173,14 @@ typedef struct VkDescriptorAddressInfoEXT { typedef struct VkDescriptorBufferBindingInfoEXT { VkStructureType sType; - void* pNext; + const void* pNext; VkDeviceAddress address; VkBufferUsageFlags usage; } VkDescriptorBufferBindingInfoEXT; typedef struct VkDescriptorBufferBindingPushDescriptorBufferHandleEXT { VkStructureType sType; - void* pNext; + const void* pNext; VkBuffer buffer; } VkDescriptorBufferBindingPushDescriptorBufferHandleEXT; @@ -17701,7 +17812,7 @@ typedef struct VkRenderPassStripeSubmitInfoARM { // VK_QCOM_fragment_density_map_offset is a preprocessor guard. Do not pass it to API calls. #define VK_QCOM_fragment_density_map_offset 1 -#define VK_QCOM_FRAGMENT_DENSITY_MAP_OFFSET_SPEC_VERSION 1 +#define VK_QCOM_FRAGMENT_DENSITY_MAP_OFFSET_SPEC_VERSION 2 #define VK_QCOM_FRAGMENT_DENSITY_MAP_OFFSET_EXTENSION_NAME "VK_QCOM_fragment_density_map_offset" typedef struct VkPhysicalDeviceFragmentDensityMapOffsetFeaturesQCOM { VkStructureType sType; @@ -18534,6 +18645,53 @@ typedef struct VkPhysicalDevicePipelineProtectedAccessFeaturesEXT { +// VK_AMD_anti_lag is a preprocessor guard. 
Do not pass it to API calls. +#define VK_AMD_anti_lag 1 +#define VK_AMD_ANTI_LAG_SPEC_VERSION 1 +#define VK_AMD_ANTI_LAG_EXTENSION_NAME "VK_AMD_anti_lag" + +typedef enum VkAntiLagModeAMD { + VK_ANTI_LAG_MODE_DRIVER_CONTROL_AMD = 0, + VK_ANTI_LAG_MODE_ON_AMD = 1, + VK_ANTI_LAG_MODE_OFF_AMD = 2, + VK_ANTI_LAG_MODE_MAX_ENUM_AMD = 0x7FFFFFFF +} VkAntiLagModeAMD; + +typedef enum VkAntiLagStageAMD { + VK_ANTI_LAG_STAGE_INPUT_AMD = 0, + VK_ANTI_LAG_STAGE_PRESENT_AMD = 1, + VK_ANTI_LAG_STAGE_MAX_ENUM_AMD = 0x7FFFFFFF +} VkAntiLagStageAMD; +typedef struct VkPhysicalDeviceAntiLagFeaturesAMD { + VkStructureType sType; + void* pNext; + VkBool32 antiLag; +} VkPhysicalDeviceAntiLagFeaturesAMD; + +typedef struct VkAntiLagPresentationInfoAMD { + VkStructureType sType; + void* pNext; + VkAntiLagStageAMD stage; + uint64_t frameIndex; +} VkAntiLagPresentationInfoAMD; + +typedef struct VkAntiLagDataAMD { + VkStructureType sType; + const void* pNext; + VkAntiLagModeAMD mode; + uint32_t maxFPS; + const VkAntiLagPresentationInfoAMD* pPresentationInfo; +} VkAntiLagDataAMD; + +typedef void (VKAPI_PTR *PFN_vkAntiLagUpdateAMD)(VkDevice device, const VkAntiLagDataAMD* pData); + +#ifndef VK_NO_PROTOTYPES +VKAPI_ATTR void VKAPI_CALL vkAntiLagUpdateAMD( + VkDevice device, + const VkAntiLagDataAMD* pData); +#endif + + // VK_EXT_shader_object is a preprocessor guard. Do not pass it to API calls. 
#define VK_EXT_shader_object 1 VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkShaderEXT) diff --git a/icd/api/include/pipeline_compiler.h b/icd/api/include/pipeline_compiler.h index 696092a3..a69e97a1 100644 --- a/icd/api/include/pipeline_compiler.h +++ b/icd/api/include/pipeline_compiler.h @@ -162,10 +162,6 @@ class PipelineCompiler const Vkgc::BinaryData& shaderBinary, ShaderModuleHandle* pShaderModule); - void TryEarlyCompileShaderModule( - const Device* pDevice, - ShaderModuleHandle* pModule); - bool IsValidShaderModule( const ShaderModuleHandle* pShaderModule) const; @@ -270,7 +266,6 @@ class PipelineCompiler void FreeGraphicsPipelineCreateInfo( Device* pDevice, GraphicsPipelineBinaryCreateInfo* pCreateInfo, - bool keepConvertTempMem, bool keepInternalMem); #if VKI_RAY_TRACING diff --git a/icd/api/include/vk_cmdbuffer.h b/icd/api/include/vk_cmdbuffer.h index 7ff18c47..442c2c49 100644 --- a/icd/api/include/vk_cmdbuffer.h +++ b/icd/api/include/vk_cmdbuffer.h @@ -79,6 +79,7 @@ namespace vk class ComputePipeline; class Device; class ApiCmdBuffer; +class CmdBuffer; class Framebuffer; class GraphicsPipeline; class Image; @@ -102,6 +103,15 @@ constexpr uint8_t DefaultTidValue = 0xFF; constexpr uint8_t AssociatedFlag = 31; constexpr uint32_t DefaultAssociatedFlagValue = (1 << AssociatedFlag); +#if VKI_RAY_TRACING +typedef void (*PFN_traceRaysDispatchPerDevice)( + CmdBuffer* pCmdBuffer, + uint32_t deviceIdx, + uint32_t width, + uint32_t height, + uint32_t depth); +#endif + // Internal API pipeline binding points enum PipelineBindPoint { @@ -218,6 +228,9 @@ struct PerGpuRenderState // backendSize is normally considered as scratch size. Pal::CompilerStackSizes maxPipelineStackSizes; + // Dynamic pipeline stack size that is set by API. 
+ uint32_t dynamicPipelineStackSize; + // VB bindings in source non-SRD form Pal::BufferViewInfo vbBindings[Pal::MaxVertexBuffers]; @@ -409,6 +422,9 @@ class CmdBuffer gpusize sizeInBytes, InternalSubAllocPool poolId, InternalMemory** ppInternalMemory); + + void SetTraceRaysDispatchPerDevice(PFN_traceRaysDispatchPerDevice pfnRTDispatch) + { m_pfnTraceRaysDispatchPerDevice = pfnRTDispatch; } #endif VkResult End(void); @@ -1323,6 +1339,8 @@ class CmdBuffer SqttCmdBufferState* GetSqttState() { return m_pSqttState; } + uint64_t GetUserMarkerContextValue() const; + static bool IsStaticStateDifferent( uint32_t currentToken, uint32_t newToken) @@ -1599,6 +1617,7 @@ class CmdBuffer void RPBeginSubpass(); void RPEndSubpass(); void RPResolveAttachments(uint32_t count, const RPResolveInfo* pResolves); + void RPResolveMsaa(const RPResolveInfo& params); void RPSyncPoint(const RPSyncPointInfo& syncPoint, VirtualStackFrame* pVirtStack); void RPSyncPointLegacy(const RPSyncPointInfo& syncPoint, VirtualStackFrame* pVirtStack); void RPLoadOpClearColor(uint32_t count, const RPLoadOpClearInfo* pClears); @@ -1842,6 +1861,13 @@ class CmdBuffer uint32_t height, uint32_t depth); + static void TraceRaysDispatchPerDevice( + CmdBuffer* pCmdBuffer, + uint32_t deviceIdx, + uint32_t width, + uint32_t height, + uint32_t depth); + void TraceRaysIndirectPerDevice( const uint32_t deviceIdx, GpuRt::ExecuteIndirectArgType indirectArgType, @@ -1849,7 +1875,8 @@ class CmdBuffer const VkStridedDeviceAddressRegionKHR& missShaderBindingTable, const VkStridedDeviceAddressRegionKHR& hitShaderBindingTable, const VkStridedDeviceAddressRegionKHR& callableShaderBindingTable, - VkDeviceAddress indirectDeviceAddress); + VkDeviceAddress indirectDeviceAddress, + uint64_t userMarkerContext); void GetRayTracingDispatchArgs( uint32_t deviceIdx, @@ -1993,6 +2020,10 @@ class CmdBuffer typedef Util::Vector PatchCpsVector; PatchCpsVector m_patchCpsList[MaxPalDevices]; #endif + +#if VKI_RAY_TRACING + 
PFN_traceRaysDispatchPerDevice m_pfnTraceRaysDispatchPerDevice; +#endif }; // ===================================================================================================================== @@ -2953,6 +2984,10 @@ VKAPI_ATTR void VKAPI_CALL vkCmdSetRenderingInputAttachmentIndicesKHR( VkCommandBuffer commandBuffer, const VkRenderingInputAttachmentIndexInfoKHR* pInputAttachmentIndexInfo); +VKAPI_ATTR void VKAPI_CALL vkCmdSetDepthBias2EXT( + VkCommandBuffer commandBuffer, + const VkDepthBiasInfoEXT* pDepthBiasInfo); + } // namespace entry } // namespace vk diff --git a/icd/api/include/vk_compute_pipeline.h b/icd/api/include/vk_compute_pipeline.h index 2e6e80b3..c2c0bddc 100644 --- a/icd/api/include/vk_compute_pipeline.h +++ b/icd/api/include/vk_compute_pipeline.h @@ -91,8 +91,6 @@ class ComputePipeline final : public Pipeline, public NonDispatchable return m_pPalEvents[deviceIdx]; } - VK_FORCEINLINE uint32 GetSyncToken() const + VK_FORCEINLINE Pal::ReleaseToken GetSyncToken() const { return m_syncToken; } - VK_FORCEINLINE void SetSyncToken(uint32 syncToken) + VK_FORCEINLINE void SetSyncToken(Pal::ReleaseToken syncToken) { m_syncToken = syncToken; } @@ -108,7 +108,7 @@ class Event final : public NonDispatchable union { Pal::IGpuEvent* m_pPalEvents[MaxPalDevices]; - uint32 m_syncToken; + Pal::ReleaseToken m_syncToken; }; InternalMemory m_internalGpuMem; diff --git a/icd/api/include/vk_extensions.h b/icd/api/include/vk_extensions.h index 443ef9ed..fdbc0b35 100644 --- a/icd/api/include/vk_extensions.h +++ b/icd/api/include/vk_extensions.h @@ -60,6 +60,8 @@ #define VK_KHR_MAINTENANCE5_SPEC_VERSION VK_KHR_MAINTENANCE_5_SPEC_VERSION #define VK_KHR_MAINTENANCE6_EXTENSION_NAME VK_KHR_MAINTENANCE_6_EXTENSION_NAME #define VK_KHR_MAINTENANCE6_SPEC_VERSION VK_KHR_MAINTENANCE_6_SPEC_VERSION +#define VK_KHR_MAINTENANCE7_EXTENSION_NAME VK_KHR_MAINTENANCE_7_EXTENSION_NAME +#define VK_KHR_MAINTENANCE7_SPEC_VERSION VK_KHR_MAINTENANCE_7_SPEC_VERSION #define 
VK_KHR_MAP_MEMORY2_SPEC_VERSION VK_KHR_MAP_MEMORY_2_SPEC_VERSION #if VKI_RAY_TRACING #define VK_KHR_RAY_TRACING_MAINTENANCE1_SPEC_VERSION VK_KHR_RAY_TRACING_MAINTENANCE_1_SPEC_VERSION @@ -318,6 +320,7 @@ class DeviceExtensions final : public Extensions KHR_MAINTENANCE4, KHR_MAINTENANCE5, KHR_MAINTENANCE6, + KHR_MAINTENANCE7, KHR_MAP_MEMORY2, KHR_MULTIVIEW, KHR_PIPELINE_EXECUTABLE_PROPERTIES, @@ -372,6 +375,7 @@ class DeviceExtensions final : public Extensions EXT_CONSERVATIVE_RASTERIZATION, EXT_CUSTOM_BORDER_COLOR, EXT_DEBUG_MARKER, + EXT_DEPTH_BIAS_CONTROL, EXT_DEPTH_CLAMP_ZERO_ONE, EXT_DEPTH_CLIP_CONTROL, EXT_DEPTH_CLIP_ENABLE, diff --git a/icd/api/include/vk_graphics_pipeline.h b/icd/api/include/vk_graphics_pipeline.h index 21e69dce..a0e9e55d 100644 --- a/icd/api/include/vk_graphics_pipeline.h +++ b/icd/api/include/vk_graphics_pipeline.h @@ -159,7 +159,7 @@ class GraphicsPipeline final : public GraphicsPipelineCommon, public NonDispatch VkPipeline* pPipeline); static VkResult CreateCacheId( - Device* pDevice, + const Device* pDevice, const VkGraphicsPipelineCreateInfo* pCreateInfo, const GraphicsPipelineExtStructs& extStructs, const GraphicsPipelineLibraryInfo& libInfo, @@ -192,7 +192,7 @@ class GraphicsPipeline final : public GraphicsPipelineCommon, public NonDispatch { return m_pPalColorBlend[deviceIdx]; } const Pal::IPipeline* GetPalPipeline(uint32_t deviceIdx) const - { return UseOptimizedPipeline() ? 
m_pOptimizedPipeline[deviceIdx] : m_pPalPipeline[deviceIdx]; } + { return m_pPalPipeline[deviceIdx]; } const Pal::IShaderLibrary* GetPalShaderLibrary(GraphicsLibraryType type) const { return m_pPalShaderLibrary[type]; } @@ -215,8 +215,6 @@ class GraphicsPipeline final : public GraphicsPipelineCommon, public NonDispatch bool IsPointSizeUsed() const { return m_flags.isPointSizeUsed; } - static void BindNullPipeline(CmdBuffer* pCmdBuffer); - // Returns value of VK_PIPELINE_CREATE_VIEW_INDEX_FROM_DEVICE_INDEX_BIT // defined by flags member of VkGraphicsPipelineCreateInfo. bool ViewIndexFromDeviceIndex() const @@ -285,15 +283,6 @@ class GraphicsPipeline final : public GraphicsPipelineCommon, public NonDispatch private: PAL_DISALLOW_COPY_AND_ASSIGN(GraphicsPipeline); - VkResult DeferCreateOptimizedPipeline( - Device* pDevice, - PipelineCache* pPipelineCache, - GraphicsPipelineBinaryCreateInfo* pBinaryCreateInfo, - GraphicsPipelineShaderStageInfo* pShaderStageInfo, - GraphicsPipelineObjectCreateInfo* pObjectCreateInfo, - const GraphicsPipelineExtStructs& extStructs, - Util::MetroHash::Hash* pCacheIds); - static VkResult CreatePalPipelineObjects( Device* pDevice, PipelineCache* pPipelineCache, @@ -303,29 +292,6 @@ class GraphicsPipeline final : public GraphicsPipelineCommon, public NonDispatch void* pSystemMem, Pal::IPipeline** pPalPipeline); - void SetOptimizedPipeline(Pal::IPipeline** pPalPipeline); - - bool UseOptimizedPipeline() const - { - bool result = m_info.checkDeferCompilePipeline; - if (result) - { - Util::MutexAuto pipelineSwitchLock(const_cast(&m_pipelineSwitchLock)); - result = m_pOptimizedPipeline[0] != nullptr && m_optimizedPipelineHash != 0; - } - return result; - } - VkResult BuildDeferCompileWorkload( - Device* pDevice, - PipelineCache* pPipelineCache, - GraphicsPipelineBinaryCreateInfo* pBinaryCreateInfo, - GraphicsPipelineShaderStageInfo* pShaderStageInfo, - GraphicsPipelineObjectCreateInfo* pObjectCreateInfo, - const GraphicsPipelineExtStructs& 
extStructs, - Util::MetroHash::Hash* pCacheIds); - - static void ExecuteDeferCreateOptimizedPipeline(void* pPayload); - GraphicsPipelineObjectImmedInfo m_info; // Immediate state that will go in CmdSet* functions Pal::IMsaaState* m_pPalMsaa[MaxPalDevices]; // PAL MSAA state object Pal::IColorBlendState* m_pPalColorBlend[MaxPalDevices]; // PAL color blend state object @@ -334,10 +300,6 @@ class GraphicsPipeline final : public GraphicsPipelineCommon, public NonDispatch const InternalMemory* m_pInternalMem; // Memory object of internal buffer VbBindingInfo m_vbInfo; // Information about vertex buffer bindings PipelineInternalBufferInfo m_internalBufferInfo; // Information about internal buffer - Pal::IPipeline* m_pOptimizedPipeline[MaxPalDevices]; // Optimized PAL pipelines - uint64_t m_optimizedPipelineHash; // Pipeline hash of optimized PAL pipelines - Util::Mutex m_pipelineSwitchLock; // Lock for optimized pipeline and default pipeline - DeferredCompileWorkload m_deferWorkload; // Workload of deferred compiled GraphicsPipelineObjectFlags m_flags; }; diff --git a/icd/api/include/vk_graphics_pipeline_library.h b/icd/api/include/vk_graphics_pipeline_library.h index 94a70ba9..7647f357 100644 --- a/icd/api/include/vk_graphics_pipeline_library.h +++ b/icd/api/include/vk_graphics_pipeline_library.h @@ -47,6 +47,20 @@ class GraphicsPipelineLibrary final : public GraphicsPipelineCommon, public NonD const VkAllocationCallbacks* pAllocator, VkPipeline* pPipeline); + static VkResult CreateApiPsoHashAndElfHash( + const Device* pDevice, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const GraphicsPipelineExtStructs& extStructs, + const GraphicsPipelineLibraryInfo& libInfo, + VkPipelineCreateFlags2KHR flags, + GraphicsPipelineShaderStageInfo* pShaderStageInfo, + GraphicsPipelineBinaryCreateInfo* pBinaryCreateInfo, + ShaderOptimizerKey* pShaderOptimizerKeys, + PipelineOptimizerKey* pPipelineOptimizerKey, + uint64_t* pApiPsoHash, + ShaderModuleHandle* pTempModules, + 
Util::MetroHash::Hash* pElfHash); + VkResult Destroy( Device* pDevice, const VkAllocationCallbacks* pAllocator) override; @@ -88,6 +102,7 @@ class GraphicsPipelineLibrary final : public GraphicsPipelineCommon, public NonD const Device* pDevice, PipelineCache* pPipelineCache, const VkGraphicsPipelineCreateInfo* pCreateInfo, + const GraphicsPipelineExtStructs& extStructs, const GraphicsPipelineLibraryInfo* pLibInfo, const GraphicsPipelineShaderStageInfo* pShaderStageInfo, GraphicsPipelineBinaryCreateInfo* pBinaryCreateInfo, diff --git a/icd/api/include/vk_indirect_commands_layout.h b/icd/api/include/vk_indirect_commands_layout.h index 9c58ef80..92175b7e 100644 --- a/icd/api/include/vk_indirect_commands_layout.h +++ b/icd/api/include/vk_indirect_commands_layout.h @@ -71,11 +71,11 @@ struct IndirectCommandsInfo // // Indirect commands layout objects describe the information of indirect commands, as well as how to interpret and // process indirect buffers. -class IndirectCommandsLayout final : public NonDispatchable +class IndirectCommandsLayoutNV final : public NonDispatchable { public: static VkResult Create( - const Device* pDevice, + Device* pDevice, const VkIndirectCommandsLayoutCreateInfoNV* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkIndirectCommandsLayoutNV* pLayout); @@ -90,7 +90,7 @@ class IndirectCommandsLayout final : public NonDispatchableNumPalDevices() - 1) * sizeof(PerGpuInfo)); - } + VkResult Initialize( + Device* pDevice); static void BuildPalCreateInfo( const Device* pDevice, @@ -126,15 +117,10 @@ class IndirectCommandsLayout final : public NonDispatchable return m_createInfo.pSubpasses[subpass].stencilResolveMode; } - VkImageAspectFlags GetResolveDepthStecilAspect(uint32_t subpass) const - { - return m_createInfo.pSubpasses[subpass].depthStencilResolveAttachment.aspectMask; - } - uint32_t GetSubpassColorReferenceCount(uint32_t subPassIndex) const; uint32_t GetAttachmentCount() const { return m_createInfo.attachmentCount; } const 
AttachmentDescription& GetAttachmentDesc(uint32_t attachmentIndex) const; diff --git a/icd/api/pipeline_compiler.cpp b/icd/api/pipeline_compiler.cpp index 30c6fa53..14226cc6 100644 --- a/icd/api/pipeline_compiler.cpp +++ b/icd/api/pipeline_compiler.cpp @@ -198,7 +198,7 @@ static bool SupportInternalModuleCache( const uint32_t compilerMask, const VkShaderModuleCreateFlags internalShaderFlags) { - bool supportInternalModuleCache = pDevice->GetRuntimeSettings().enableEarlyCompile; + bool supportInternalModuleCache = false; if (Util::TestAnyFlagSet(internalShaderFlags, VK_INTERNAL_SHADER_FLAGS_FORCE_UNCACHED_BIT)) { @@ -318,8 +318,7 @@ VkResult PipelineCompiler::Initialize() const RuntimeSettings& settings = m_pPhysicalDevice->GetRuntimeSettings(); // Initialize GfxIp informations per PAL device properties - Pal::DeviceProperties info; - pPalDevice->GetProperties(&info); + const Pal::DeviceProperties& info = m_pPhysicalDevice->PalProperties(); switch (info.gfxLevel) { @@ -337,7 +336,6 @@ VkResult PipelineCompiler::Initialize() m_gfxIp.minor = 0; break; #endif - default: VK_NEVER_CALLED(); break; @@ -353,6 +351,7 @@ VkResult PipelineCompiler::Initialize() ((settings.usePalPipelineCaching) || (m_pPhysicalDevice->VkInstance()->GetDevModeMgr() != nullptr))) { + // This call to PipelineBinaryCache::Create must use the VkInstance allocation callbacks to avoid issues. m_pBinaryCache = PipelineBinaryCache::Create( m_pPhysicalDevice->VkInstance()->GetAllocCallbacks(), m_pPhysicalDevice->GetPlatformKey(), @@ -396,12 +395,6 @@ VkResult PipelineCompiler::Initialize() result = InitializeUberFetchShaderFormatTable(m_pPhysicalDevice, &m_uberFetchShaderInfoFormatMap); } - if (result == VK_SUCCESS) - { - uint32_t threadCount = settings.deferCompileOptimizedPipeline ? 
settings.deferCompileThreadCount : 0; - m_deferCompileMgr.Init(threadCount, m_pPhysicalDevice->VkInstance()->Allocator()); - } - return result; } @@ -700,21 +693,6 @@ VkResult PipelineCompiler::BuildShaderModule( return result; } -// ===================================================================================================================== -// Try to early compile shader if possible -void PipelineCompiler::TryEarlyCompileShaderModule( - const Device* pDevice, - ShaderModuleHandle* pModule) -{ - const uint32_t compilerMask = GetCompilerCollectionMask(); - - if (compilerMask & (1 << PipelineCompilerTypeLlpc)) - { - m_compilerSolutionLlpc.TryEarlyCompileShaderModule(pDevice, pModule); - } - -} - // ===================================================================================================================== // Check whether the shader module is valid bool PipelineCompiler::IsValidShaderModule( @@ -1140,7 +1118,6 @@ VkResult PipelineCompiler::CreateGraphicsPipelineBinary( if (shouldCompile && (result == VK_SUCCESS)) { - pCreateInfo->pBinaryMetadata->enableEarlyCompile = pCreateInfo->pipelineInfo.enableEarlyCompile; pCreateInfo->pBinaryMetadata->enableUberFetchShader = pCreateInfo->pipelineInfo.enableUberFetchShader; result = GetSolution(pCreateInfo->compilerType)->CreateGraphicsPipelineBinary( @@ -1211,7 +1188,7 @@ VkResult PipelineCompiler::CreateGraphicsShaderBinary( PipelineCache* pPipelineCache, GraphicsLibraryType gplType, GraphicsPipelineBinaryCreateInfo* pCreateInfo, - GplModuleState* pModuleState) + GplModuleState* pModuleState) { VkResult result = VK_SUCCESS; @@ -1739,8 +1716,6 @@ static void CopyPipelineShadersInfo( libInfo.pShaderLibraries[GraphicsLibraryPreRaster]; pCreateInfo->pBinaryMetadata->pointSizeUsed = libInfo.pBinaryMetadata->pointSizeUsed; pCreateInfo->pBinaryMetadata->enableUberFetchShader = libInfo.pBinaryMetadata->enableUberFetchShader; - pCreateInfo->pBinaryMetadata->enableEarlyCompile = 
libInfo.pBinaryMetadata->enableEarlyCompile; - } else if (shaderMask == FgsShaderMask) { @@ -1808,6 +1783,7 @@ static void CopyVertexInputInterfaceState( pCreateInfo->pipelineInfo.iaState.topology = libInfo.pipelineInfo.iaState.topology; pCreateInfo->pipelineInfo.iaState.disableVertexReuse = libInfo.pipelineInfo.iaState.disableVertexReuse; pCreateInfo->pipelineInfo.dynamicVertexStride = libInfo.pipelineInfo.dynamicVertexStride; + pCreateInfo->pipelineInfo.dynamicTopology = libInfo.pipelineInfo.dynamicTopology; if (pCreateInfo->pipelineInfo.pVertexInput != nullptr) { @@ -1847,8 +1823,7 @@ static void MergePipelineOptions( pDst->enableInterpModePatch |= src.enableInterpModePatch; pDst->pageMigrationEnabled |= src.pageMigrationEnabled; pDst->optimizationLevel |= src.optimizationLevel; - pDst->disableTruncCoordForGather |= src.disableTruncCoordForGather; - + pDst->glState.disableTruncCoordForGather |= src.glState.disableTruncCoordForGather; pDst->shadowDescriptorTableUsage = src.shadowDescriptorTableUsage; pDst->shadowDescriptorTablePtrHigh = src.shadowDescriptorTablePtrHigh; pDst->overrideThreadGroupSizeX = src.overrideThreadGroupSizeX; @@ -2071,8 +2046,9 @@ static void BuildMultisampleState( gridSize = pPipelineSampleLocationsStateCreateInfoEXT->sampleLocationsInfo.sampleLocationGridSize; } - if ((gridSize.width <= 1) && (gridSize.height <= 1) - ) + if ((gridSize.width <= 1) && + (gridSize.height <= 1) && + (pDevice->GetRuntimeSettings().disablePatchInterpMode == false)) { pCreateInfo->pipelineInfo.options.enableInterpModePatch = true; } @@ -2400,7 +2376,7 @@ static void BuildPipelineShadersInfo( } // Uber fetch shader is actually used in the following scenes: - // * enableUberFetchShader or enableEarlyCompile is set as TRUE in panel. + // * enableUberFetchShader is set as TRUE in panel. // * When creating pipeline, GraphicsPipelineBuildInfo::enableUberFetchShader controls the actual enablement. 
It is // only set when Vertex Input Interface section (VII) is not available and Pre-Rasterization Shader (PRS) is // available, or inherits from its PRS parent (referenced library). However, enableUberFetchShader would also be @@ -2410,7 +2386,6 @@ static void BuildPipelineShadersInfo( // PS: For standard gfx pipeline, GraphicsPipelineBuildInfo::enableUberFetchShader is never set as TRUE with default // panel setting because VII and PRS are always available at the same time. if (settings.enableUberFetchShader || - settings.enableEarlyCompile || (((pCreateInfo->libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_VERTEX_INPUT_INTERFACE_BIT_EXT) == 0) && ((pCreateInfo->libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) != 0)) || (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::VertexInput) == true) @@ -2590,6 +2565,11 @@ static void BuildVertexInputInterfaceState( const VkShaderStageFlagBits activeStages, GraphicsPipelineBinaryCreateInfo* pCreateInfo) { + if (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::PrimitiveTopology) == true) + { + pCreateInfo->pipelineInfo.dynamicTopology = true; + } + pCreateInfo->pipelineInfo.iaState.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; if ((pIn->pInputAssemblyState) && (Util::TestAnyFlagSet(activeStages, VK_SHADER_STAGE_MESH_BIT_EXT) == false)) { @@ -2640,7 +2620,7 @@ static void BuildPreRasterizationShaderState( pDevice->GetEnabledFeatures().assumeDynamicTopologyInLibs || (IsDynamicStateEnabled(dynamicStateFlags, DynamicStatesInternal::PrimitiveTopology) && pDevice->GetEnabledFeatures().dynamicPrimitiveTopologyUnrestricted) || - (vertexInputAbsent && pDevice->GetRuntimeSettings().useShaderLibraryForPipelineLibraryFastLink); + vertexInputAbsent; BuildRasterizationState(pIn->pRasterizationState, dynamicStateFlags, &isConservativeOverestimation, pCreateInfo); @@ -2900,7 +2880,6 @@ VkResult PipelineCompiler::UploadInternalBufferData( if (result != VK_SUCCESS) { VK_NEVER_CALLED(); - 
pCreateInfo->pipelineInfo.enableEarlyCompile = false; pCreateInfo->pipelineInfo.enableUberFetchShader = false; if (pMem != nullptr) { @@ -3021,7 +3000,6 @@ VkResult PipelineCompiler::ConvertGraphicsPipelineInfo( BuildFragmentShaderState(pDevice, pIn, libInfo, pShaderInfo, pCreateInfo, dynamicStateFlags); pCreateInfo->pipelineInfo.enableColorExportShader = (libInfo.flags.isLibrary && - pDevice->GetRuntimeSettings().useShaderLibraryForPipelineLibraryFastLink && ((pShaderInfo->stages[ShaderStageFragment].pModuleHandle != nullptr) || (pShaderInfo->stages[ShaderStageFragment].codeHash.lower != 0) || (pShaderInfo->stages[ShaderStageFragment].codeHash.upper != 0))); @@ -3078,6 +3056,8 @@ VkResult PipelineCompiler::ConvertGraphicsPipelineInfo( (libInfo.pVertexInputInterfaceLib != nullptr))) { pCreateInfo->pipelineInfo.enableUberFetchShader = false; + pCreateInfo->pBinaryMetadata->enableUberFetchShader = false; + pCreateInfo->pBinaryMetadata->internalBufferInfo.internalBufferCount = 0; } if (libInfo.flags.isLibrary) @@ -3339,7 +3319,7 @@ void PipelineCompiler::ApplyPipelineOptions( pOptions->reverseThreadGroup = settings.enableAlternatingThreadGroupOrder; - pOptions->disableTruncCoordForGather = settings.disableTruncCoordForGather; + pOptions->glState.disableTruncCoordForGather = settings.disableTruncCoordForGather; pOptions->disablePerCompFetch = settings.disablePerCompFetch; @@ -3696,12 +3676,11 @@ void PipelineCompiler::FreeComputePipelineCreateInfo( void PipelineCompiler::FreeGraphicsPipelineCreateInfo( Device* pDevice, GraphicsPipelineBinaryCreateInfo* pCreateInfo, - bool keepConvertTempMemory, bool keepInternalMem) { auto pInstance = pDevice->VkInstance(); - if ((pCreateInfo->pTempBuffer != nullptr) && (keepConvertTempMemory == false)) + if (pCreateInfo->pTempBuffer != nullptr) { pInstance->FreeMem(pCreateInfo->pTempBuffer); pCreateInfo->pTempBuffer = nullptr; @@ -3822,7 +3801,11 @@ VkResult PipelineCompiler::ConvertRayTracingPipelineInfo( 
pCreateInfo->pipelineInfo.maxRecursionDepth = pIn->maxPipelineRayRecursionDepth; pCreateInfo->pipelineInfo.indirectStageMask = settings.rtIndirectStageMask; - static_assert(RaytracingNone == static_cast(Vkgc::LlpcRaytracingMode::None)); +#if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 74 + static_assert(RaytracingAuto == static_cast(Vkgc::LlpcRaytracingMode::None)); +#else + static_assert(RaytracingAuto == static_cast(Vkgc::LlpcRaytracingMode::Auto)); +#endif static_assert(RaytracingLegacy == static_cast(Vkgc::LlpcRaytracingMode::Legacy)); #if LLPC_CLIENT_INTERFACE_MAJOR_VERSION < 69 static_assert(RaytracingContinufy == static_cast(Vkgc::LlpcRaytracingMode::Gpurt2)); @@ -4975,8 +4958,6 @@ void PipelineCompiler::BuildPipelineInternalBufferData( GraphicsPipelineBinaryCreateInfo* pCreateInfo) { uint32_t fetchShaderConstBufRegBase = PipelineLayout::InvalidReg; - uint32_t specConstBufVertexRegBase = PipelineLayout::InvalidReg; - uint32_t specConstBufFragmentRegBase = PipelineLayout::InvalidReg; const UserDataLayout& layout = pPipelineLayout->GetInfo().userDataLayout; @@ -4984,8 +4965,6 @@ void PipelineCompiler::BuildPipelineInternalBufferData( { case PipelineLayoutScheme::Compact: fetchShaderConstBufRegBase = layout.compact.uberFetchConstBufRegBase; - specConstBufVertexRegBase = layout.compact.specConstBufVertexRegBase; - specConstBufFragmentRegBase = layout.compact.specConstBufFragmentRegBase; break; case PipelineLayoutScheme::Indirect: fetchShaderConstBufRegBase = layout.indirect.uberFetchConstBufRegBase; @@ -4998,8 +4977,6 @@ void PipelineCompiler::BuildPipelineInternalBufferData( GetSolution(pCreateInfo->compilerType)->BuildPipelineInternalBufferData( this, fetchShaderConstBufRegBase, - specConstBufVertexRegBase, - specConstBufFragmentRegBase, needCache, pCreateInfo); } @@ -5118,7 +5095,12 @@ VkResult PipelineCompiler::WriteBinaryMetadata( auto pInstance = pPhysicalDevice->Manager()->VkInstance(); Util::Abi::PipelineAbiProcessor 
abiProcessor(pDevice->VkInstance()->Allocator()); - palResult = abiProcessor.LoadFromBuffer(pElfBinary->pCode, pElfBinary->codeSize); + palResult = abiProcessor.Init(); + + if (palResult == Pal::Result::Success) + { + palResult = abiProcessor.LoadFromBuffer(pElfBinary->pCode, pElfBinary->codeSize); + } if (palResult == Pal::Result::Success) { @@ -5563,14 +5545,12 @@ void PipelineCompiler::DumpPipelineMetadata( ";pointSizeUsed = %u\n" ";dualSrcBlendingUsed = %u\n" ";shadingRateUsedInShader = %u\n" - ";enableEarlyCompile = %u\n" ";enableUberFetchShader = %u\n" ";postDepthCoverageEnable = %u\n" ";psOnlyPointCoordEnable = %u\n", pBinaryMetadata->pointSizeUsed, pBinaryMetadata->dualSrcBlendingUsed, pBinaryMetadata->shadingRateUsedInShader, - pBinaryMetadata->enableEarlyCompile, pBinaryMetadata->enableUberFetchShader, pBinaryMetadata->postDepthCoverageEnable, pBinaryMetadata->psOnlyPointCoordEnable); diff --git a/icd/api/raytrace/ray_tracing_device.cpp b/icd/api/raytrace/ray_tracing_device.cpp index be8958be..fbb0bbb3 100644 --- a/icd/api/raytrace/ray_tracing_device.cpp +++ b/icd/api/raytrace/ray_tracing_device.cpp @@ -53,6 +53,7 @@ RayTracingDevice::RayTracingDevice( m_gpurtOptions(pDevice->VkInstance()->Allocator()), m_cmdContext(), m_pBvhBatchLayer(nullptr), + m_pSplitRaytracingLayer(nullptr), m_accelStructTrackerResources() { @@ -147,6 +148,11 @@ VkResult RayTracingDevice::Init() result = BvhBatchLayer::CreateLayer(m_pDevice, &m_pBvhBatchLayer); } + if (result == VK_SUCCESS) + { + result = SplitRaytracingLayer::CreateLayer(m_pDevice, &m_pSplitRaytracingLayer); + } + if (result != VK_SUCCESS) { VK_NEVER_CALLED(); @@ -157,6 +163,11 @@ VkResult RayTracingDevice::Init() { m_pBvhBatchLayer->DestroyLayer(); } + + if (m_pSplitRaytracingLayer != nullptr) + { + m_pSplitRaytracingLayer->DestroyLayer(); + } } } } @@ -350,6 +361,11 @@ void RayTracingDevice::Destroy() m_pBvhBatchLayer->DestroyLayer(); } + if (m_pSplitRaytracingLayer != nullptr) + { + 
m_pSplitRaytracingLayer->DestroyLayer(); + } + Util::Destructor(this); m_pDevice->VkInstance()->FreeMem(this); @@ -648,6 +664,7 @@ void RayTracingDevice::SetDispatchInfo( uint32_t depth, uint32_t shaderCount, uint64_t apiHash, + uint64_t userMarkerContext, const VkStridedDeviceAddressRegionKHR* pRaygenSbt, const VkStridedDeviceAddressRegionKHR* pMissSbt, const VkStridedDeviceAddressRegionKHR* pHitSbt, @@ -684,6 +701,8 @@ void RayTracingDevice::SetDispatchInfo( dispatchInfo.hitGroupTable.addr = static_cast(pHitSbt->deviceAddress); dispatchInfo.hitGroupTable.size = static_cast(pHitSbt->size); dispatchInfo.hitGroupTable.stride = static_cast(pHitSbt->stride); + + dispatchInfo.userMarkerContext = userMarkerContext; } (*pDispatchInfo) = dispatchInfo; @@ -699,6 +718,7 @@ void RayTracingDevice::TraceDispatch( uint32_t depth, uint32_t shaderCount, uint64_t apiHash, + uint64_t userMarkerContext, const VkStridedDeviceAddressRegionKHR* pRaygenSbt, const VkStridedDeviceAddressRegionKHR* pMissSbt, const VkStridedDeviceAddressRegionKHR* pHitSbt, @@ -713,6 +733,7 @@ void RayTracingDevice::TraceDispatch( depth, shaderCount, apiHash, + userMarkerContext, pRaygenSbt, pMissSbt, pHitSbt, @@ -735,6 +756,7 @@ void RayTracingDevice::TraceIndirectDispatch( uint32_t originalThreadGroupSizeZ, uint32_t shaderCount, uint64_t apiHash, + uint64_t userMarkerContext, const VkStridedDeviceAddressRegionKHR* pRaygenSbt, const VkStridedDeviceAddressRegionKHR* pMissSbt, const VkStridedDeviceAddressRegionKHR* pHitSbt, @@ -750,6 +772,7 @@ void RayTracingDevice::TraceIndirectDispatch( 0, shaderCount, apiHash, + userMarkerContext, pRaygenSbt, pMissSbt, pHitSbt, @@ -835,9 +858,20 @@ Pal::Result RayTracingDevice::ClientCreateInternalComputePipeline( nodes[nodeIndex].node.srdRange.set = node.descSet; nodes[nodeIndex].node.srdRange.binding = node.binding; } + else if (node.type == GpuRt::NodeType::Srv) + { + nodes[nodeIndex].node.type = + Vkgc::ResourceMappingNodeType::DescriptorResource; + 
nodes[nodeIndex].node.sizeInDwords = node.dwSize; + nodes[nodeIndex].node.offsetInDwords = node.dwOffset; + nodes[nodeIndex].node.srdRange.set = node.descSet; + nodes[nodeIndex].node.srdRange.binding = node.binding; + } else if ((node.type == GpuRt::NodeType::ConstantBufferTable) || - (node.type == GpuRt::NodeType::UavTable) || - (node.type == GpuRt::NodeType::TypedUavTable)) + (node.type == GpuRt::NodeType::UavTable) || + (node.type == GpuRt::NodeType::TypedUavTable) || + (node.type == GpuRt::NodeType::SrvTable) || + (node.type == GpuRt::NodeType::TypedSrvTable)) { Vkgc::ResourceMappingNode* pSubNode = &subNodes[subNodeIndex++]; nodes[nodeIndex].node.type = @@ -858,6 +892,10 @@ Pal::Result RayTracingDevice::ClientCreateInternalComputePipeline( case GpuRt::NodeType::ConstantBufferTable: pSubNode->type = Vkgc::ResourceMappingNodeType::DescriptorConstBuffer; break; + case GpuRt::NodeType::SrvTable: + case GpuRt::NodeType::TypedSrvTable: + pSubNode->type = Vkgc::ResourceMappingNodeType::DescriptorResource; + break; default: VK_NEVER_CALLED(); } diff --git a/icd/api/raytrace/ray_tracing_device.h b/icd/api/raytrace/ray_tracing_device.h index 8829dae4..bbe7a575 100644 --- a/icd/api/raytrace/ray_tracing_device.h +++ b/icd/api/raytrace/ray_tracing_device.h @@ -32,6 +32,7 @@ #include "khronos/vulkan.h" #include "vk_defines.h" #include "appopt/bvh_batch_layer.h" +#include "appopt/split_raytracing_layer.h" #include "vkgcDefs.h" @@ -92,6 +93,7 @@ class RayTracingDevice uint64_t GetAccelerationStructureUUID(const Pal::DeviceProperties& palProps); BvhBatchLayer* GetBvhBatchLayer() { return m_pBvhBatchLayer; } + SplitRaytracingLayer* GetSplitRaytracingLayer() { return m_pSplitRaytracingLayer; } uint32_t GetProfileRayFlags() const { return m_profileRayFlags; } uint32_t GetProfileMaxIterations() const { return m_profileMaxIterations; } @@ -107,6 +109,7 @@ class RayTracingDevice uint32_t depth, uint32_t shaderCount, uint64_t apiHash, + uint64_t userMarkerContext, const 
VkStridedDeviceAddressRegionKHR* pRaygenSbt, const VkStridedDeviceAddressRegionKHR* pMissSbt, const VkStridedDeviceAddressRegionKHR* pHitSbt, @@ -120,6 +123,7 @@ class RayTracingDevice uint32_t originalThreadGroupSizeZ, uint32_t shaderCount, uint64_t apiHash, + uint64_t userMarkerContext, const VkStridedDeviceAddressRegionKHR* pRaygenSbt, const VkStridedDeviceAddressRegionKHR* pMissSbt, const VkStridedDeviceAddressRegionKHR* pHitSbt, @@ -198,6 +202,7 @@ class RayTracingDevice uint32_t depth, uint32_t shaderCount, uint64_t apiHash, + uint64_t userMarkerContext, const VkStridedDeviceAddressRegionKHR* pRaygenSbt, const VkStridedDeviceAddressRegionKHR* pMissSbt, const VkStridedDeviceAddressRegionKHR* pHitSbt, @@ -206,6 +211,7 @@ class RayTracingDevice void CollectGpurtOptions(GpurtOptions* const pGpurtOptions) const; BvhBatchLayer* m_pBvhBatchLayer; + SplitRaytracingLayer* m_pSplitRaytracingLayer; AccelStructTrackerResources m_accelStructTrackerResources[MaxPalDevices]; }; diff --git a/icd/api/raytrace/vk_ray_tracing_pipeline.cpp b/icd/api/raytrace/vk_ray_tracing_pipeline.cpp index 8b398d8e..fccb927f 100644 --- a/icd/api/raytrace/vk_ray_tracing_pipeline.cpp +++ b/icd/api/raytrace/vk_ray_tracing_pipeline.cpp @@ -313,6 +313,7 @@ RayTracingPipeline::RayTracingPipeline( m_ppShaderLibraries(nullptr), m_createInfo(pDevice), m_hasTraceRay(false), + m_isCps(false), m_elfHash{}, m_captureReplayVaMappingBufferInfo{} { @@ -474,16 +475,13 @@ VkResult RayTracingPipeline::CreateImpl( PipelineCompiler* pDefaultCompiler = m_pDevice->GetCompiler(DefaultDeviceIndex); - Util::MetroHash::Hash elfHash = {}; - uint64_t apiPsoHash = {}; - BuildApiHash(pCreateInfo, flags, &elfHash, &apiPsoHash); - binaryCreateInfo.pDeferredWorkload = pDeferredWorkload; - binaryCreateInfo.apiPsoHash = apiPsoHash; auto pPipelineCreationFeedbackCreateInfo = extStructs.pPipelineCreationFeedbackCreateInfoEXT; PipelineCompiler::InitPipelineCreationFeedback(pPipelineCreationFeedbackCreateInfo); + bool 
binariesProvided = false; + Util::MetroHash::Hash cacheId[MaxPalDevices] = {}; RayTracingPipelineShaderStageInfo shaderInfo = {}; PipelineOptimizerKey optimizerKey = {}; @@ -526,17 +524,6 @@ VkResult RayTracingPipeline::CreateImpl( placement.FixupPtrs(pShaderTempBuffer); shaderInfo.stageCount = nativeShaderCount; - - result = BuildShaderStageInfo(m_pDevice, - nativeShaderCount, - pCreateInfo->pStages, - [](const uint32_t inputIdx, const uint32_t stageIdx) - { - return inputIdx; - }, - shaderInfo.pStages, - pTempModules, - nullptr); } else { @@ -544,42 +531,6 @@ VkResult RayTracingPipeline::CreateImpl( } } - if (result == VK_SUCCESS) - { - uint32_t shaderIdx = 0; - optimizerKey.shaderCount = totalShaderCount; - - for (; shaderIdx < nativeShaderCount; ++shaderIdx) - { - const auto* pModuleData = reinterpret_cast( - ShaderModule::GetFirstValidShaderData(shaderInfo.pStages[shaderIdx].pModuleHandle)); - - m_pDevice->GetShaderOptimizer()->CreateShaderOptimizerKey( - pModuleData, - shaderInfo.pStages[shaderIdx].codeHash, - shaderInfo.pStages[shaderIdx].stage, - shaderInfo.pStages[shaderIdx].codeSize, - &optimizerKey.pShaders[shaderIdx]); - } - - if (hasLibraries) - { - for (uint32_t libraryIdx = 0; libraryIdx < pCreateInfo->pLibraryInfo->libraryCount; ++libraryIdx) - { - const auto pLibrary = RayTracingPipeline::ObjectFromHandle( - pCreateInfo->pLibraryInfo->pLibraries[libraryIdx]); - const auto shaderCount = pLibrary->GetTotalShaderCount(); - - memcpy(&optimizerKey.pShaders[shaderIdx], - pLibrary->GetShaderOptKeys(), - sizeof(ShaderOptimizerKey) * shaderCount); - shaderIdx += shaderCount; - } - } - - VK_ASSERT(shaderIdx == totalShaderCount); - } - // Allocate buffer for shader groups uint32_t pipelineLibGroupCount = 0; @@ -711,12 +662,38 @@ VkResult RayTracingPipeline::CreateImpl( result = VK_ERROR_OUT_OF_HOST_MEMORY; } } - - Util::MetroHash::Hash cacheId[MaxPalDevices] = {}; - const auto pPipelineBinaryCache = (pPipelineCache != nullptr) ? 
pPipelineCache->GetPipelineCache() : nullptr; + // Build API and ELF hashes + Util::MetroHash::Hash elfHash = {}; + uint64_t apiPsoHash = {}; + + if (result == VK_SUCCESS) + { + optimizerKey.shaderCount = totalShaderCount; + + if (binariesProvided == false) + { + result = CreateCacheId( + m_pDevice, + pCreateInfo, + flags, + hasLibraries, + &shaderInfo, + &optimizerKey, + &apiPsoHash, + &elfHash, + pTempModules, + cacheId); + + binaryCreateInfo.apiPsoHash = apiPsoHash; + } + } + + bool storeBinaryToPipeline = false; + bool storeBinaryToCache = true; + for (uint32_t deviceIdx = 0; (result == VK_SUCCESS) && (deviceIdx < m_pDevice->NumPalDevices()); deviceIdx++) { bool isUserCacheHit = false; @@ -725,39 +702,44 @@ VkResult RayTracingPipeline::CreateImpl( // PAL Pipeline caching Util::Result cacheResult = Util::Result::NotFound; - ElfHashToCacheId( - m_pDevice, - deviceIdx, - elfHash, - optimizerKey, - &cacheId[deviceIdx] - ); - bool forceCompilation = false; if (forceCompilation == false) { Vkgc::BinaryData cachedBinData = {}; - // Search the pipeline binary cache. - cacheResult = pDefaultCompiler->GetCachedPipelineBinary( - &cacheId[deviceIdx], - pPipelineBinaryCache, - &cachedBinData, - &isUserCacheHit, - &isInternalCacheHit, - &binaryCreateInfo.freeCompilerBinary, - &binaryCreateInfo.pipelineFeedback); - - // Found the pipeline; Add it to any cache layers where it's missing. - if (cacheResult == Util::Result::Success) + if (binariesProvided == false) { - m_pDevice->GetCompiler(deviceIdx)->CachePipelineBinary( + // Search the pipeline binary cache. + cacheResult = pDefaultCompiler->GetCachedPipelineBinary( &cacheId[deviceIdx], pPipelineBinaryCache, &cachedBinData, - isUserCacheHit, - isInternalCacheHit); + &isUserCacheHit, + &isInternalCacheHit, + &binaryCreateInfo.freeCompilerBinary, + &binaryCreateInfo.pipelineFeedback); + // Found the pipeline; Add it to any cache layers where it's missing. 
+ if (cacheResult == Util::Result::Success) + { + if (storeBinaryToCache) + { + m_pDevice->GetCompiler(deviceIdx)->CachePipelineBinary( + &cacheId[deviceIdx], + pPipelineBinaryCache, + &cachedBinData, + isUserCacheHit, + isInternalCacheHit); + } + + } + } + else + { + } + + if (cacheResult == Util::Result::Success) + { // Unpack the cached blob into separate binaries. pDefaultCompiler->ExtractRayTracingPipelineBinary( &cachedBinData, @@ -844,14 +826,19 @@ VkResult RayTracingPipeline::CreateImpl( if (cachedBinData.pCode != nullptr) { - m_pDevice->GetCompiler(deviceIdx)->CachePipelineBinary( - &cacheId[deviceIdx], - pPipelineBinaryCache, - &cachedBinData, - isUserCacheHit, - isInternalCacheHit); - - m_pDevice->VkInstance()->FreeMem(const_cast(cachedBinData.pCode)); + if (storeBinaryToCache) + { + m_pDevice->GetCompiler(deviceIdx)->CachePipelineBinary( + &cacheId[deviceIdx], + pPipelineBinaryCache, + &cachedBinData, + isUserCacheHit, + isInternalCacheHit); + } + + { + m_pDevice->VkInstance()->FreeMem(const_cast(cachedBinData.pCode)); + } } } } @@ -871,6 +858,7 @@ VkResult RayTracingPipeline::CreateImpl( } m_hasTraceRay = pipelineBinaries[DefaultDeviceIndex].hasTraceRay; + m_isCps = pipelineBinaries[DefaultDeviceIndex].isCps; uint32_t funcCount = 0; if (result == VK_SUCCESS) @@ -1119,7 +1107,7 @@ VkResult RayTracingPipeline::CreateImpl( auto UpdateLibStackSizes = [&](uint32_t libIdx) { auto pShaderLibrary = ppDeviceShaderLibraries[libIdx]; - if (settings.llpcRaytracingMode >= RaytracingContinufy) + if (CheckIsCps()) { auto libFuncList = pShaderLibrary->GetShaderLibFunctionInfos(); @@ -1399,7 +1387,7 @@ VkResult RayTracingPipeline::CreateImpl( Util::Max(closestHitStackMax, missStackMax)) + (2 * callableStackMax); - if (settings.llpcRaytracingMode >= RaytracingContinufy) + if (CheckIsCps()) { // The size we calculated above is frontend stack size for continuations. 
m_defaultPipelineStackSizes[deviceIdx].frontendSize = defaultPipelineStackSize; @@ -1412,7 +1400,7 @@ VkResult RayTracingPipeline::CreateImpl( // TraceRay is the last function in function list, record it regardless we are building library or // not, so that a pipeline will get its own TraceRayGpuVa correctly. - if (funcCount > 0) + if (m_hasTraceRay && (funcCount > 0)) { const auto traceRayFuncIndex = funcCount - 1; traceRayGpuVas[deviceIdx] = @@ -1512,6 +1500,7 @@ VkResult RayTracingPipeline::CreateImpl( } else { + for (uint32_t deviceIdx = 0; deviceIdx < m_pDevice->NumPalDevices(); deviceIdx++) { // Internal memory allocation failed, free PAL event object if it gets created @@ -1545,11 +1534,14 @@ VkResult RayTracingPipeline::CreateImpl( } // Free the created pipeline binaries now that the PAL Pipelines/PipelineBinaryInfo have read them. - for (uint32_t deviceIdx = 0; deviceIdx < m_pDevice->NumPalDevices(); deviceIdx++) + if (binariesProvided == false) { - m_pDevice->GetCompiler(deviceIdx)->FreeRayTracingPipelineBinary( - &binaryCreateInfo, - &pipelineBinaries[deviceIdx]); + for (uint32_t deviceIdx = 0; deviceIdx < m_pDevice->NumPalDevices(); deviceIdx++) + { + m_pDevice->GetCompiler(deviceIdx)->FreeRayTracingPipelineBinary( + &binaryCreateInfo, + &pipelineBinaries[deviceIdx]); + } } pAllocator->pfnFree(pAllocator->pUserData, pTempBuffer); @@ -1586,6 +1578,90 @@ VkResult RayTracingPipeline::CreateImpl( return result; } +VkResult RayTracingPipeline::CreateCacheId( + const Device* pDevice, + const VkRayTracingPipelineCreateInfoKHR* pCreateInfo, + VkPipelineCreateFlags2KHR flags, + const bool hasLibraries, + RayTracingPipelineShaderStageInfo* pShaderInfo, + PipelineOptimizerKey* pPipelineOptimizerKey, + uint64_t* pApiPsoHash, + Util::MetroHash::Hash* pElfHash, + ShaderModuleHandle* pTempModules, + Util::MetroHash::Hash* pCacheIds) +{ + VkResult result = VK_SUCCESS; + + // 1. 
Build shader stage info + if (pPipelineOptimizerKey->shaderCount > 0) + { + result = BuildShaderStageInfo( + pDevice, + pCreateInfo->stageCount, + pCreateInfo->pStages, + [](const uint32_t inputIdx, const uint32_t stageIdx) + { + return inputIdx; + }, + pShaderInfo->pStages, + pTempModules, + nullptr); + } + + if (result == VK_SUCCESS) + { + // 2. Build ShaderOptimizer pipeline key + uint32_t shaderIdx = 0; + + for (; shaderIdx < pCreateInfo->stageCount; ++shaderIdx) + { + const auto* pModuleData = reinterpret_cast( + ShaderModule::GetFirstValidShaderData(pShaderInfo->pStages[shaderIdx].pModuleHandle)); + + pDevice->GetShaderOptimizer()->CreateShaderOptimizerKey( + pModuleData, + pShaderInfo->pStages[shaderIdx].codeHash, + pShaderInfo->pStages[shaderIdx].stage, + pShaderInfo->pStages[shaderIdx].codeSize, + &pPipelineOptimizerKey->pShaders[shaderIdx]); + } + + if (hasLibraries) + { + for (uint32_t libraryIdx = 0; libraryIdx < pCreateInfo->pLibraryInfo->libraryCount; ++libraryIdx) + { + const auto pLibrary = RayTracingPipeline::ObjectFromHandle( + pCreateInfo->pLibraryInfo->pLibraries[libraryIdx]); + const auto shaderCount = pLibrary->GetTotalShaderCount(); + + memcpy(&pPipelineOptimizerKey->pShaders[shaderIdx], + pLibrary->GetShaderOptKeys(), + sizeof(ShaderOptimizerKey) * shaderCount); + shaderIdx += shaderCount; + } + } + + VK_ASSERT(shaderIdx == pPipelineOptimizerKey->shaderCount); + + // 3. Build API and ELF hashes + BuildApiHash(pCreateInfo, flags, pElfHash, pApiPsoHash); + + // 4. 
Build Cache IDs + for (uint32_t deviceIdx = 0; deviceIdx < pDevice->NumPalDevices(); deviceIdx++) + { + ElfHashToCacheId( + pDevice, + deviceIdx, + *pElfHash, + *pPipelineOptimizerKey, + &pCacheIds[deviceIdx] + ); + } + } + + return result; +} + // ===================================================================================================================== static int32_t DeferredCreateRayTracingPipelineCallback( Device* pDevice, @@ -1913,22 +1989,6 @@ void RayTracingPipeline::BindToCmdBuffer( } } -// ===================================================================================================================== -void RayTracingPipeline::BindNullPipeline( - CmdBuffer* pCmdBuffer) -{ - const uint32_t numGroupedCmdBuffers = pCmdBuffer->VkDevice()->NumPalDevices(); - - Pal::PipelineBindParams params = {}; - params.pipelineBindPoint = Pal::PipelineBindPoint::Compute; - params.apiPsoHash = Pal::InternalApiPsoHash; - - for (uint32_t deviceIdx = 0; deviceIdx < numGroupedCmdBuffers; deviceIdx++) - { - pCmdBuffer->PalCmdBuffer(deviceIdx)->CmdBindPipeline(params); - } -} - // ===================================================================================================================== bool RayTracingPipeline::MapShaderIdToShaderHandle( Pal::ShaderLibraryFunctionInfo* pIndirectFuncList, @@ -2140,7 +2200,11 @@ void RayTracingPipeline::GetDispatchSize( const RuntimeSettings& settings = m_pDevice->GetRuntimeSettings(); - if (settings.rtFlattenThreadGroupSize == 0) + // NOTE: For CPS, we only support flatten thread group so far. + const uint32_t flattenThreadGroupSize = + CheckIsCps() ? 
settings.dispatchRaysThreadGroupSize : settings.rtFlattenThreadGroupSize; + + if (flattenThreadGroupSize == 0) { *pDispatchSizeX = Util::RoundUpQuotient(width, settings.rtThreadGroupSizeX); *pDispatchSizeY = Util::RoundUpQuotient(height, settings.rtThreadGroupSizeY); @@ -2152,15 +2216,15 @@ void RayTracingPipeline::GetDispatchSize( if ((width > 1) && (height > 1)) { - const uint32_t tileHeight = settings.rtFlattenThreadGroupSize / RayTracingTileWidth; + const uint32_t tileHeight = flattenThreadGroupSize / RayTracingTileWidth; const uint32_t paddedWidth = Util::Pow2Align(width, RayTracingTileWidth); const uint32_t paddedHeight = Util::Pow2Align(height, tileHeight); - dispatchSize = Util::RoundUpQuotient(paddedWidth * paddedHeight, settings.rtFlattenThreadGroupSize); + dispatchSize = Util::RoundUpQuotient(paddedWidth * paddedHeight, flattenThreadGroupSize); } else { - dispatchSize = Util::RoundUpQuotient(width * height, settings.rtFlattenThreadGroupSize); + dispatchSize = Util::RoundUpQuotient(width * height, flattenThreadGroupSize); } *pDispatchSizeX = dispatchSize; diff --git a/icd/api/raytrace/vk_ray_tracing_pipeline.h b/icd/api/raytrace/vk_ray_tracing_pipeline.h index b484a932..8f01917a 100644 --- a/icd/api/raytrace/vk_ray_tracing_pipeline.h +++ b/icd/api/raytrace/vk_ray_tracing_pipeline.h @@ -160,6 +160,18 @@ class RayTracingPipeline final : public Pipeline, public NonDispatchableVkInstance()->Allocator()) + m_debugTags(pCmdBuf->VkInstance()->Allocator()), + m_userMarkerOpHistory(pCmdBuf->VkInstance()->Allocator()), + m_userMarkerStrings(pCmdBuf->VkInstance()->Allocator()) { m_cbId.u32All = 0; m_deviceId = reinterpret_cast(ApiDevice::FromObject(m_pCmdBuf->VkDevice())); @@ -317,6 +321,8 @@ void SqttCmdBufferState::Begin( const VkCommandBufferBeginInfo* pBeginInfo) { m_currentEventId = 0; + m_userMarkerOpHistory.Clear(); + m_userMarkerStrings.Clear(); #if ICD_GPUOPEN_DEVMODE_BUILD if (m_pDevMode != nullptr) @@ -1136,12 +1142,26 @@ void 
SqttCmdBufferState::DebugMarkerInsert( void SqttCmdBufferState::DebugLabelBegin( const VkDebugUtilsLabelEXT* pMarkerInfo) { + DevUserMarkerString userMarkerString; + userMarkerString.length = static_cast(strlen(pMarkerInfo->pLabelName)) + 1; + Util::Strncpy(userMarkerString.string, pMarkerInfo->pLabelName, sizeof(userMarkerString.string)); + m_userMarkerStrings.PushBack(userMarkerString); + + Pal::Developer::UserMarkerOpInfo opInfo; + opInfo.opType = static_cast(Pal::Developer::UserMarkerOpType::Push); + opInfo.strIndex = static_cast(m_userMarkerStrings.size()); + m_userMarkerOpHistory.PushBack(opInfo.u32All); + WriteUserEventMarker(RgpSqttMarkerUserEventPush, pMarkerInfo->pLabelName); } // ===================================================================================================================== void SqttCmdBufferState::DebugLabelEnd() { + Pal::Developer::UserMarkerOpInfo opInfo; + opInfo.opType = static_cast(Pal::Developer::UserMarkerOpType::Pop); + m_userMarkerOpHistory.PushBack(opInfo.u32All); + WriteUserEventMarker(RgpSqttMarkerUserEventPop, nullptr); } @@ -1181,6 +1201,24 @@ bool SqttCmdBufferState::HasDebugTag( return false; } +// ===================================================================================================================== +uint64_t SqttCmdBufferState::GetUserMarkerContextValue() const +{ + return (uint64_t(m_cbId.u32All) << 32) | m_userMarkerOpHistory.NumElements(); +} + +// ===================================================================================================================== +const DevUserMarkerOpHistory& SqttCmdBufferState::GetUserMarkerOpHistory() const +{ + return m_userMarkerOpHistory; +} + +// ===================================================================================================================== +const DevStringTable& SqttCmdBufferState::GetUserMarkerStrings() const +{ + return m_userMarkerStrings; +} + // 
===================================================================================================================== SqttCmdBufferState::~SqttCmdBufferState() { @@ -2115,6 +2153,7 @@ VKAPI_ATTR void VKAPI_CALL vkCmdDebugMarkerBeginEXT( const VkDebugMarkerMarkerInfoEXT* pMarkerInfo) { const VkCommandBuffer cmdBuffer = commandBuffer; + SQTT_SETUP(); pSqtt->DebugMarkerBegin(pMarkerInfo); @@ -2127,6 +2166,7 @@ VKAPI_ATTR void VKAPI_CALL vkCmdDebugMarkerEndEXT( VkCommandBuffer commandBuffer) { VkCommandBuffer cmdBuffer = commandBuffer; + SQTT_SETUP(); pSqtt->DebugMarkerEnd(); @@ -2794,6 +2834,49 @@ VKAPI_ATTR VkResult VKAPI_CALL vkQueueSubmit( CheckRGPFrameBegin(pQueue, pDevMode, submitCount, pSubmits); #endif + if (pDevMode->IsTraceRunning()) + { + for (uint32_t i = 0; i < pSubmits->commandBufferCount; ++i) + { + CmdBuffer* pCmdBuf = ApiCmdBuffer::ObjectFromHandle(pSubmits->pCommandBuffers[i]); + + const auto& userMarkerOpHistory = pCmdBuf->GetSqttState()->GetUserMarkerOpHistory(); + const auto& userMarkerStrings = pCmdBuf->GetSqttState()->GetUserMarkerStrings(); + + if ((userMarkerOpHistory.NumElements() > 0) && (userMarkerStrings.IsEmpty() == false)) + { + uint32_t stringDataSizeInBytes = 0; + for (uint32 j = 0; j < userMarkerStrings.NumElements(); ++j) + { + stringDataSizeInBytes += userMarkerStrings.At(j).length; + } + + const uint32 baseOffset = sizeof(uint32) * userMarkerStrings.NumElements(); + AutoBuffer stringOffsets( + userMarkerStrings.NumElements(), pQueue->VkDevice()->VkInstance()->Allocator()); + AutoBuffer stringData( + stringDataSizeInBytes, pQueue->VkDevice()->VkInstance()->Allocator()); + + for (uint32 j = 0, offset = 0; j < userMarkerStrings.NumElements(); ++j) + { + const DevUserMarkerString& markerString = userMarkerStrings.At(j); + memcpy(stringData.Data() + offset, markerString.string, markerString.length); + stringOffsets[j] = offset + baseOffset; + offset += markerString.length; + } + + pDevMode->ProcessMarkerTable( + 
pCmdBuf->GetSqttState()->GetId().u32All, + userMarkerOpHistory.NumElements(), + userMarkerOpHistory.Data(), + userMarkerStrings.NumElements(), + stringOffsets.Data(), + stringDataSizeInBytes, + stringData.Data()); + } + } + } + VkResult result = SQTT_CALL_NEXT_LAYER(vkQueueSubmit)(queue, submitCount, pSubmits, fence); #if ICD_GPUOPEN_DEVMODE_BUILD diff --git a/icd/api/sqtt/sqtt_layer.h b/icd/api/sqtt/sqtt_layer.h index 9d93e2f0..e105e193 100644 --- a/icd/api/sqtt/sqtt_layer.h +++ b/icd/api/sqtt/sqtt_layer.h @@ -46,6 +46,7 @@ #include "palList.h" #include "palHashMap.h" +#include "palVector.h" namespace vk { @@ -66,6 +67,15 @@ struct SqttBindTargetParams const Pal::BindTargetParams* pBindParams; }; +struct DevUserMarkerString +{ + uint32_t length; + char string[128]; +}; + +using DevUserMarkerOpHistory = Util::Vector; +using DevStringTable = Util::Vector; + // ===================================================================================================================== // This is an auxiliary structure that tracks whatever queue-level state is necessary to handle SQTT marker // annotations. 
@@ -180,6 +190,10 @@ class SqttCmdBufferState void AddDebugTag(uint64_t tag); bool HasDebugTag(uint64_t tag) const; + uint64_t GetUserMarkerContextValue() const; + const DevUserMarkerOpHistory& GetUserMarkerOpHistory() const; + const DevStringTable& GetUserMarkerStrings() const; + private: RgpSqttMarkerEvent BuildEventMarker(RgpSqttMarkerEventType apiType); void WriteCbStartMarker() const; @@ -240,6 +254,9 @@ class SqttCmdBufferState } m_currentBarrier; Util::List m_debugTags; + + DevUserMarkerOpHistory m_userMarkerOpHistory; // User marker operation history + DevStringTable m_userMarkerStrings; // User marker strings informatio }; void SqttOverrideDispatchTable(DispatchTable* pDispatchTable, SqttMgr* pMgr); diff --git a/icd/api/strings/entry_points.txt b/icd/api/strings/entry_points.txt index 483270db..689ac378 100644 --- a/icd/api/strings/entry_points.txt +++ b/icd/api/strings/entry_points.txt @@ -583,3 +583,4 @@ vkCmdBindDescriptorBufferEmbeddedSamplers2EXT @device @dext(KHR_main vkCmdSetRenderingAttachmentLocationsKHR @device @dext(KHR_dynamic_rendering_local_read) vkCmdSetRenderingInputAttachmentIndicesKHR @device @dext(KHR_dynamic_rendering_local_read) +vkCmdSetDepthBias2EXT @device @dext(EXT_depth_bias_control) diff --git a/icd/api/strings/extensions.txt b/icd/api/strings/extensions.txt index 458a16c4..ef9fdbe4 100644 --- a/icd/api/strings/extensions.txt +++ b/icd/api/strings/extensions.txt @@ -181,6 +181,7 @@ VK_EXT_load_store_op_none VK_KHR_maintenance4 VK_KHR_maintenance5 VK_KHR_maintenance6 +VK_KHR_maintenance7 VK_AMD_shader_early_and_late_fragment_tests VK_EXT_mesh_shader VK_EXT_image_view_min_lod @@ -220,3 +221,4 @@ VK_EXT_frame_boundary VK_EXT_image_compression_control #if VKI_RAY_TRACING #endif +VK_EXT_depth_bias_control diff --git a/icd/api/vk_cmdbuffer.cpp b/icd/api/vk_cmdbuffer.cpp index 7e95bd61..a977fecb 100644 --- a/icd/api/vk_cmdbuffer.cpp +++ b/icd/api/vk_cmdbuffer.cpp @@ -633,8 +633,7 @@ CmdBuffer::CmdBuffer( m_flags.preBindDefaultState = 
settings.preBindDefaultState; m_flags.offsetMode = pDevice->GetEnabledFeatures().robustVertexBufferExtend; - Pal::DeviceProperties info; - m_pDevice->PalDevice(DefaultDeviceIndex)->GetProperties(&info); + const Pal::DeviceProperties& info = m_pDevice->GetPalProperties(); m_flags.useBackupBuffer = false; memset(m_pBackupPalCmdBuffers, 0, sizeof(Pal::ICmdBuffer*) * MaxPalDevices); @@ -646,7 +645,7 @@ CmdBuffer::CmdBuffer( m_flags.useReleaseAcquire = info.gfxipProperties.flags.supportReleaseAcquireInterface && settings.useAcquireReleaseInterface; m_flags.useSplitReleaseAcquire = m_flags.useReleaseAcquire && - info.gfxipProperties.flags.supportSplitReleaseAcquire; + info.queueProperties[m_palQueueType].flags.supportSplitReleaseAcquire; } // ===================================================================================================================== @@ -872,6 +871,10 @@ VkResult CmdBuffer::Initialize( { m_debugPrintf.Init(m_pDevice); } + +#if VKI_RAY_TRACING + m_pfnTraceRaysDispatchPerDevice = CmdBuffer::TraceRaysDispatchPerDevice; +#endif return PalToVkResult(result); } @@ -1801,6 +1804,7 @@ void CmdBuffer::ResetPipelineState() pPerGpuState->viewport.vertDiscardRatio = 1.0f; pPerGpuState->viewport.depthRange = Pal::DepthRange::ZeroToOne; pPerGpuState->maxPipelineStackSizes = {}; + pPerGpuState->dynamicPipelineStackSize = 0; deviceIdx++; } @@ -1937,83 +1941,70 @@ void CmdBuffer::RebindPipeline() { const ComputePipeline* pPipeline = m_allGpuState.pComputePipeline; - if (pPipeline != nullptr) - { - const PhysicalDevice* pPhysicalDevice = m_pDevice->VkPhysicalDevice(DefaultDeviceIndex); + VK_ASSERT(pPipeline != nullptr); - if ((pPhysicalDevice->GetQueueFamilyPalQueueType(m_queueFamilyIndex) == Pal::QueueTypeCompute) && - (m_asyncComputeQueueMaxWavesPerCu > 0)) - { - Pal::DynamicComputeShaderInfo dynamicInfo = {}; + const PhysicalDevice* pPhysicalDevice = m_pDevice->VkPhysicalDevice(DefaultDeviceIndex); - dynamicInfo.maxWavesPerCu = 
static_cast(m_asyncComputeQueueMaxWavesPerCu); + if ((pPhysicalDevice->GetQueueFamilyPalQueueType(m_queueFamilyIndex) == Pal::QueueTypeCompute) && + (m_asyncComputeQueueMaxWavesPerCu > 0)) + { + Pal::DynamicComputeShaderInfo dynamicInfo = {}; - pPipeline->BindToCmdBuffer(this, dynamicInfo); - } - else - { - pPipeline->BindToCmdBuffer(this, pPipeline->GetBindInfo()); - } + dynamicInfo.maxWavesPerCu = static_cast(m_asyncComputeQueueMaxWavesPerCu); - palBindPoint = Pal::PipelineBindPoint::Compute; - pNewUserDataLayout = pPipeline->GetUserDataLayout(); + pPipeline->BindToCmdBuffer(this, dynamicInfo); } else { - ComputePipeline::BindNullPipeline(this); + pPipeline->BindToCmdBuffer(this, pPipeline->GetBindInfo()); } - palBindPoint = Pal::PipelineBindPoint::Compute; + pNewUserDataLayout = pPipeline->GetUserDataLayout(); + palBindPoint = Pal::PipelineBindPoint::Compute; } else if (bindPoint == PipelineBindGraphics) { const GraphicsPipeline* pPipeline = m_allGpuState.pGraphicsPipeline; - if (pPipeline != nullptr) - { - pPipeline->BindToCmdBuffer(this); + VK_ASSERT(pPipeline != nullptr); - if (pPipeline->ContainsStaticState(DynamicStatesInternal::VertexInputBindingStride)) - { - UpdateVertexBufferStrides(pPipeline); - } + pPipeline->BindToCmdBuffer(this); - pNewUserDataLayout = pPipeline->GetUserDataLayout(); + if (pPipeline->ContainsStaticState(DynamicStatesInternal::VertexInputBindingStride)) + { + UpdateVertexBufferStrides(pPipeline); + } + + pNewUserDataLayout = pPipeline->GetUserDataLayout(); + palBindPoint = Pal::PipelineBindPoint::Graphics; - // Update dynamic vertex input state and check whether need rebind uber-fetch shader internal memory - PipelineBindState* pBindState = &m_allGpuState.pipelineState[PipelineBindGraphics]; - if (pPipeline->ContainsDynamicState(DynamicStatesInternal::VertexInput)) + // Update dynamic vertex input state and check whether need rebind uber-fetch shader internal memory + PipelineBindState* pBindState = 
&m_allGpuState.pipelineState[PipelineBindGraphics]; + if (pPipeline->ContainsDynamicState(DynamicStatesInternal::VertexInput)) + { + if (pBindState->hasDynamicVertexInput == false) { - if (pBindState->hasDynamicVertexInput == false) - { - if (pBindState->pVertexInputInternalData != nullptr) - { - rebindFlags |= RebindUberFetchInternalMem; - } - pBindState->hasDynamicVertexInput = true; - } - uint32_t newUberFetchShaderUserData = GetUberFetchShaderUserData(pNewUserDataLayout); - if (GetUberFetchShaderUserData(&pBindState->userDataLayout) != newUberFetchShaderUserData) + if (pBindState->pVertexInputInternalData != nullptr) { - SetUberFetchShaderUserData(&pBindState->userDataLayout, newUberFetchShaderUserData); - - if (pBindState->pVertexInputInternalData != nullptr) - { - rebindFlags |= RebindUberFetchInternalMem; - } + rebindFlags |= RebindUberFetchInternalMem; } + pBindState->hasDynamicVertexInput = true; } - else + uint32_t newUberFetchShaderUserData = GetUberFetchShaderUserData(pNewUserDataLayout); + if (GetUberFetchShaderUserData(&pBindState->userDataLayout) != newUberFetchShaderUserData) { - pBindState->hasDynamicVertexInput = false; + SetUberFetchShaderUserData(&pBindState->userDataLayout, newUberFetchShaderUserData); + + if (pBindState->pVertexInputInternalData != nullptr) + { + rebindFlags |= RebindUberFetchInternalMem; + } } } else { - GraphicsPipeline::BindNullPipeline(this); + pBindState->hasDynamicVertexInput = false; } - - palBindPoint = Pal::PipelineBindPoint::Graphics; } #if VKI_RAY_TRACING @@ -2021,32 +2012,26 @@ void CmdBuffer::RebindPipeline() { const RayTracingPipeline* pPipeline = m_allGpuState.pRayTracingPipeline; - if (pPipeline != nullptr) - { - const PhysicalDevice* pPhysicalDevice = m_pDevice->VkPhysicalDevice(DefaultDeviceIndex); + VK_ASSERT(pPipeline != nullptr); - if ((pPhysicalDevice->GetQueueFamilyPalQueueType(m_queueFamilyIndex) == Pal::QueueTypeCompute) && - (m_asyncComputeQueueMaxWavesPerCu > 0)) - { - Pal::DynamicComputeShaderInfo 
dynamicInfo = {}; + const PhysicalDevice* pPhysicalDevice = m_pDevice->VkPhysicalDevice(DefaultDeviceIndex); - dynamicInfo.maxWavesPerCu = static_cast(m_asyncComputeQueueMaxWavesPerCu); + if ((pPhysicalDevice->GetQueueFamilyPalQueueType(m_queueFamilyIndex) == Pal::QueueTypeCompute) && + (m_asyncComputeQueueMaxWavesPerCu > 0)) + { + Pal::DynamicComputeShaderInfo dynamicInfo = {}; - pPipeline->BindToCmdBuffer(this, dynamicInfo); - } - else - { - pPipeline->BindToCmdBuffer(this, pPipeline->GetBindInfo()); - } + dynamicInfo.maxWavesPerCu = static_cast(m_asyncComputeQueueMaxWavesPerCu); - pNewUserDataLayout = pPipeline->GetUserDataLayout(); + pPipeline->BindToCmdBuffer(this, dynamicInfo); } else { - RayTracingPipeline::BindNullPipeline(this); + pPipeline->BindToCmdBuffer(this, pPipeline->GetBindInfo()); } - palBindPoint = Pal::PipelineBindPoint::Compute; + pNewUserDataLayout = pPipeline->GetUserDataLayout(); + palBindPoint = Pal::PipelineBindPoint::Compute; } #endif else @@ -2054,6 +2039,8 @@ void CmdBuffer::RebindPipeline() VK_NEVER_CALLED(); } + VK_ASSERT(pNewUserDataLayout != nullptr); + // Push Constant user data layouts are scheme-agnostic, which will always be checked and rebound if // needed. // In compact scheme, the top-level user data layout of two compatible pipeline layout may be different. @@ -2084,10 +2071,7 @@ void CmdBuffer::RebindPipeline() VK_ASSERT(PalPipelineBindingOwnedBy(Pal::PipelineBindPoint::Graphics, PipelineBindGraphics)); // A user data layout switch may also require some user data to be reloaded (for both gfx and compute). 
- if (pNewUserDataLayout != nullptr) - { - rebindFlags |= SwitchCompactSchemeUserDataLayouts(bindPoint, pNewUserDataLayout); - } + rebindFlags |= SwitchCompactSchemeUserDataLayouts(bindPoint, pNewUserDataLayout); } rebindFlags |= SwitchCommonUserDataLayouts(bindPoint, pNewUserDataLayout); @@ -3580,8 +3564,8 @@ void CmdBuffer::ExecuteIndirect( VkBool32 isPreprocessed, const VkGeneratedCommandsInfoNV* pInfo) { - IndirectCommandsLayout* pLayout = IndirectCommandsLayout::ObjectFromHandle(pInfo->indirectCommandsLayout); - IndirectCommandsInfo info = pLayout->GetIndirectCommandsInfo(); + IndirectCommandsLayoutNV* pLayout = IndirectCommandsLayoutNV::ObjectFromHandle(pInfo->indirectCommandsLayout); + IndirectCommandsInfo info = pLayout->GetIndirectCommandsInfo(); uint64_t barrierCmd = 0; @@ -4895,7 +4879,7 @@ void CmdBuffer::StoreAttachmentInfo( { const ImageView* const pImageView = ImageView::ObjectFromHandle(renderingAttachmentInfo.imageView); - if(pImageView != nullptr) + if (pImageView != nullptr) { const Image* pColorImage = pImageView->GetImage(); @@ -4917,12 +4901,17 @@ void CmdBuffer::StoreAttachmentInfo( if (pResolveImage != nullptr) { + const RPImageLayout resolveLayout = { renderingAttachmentInfo.resolveImageLayout, + Pal::LayoutResolveDst }; pDynamicRenderingAttachement->resolveImageLayout = - pResolveImage->GetAttachmentLayout( - { renderingAttachmentInfo.resolveImageLayout, Pal::LayoutResolveDst }, 0, this); + pResolveImage->GetAttachmentLayout(resolveLayout, 0, this); } } } + else + { + *pDynamicRenderingAttachement = {}; + } } // ===================================================================================================================== @@ -5210,9 +5199,11 @@ void CmdBuffer::EndRendering() if ((renderingAttachmentInfo.resolveMode != VK_RESOLVE_MODE_NONE) && (renderingAttachmentInfo.pResolveImageView != nullptr)) { - ResolveImage( - VK_IMAGE_ASPECT_COLOR_BIT, - renderingAttachmentInfo); + { + ResolveImage( + VK_IMAGE_ASPECT_COLOR_BIT, + 
renderingAttachmentInfo); + } } } @@ -5255,7 +5246,8 @@ void CmdBuffer::ResetEvent( if (pEvent->IsUseToken()) { - pEvent->SetSyncToken(0xFFFFFFFF); + const Pal::ReleaseToken token = { 0xFFFFFF, 0xFF }; + pEvent->SetSyncToken(token); } else { @@ -6536,9 +6528,8 @@ void CmdBuffer::PipelineBarrier( imageMemoryBarrierCount, pImageMemoryBarriers, &barrier); - - DbgBarrierPostCmd(DbgBarrierPipelineBarrierWaitEvents); } + DbgBarrierPostCmd(DbgBarrierPipelineBarrierWaitEvents); } // ===================================================================================================================== @@ -7297,7 +7288,8 @@ void CmdBuffer::PalCmdAcquire( if (pEvent->IsUseToken()) { // Allocate space to store sync token values (automatically rewound on unscope) - uint32* pSyncTokens = eventCount > 0 ? pVirtStackFrame->AllocArray(eventCount) : nullptr; + Pal::ReleaseToken* pSyncTokens = eventCount > 0 ? + pVirtStackFrame->AllocArray(eventCount) : nullptr; if (pSyncTokens != nullptr) { @@ -7477,7 +7469,6 @@ void CmdBuffer::WriteTimestamp( if (remainingQueryCount > 0) { const auto firstRemainingQuery = query + 1; - constexpr uint32_t TimestampZeroChunk = 0; // Set value of each remaining query to 0 and to make them avaliable. // Note that values of remaining queries (to which 0 was written) are not considered timestamps. @@ -7485,7 +7476,7 @@ void CmdBuffer::WriteTimestamp( *pQueryPool, firstRemainingQuery, remainingQueryCount, - TimestampZeroChunk); + 0u); } } } @@ -8666,7 +8657,7 @@ void CmdBuffer::RPLoadOpClearDepthStencil( } // ===================================================================================================================== -// Launches one or more MSAA resolves during a render pass instance. +// Launches one or more resolves during a render pass instance. 
void CmdBuffer::RPResolveAttachments( uint32_t count, const RPResolveInfo* pResolves) @@ -8681,137 +8672,137 @@ void CmdBuffer::RPResolveAttachments( { const RPResolveInfo& params = pResolves[i]; - const Framebuffer::Attachment& srcAttachment = - m_allGpuState.pFramebuffer->GetAttachment(params.src.attachment); - const Framebuffer::Attachment& dstAttachment = - m_allGpuState.pFramebuffer->GetAttachment(params.dst.attachment); - - // Both color and depth-stencil resolves are allowed by resolve attachments - // SubresRange shall be exactly same for src and dst. - VK_ASSERT(srcAttachment.subresRangeCount == dstAttachment.subresRangeCount); - VK_ASSERT(srcAttachment.subresRange[0].numMips == 1); + { + RPResolveMsaa(params); + } + } - const uint32_t sliceCount = Util::Min( - srcAttachment.subresRange[0].numSlices, - dstAttachment.subresRange[0].numSlices); + if (m_pSqttState != nullptr) + { + m_pSqttState->EndRenderPassResolve(); + } +} - // We expect MSAA images to never have mipmaps - VK_ASSERT(srcAttachment.subresRange[0].startSubres.mipLevel == 0); +// ===================================================================================================================== +// Launches one or more MSAA resolves during a render pass instance. 
+void CmdBuffer::RPResolveMsaa( + const RPResolveInfo& params) +{ + const Framebuffer::Attachment& srcAttachment = + m_allGpuState.pFramebuffer->GetAttachment(params.src.attachment); + const Framebuffer::Attachment& dstAttachment = + m_allGpuState.pFramebuffer->GetAttachment(params.dst.attachment); - uint32_t aspectRegionCount = 0; - uint32_t srcResolvePlanes[MaxRangePerAttachment] = {}; - uint32_t dstResolvePlanes[MaxRangePerAttachment] = {}; - const VkFormat srcResolveFormat = srcAttachment.pView->GetViewFormat(); - const VkFormat dstResolveFormat = dstAttachment.pView->GetViewFormat(); - Pal::ResolveMode resolveModes[MaxRangePerAttachment] = {}; + // Both color and depth-stencil resolves are allowed by resolve attachments + // SubresRange shall be exactly same for src and dst. + VK_ASSERT(srcAttachment.subresRangeCount == dstAttachment.subresRangeCount); + VK_ASSERT(srcAttachment.subresRange[0].numMips == 1); - const Pal::MsaaQuadSamplePattern* pSampleLocations = nullptr; + const uint32_t sliceCount = Util::Min( + srcAttachment.subresRange[0].numSlices, + dstAttachment.subresRange[0].numSlices); - if (Formats::IsDepthStencilFormat(srcResolveFormat) == false) - { - resolveModes[0] = Pal::ResolveMode::Average; - srcResolvePlanes[0] = 0; - dstResolvePlanes[0] = 0; - aspectRegionCount = 1; - } - else - { - const uint32_t subpass = m_renderPassInstance.subpass; + // We expect MSAA images to never have mipmaps + VK_ASSERT(srcAttachment.subresRange[0].startSubres.mipLevel == 0); - const VkResolveModeFlagBits depthResolveMode = - m_allGpuState.pRenderPass->GetDepthResolveMode(subpass); - const VkResolveModeFlagBits stencilResolveMode = - m_allGpuState.pRenderPass->GetStencilResolveMode(subpass); - const VkImageAspectFlags depthStecilAcpect = - m_allGpuState.pRenderPass->GetResolveDepthStecilAspect(subpass); + uint32_t aspectRegionCount = 0; + uint32_t srcResolvePlanes[MaxRangePerAttachment] = {}; + uint32_t dstResolvePlanes[MaxRangePerAttachment] = {}; + const VkFormat 
srcResolveFormat = srcAttachment.pView->GetViewFormat(); + const VkFormat dstResolveFormat = dstAttachment.pView->GetViewFormat(); + Pal::ResolveMode resolveModes[MaxRangePerAttachment] = {}; - if (Formats::HasDepth(srcResolveFormat)) - { - // Must be specified because the source image was created with sampleLocsAlwaysKnown set - pSampleLocations = &m_renderPassInstance.pSamplePatterns[subpass].locations; - } + const Pal::MsaaQuadSamplePattern* pSampleLocations = nullptr; - if ((depthResolveMode != VK_RESOLVE_MODE_NONE) && - ((depthStecilAcpect & VK_IMAGE_ASPECT_DEPTH_BIT) != 0)) - { + if (Formats::IsDepthStencilFormat(srcResolveFormat) == false) + { + resolveModes[0] = Pal::ResolveMode::Average; + srcResolvePlanes[0] = 0; + dstResolvePlanes[0] = 0; + aspectRegionCount = 1; + } + else + { + const uint32_t subpass = m_renderPassInstance.subpass; - bool bResolveAspect = Formats::HasDepth(srcResolveFormat) && Formats::HasDepth(dstResolveFormat); + const VkResolveModeFlagBits depthResolveMode = + m_allGpuState.pRenderPass->GetDepthResolveMode(subpass); + const VkResolveModeFlagBits stencilResolveMode = + m_allGpuState.pRenderPass->GetStencilResolveMode(subpass); - if (bResolveAspect) - { - resolveModes[aspectRegionCount] = VkToPalResolveMode(depthResolveMode); - srcResolvePlanes[aspectRegionCount] = 0; - dstResolvePlanes[aspectRegionCount] = 0; - aspectRegionCount++; - } - } + if (Formats::HasDepth(srcResolveFormat)) + { + // Must be specified because the source image was created with sampleLocsAlwaysKnown set + pSampleLocations = &m_renderPassInstance.pSamplePatterns[subpass].locations; + } - if ((stencilResolveMode != VK_RESOLVE_MODE_NONE) && - ((depthStecilAcpect & VK_IMAGE_ASPECT_STENCIL_BIT) != 0)) + if (depthResolveMode != VK_RESOLVE_MODE_NONE) + { + if (Formats::HasDepth(srcResolveFormat) && Formats::HasDepth(dstResolveFormat)) { - - bool bResolveAspect = Formats::HasStencil(srcResolveFormat) && Formats::HasStencil(dstResolveFormat); - - if (bResolveAspect) - { 
- resolveModes[aspectRegionCount] = VkToPalResolveMode(stencilResolveMode); - srcResolvePlanes[aspectRegionCount] = Formats::HasDepth(srcResolveFormat) ? 1 : 0; - dstResolvePlanes[aspectRegionCount] = Formats::HasDepth(dstResolveFormat) ? 1 : 0; - aspectRegionCount++; - } + resolveModes[aspectRegionCount] = VkToPalResolveMode(depthResolveMode); + srcResolvePlanes[aspectRegionCount] = 0; + dstResolvePlanes[aspectRegionCount] = 0; + aspectRegionCount++; } } - // Depth and stencil might have different resolve mode, so allowing resolve each aspect independently. - for (uint32_t aspectRegionIndex = 0; aspectRegionIndex < aspectRegionCount; ++aspectRegionIndex) + if (stencilResolveMode != VK_RESOLVE_MODE_NONE) { - // During split-frame-rendering, the image to resolve could be split across multiple devices. - Pal::ImageResolveRegion regions[MaxPalDevices]; + if (Formats::HasStencil(srcResolveFormat) && Formats::HasStencil(dstResolveFormat)) + { + resolveModes[aspectRegionCount] = VkToPalResolveMode(stencilResolveMode); + srcResolvePlanes[aspectRegionCount] = Formats::HasDepth(srcResolveFormat) ? 1 : 0; + dstResolvePlanes[aspectRegionCount] = Formats::HasDepth(dstResolveFormat) ? 1 : 0; + aspectRegionCount++; + } + } + } - const Pal::ImageLayout srcLayout = RPGetAttachmentLayout(params.src.attachment, - srcResolvePlanes[aspectRegionIndex]); - const Pal::ImageLayout dstLayout = RPGetAttachmentLayout(params.dst.attachment, - dstResolvePlanes[aspectRegionIndex]); + // Depth and stencil might have different resolve mode, so allowing resolve each aspect independently. + for (uint32_t aspectRegionIndex = 0; aspectRegionIndex < aspectRegionCount; ++aspectRegionIndex) + { + // During split-frame-rendering, the image to resolve could be split across multiple devices. 
+ Pal::ImageResolveRegion regions[MaxPalDevices]; - for (uint32_t idx = 0; idx < m_renderPassInstance.renderAreaCount; idx++) - { - const Pal::Rect& renderArea = m_renderPassInstance.renderArea[idx]; + const Pal::ImageLayout srcLayout = RPGetAttachmentLayout(params.src.attachment, + srcResolvePlanes[aspectRegionIndex]); + const Pal::ImageLayout dstLayout = RPGetAttachmentLayout(params.dst.attachment, + dstResolvePlanes[aspectRegionIndex]); - regions[idx].srcPlane = srcResolvePlanes[aspectRegionIndex]; - regions[idx].srcSlice = srcAttachment.subresRange[0].startSubres.arraySlice; - regions[idx].srcOffset.x = renderArea.offset.x; - regions[idx].srcOffset.y = renderArea.offset.y; - regions[idx].srcOffset.z = 0; - regions[idx].dstPlane = dstResolvePlanes[aspectRegionIndex]; - regions[idx].dstMipLevel = dstAttachment.subresRange[0].startSubres.mipLevel; - regions[idx].dstSlice = dstAttachment.subresRange[0].startSubres.arraySlice; - regions[idx].dstOffset.x = renderArea.offset.x; - regions[idx].dstOffset.y = renderArea.offset.y; - regions[idx].dstOffset.z = 0; - regions[idx].extent.width = renderArea.extent.width; - regions[idx].extent.height = renderArea.extent.height; - regions[idx].extent.depth = 1; - regions[idx].numSlices = sliceCount; - regions[idx].swizzledFormat = Pal::UndefinedSwizzledFormat; + for (uint32_t idx = 0; idx < m_renderPassInstance.renderAreaCount; idx++) + { + const Pal::Rect& renderArea = m_renderPassInstance.renderArea[idx]; - regions[idx].pQuadSamplePattern = pSampleLocations; - } + regions[idx].srcPlane = srcResolvePlanes[aspectRegionIndex]; + regions[idx].srcSlice = srcAttachment.subresRange[0].startSubres.arraySlice; + regions[idx].srcOffset.x = renderArea.offset.x; + regions[idx].srcOffset.y = renderArea.offset.y; + regions[idx].srcOffset.z = 0; + regions[idx].dstPlane = dstResolvePlanes[aspectRegionIndex]; + regions[idx].dstMipLevel = dstAttachment.subresRange[0].startSubres.mipLevel; + regions[idx].dstSlice = 
dstAttachment.subresRange[0].startSubres.arraySlice; + regions[idx].dstOffset.x = renderArea.offset.x; + regions[idx].dstOffset.y = renderArea.offset.y; + regions[idx].dstOffset.z = 0; + regions[idx].extent.width = renderArea.extent.width; + regions[idx].extent.height = renderArea.extent.height; + regions[idx].extent.depth = 1; + regions[idx].numSlices = sliceCount; + regions[idx].swizzledFormat = Pal::UndefinedSwizzledFormat; - PalCmdResolveImage( - *srcAttachment.pImage, - srcLayout, - *dstAttachment.pImage, - dstLayout, - resolveModes[aspectRegionIndex], - m_renderPassInstance.renderAreaCount, - regions, - GetRpDeviceMask()); + regions[idx].pQuadSamplePattern = pSampleLocations; } - } - if (m_pSqttState != nullptr) - { - m_pSqttState->EndRenderPassResolve(); + PalCmdResolveImage( + *srcAttachment.pImage, + srcLayout, + *dstAttachment.pImage, + dstLayout, + resolveModes[aspectRegionIndex], + m_renderPassInstance.renderAreaCount, + regions, + GetRpDeviceMask()); } } @@ -10911,7 +10902,7 @@ void CmdBuffer::GetRayTracingDispatchArgs( m_pDevice->RayTrace()->GetAccelStructTrackerSrd(deviceIdx), sizeof(pConstants->descriptorTable.accelStructTrackerSrd)); - if (settings.llpcRaytracingMode >= RaytracingContinufy) + if (pPipeline->CheckIsCps()) { Pal::CompilerStackSizes stackSizes = PerGpuState(deviceIdx)->maxPipelineStackSizes; pConstants->constData.cpsFrontendStackSize = stackSizes.frontendSize; @@ -10954,6 +10945,7 @@ void CmdBuffer::GetRayTracingDispatchArgs( depth, pPipeline->GetShaderGroupCount() + 1, pPipeline->GetApiHash(), + GetUserMarkerContextValue(), &raygenSbt, &missSbt, &hitSbt, @@ -11038,13 +11030,7 @@ void CmdBuffer::TraceRaysPerDevice( PalCmdBuffer(deviceIdx)->CmdSetUserData(Pal::PipelineBindPoint::Compute, dispatchRaysUserData, 1, &constGpuAddrLow); - uint32_t dispatchSizeX = 0; - uint32_t dispatchSizeY = 0; - uint32_t dispatchSizeZ = 0; - - pPipeline->GetDispatchSize(&dispatchSizeX, &dispatchSizeY, &dispatchSizeZ, width, height, depth); - - 
PalCmdBuffer(deviceIdx)->CmdDispatch({ dispatchSizeX, dispatchSizeY, dispatchSizeZ }); + m_pfnTraceRaysDispatchPerDevice(this, deviceIdx, width, height, depth); DbgBarrierPostCmd(DbgTraceRays); } @@ -11071,7 +11057,8 @@ void CmdBuffer::TraceRaysIndirect( missShaderBindingTable, hitShaderBindingTable, callableShaderBindingTable, - indirectDeviceAddress); + indirectDeviceAddress, + GetUserMarkerContextValue()); } while (deviceGroup.IterateNext()); } @@ -11117,6 +11104,23 @@ void CmdBuffer::SyncIndirectCopy( } } +// ===================================================================================================================== +void CmdBuffer::TraceRaysDispatchPerDevice( + CmdBuffer* pCmdBuffer, + uint32_t deviceIdx, + uint32_t width, + uint32_t height, + uint32_t depth) +{ + const RayTracingPipeline* pPipeline = pCmdBuffer->m_allGpuState.pRayTracingPipeline; + uint32_t dispatchSizeX = 0; + uint32_t dispatchSizeY = 0; + uint32_t dispatchSizeZ = 0; + + pPipeline->GetDispatchSize(&dispatchSizeX, &dispatchSizeY, &dispatchSizeZ, width, height, depth); + pCmdBuffer->PalCmdBuffer(deviceIdx)->CmdDispatch({ dispatchSizeX, dispatchSizeY, dispatchSizeZ }); +} + // ===================================================================================================================== void CmdBuffer::TraceRaysIndirectPerDevice( const uint32_t deviceIdx, @@ -11125,7 +11129,8 @@ void CmdBuffer::TraceRaysIndirectPerDevice( const VkStridedDeviceAddressRegionKHR& missShaderBindingTable, const VkStridedDeviceAddressRegionKHR& hitShaderBindingTable, const VkStridedDeviceAddressRegionKHR& callableShaderBindingTable, - VkDeviceAddress indirectDeviceAddress) + VkDeviceAddress indirectDeviceAddress, + uint64_t userMarkerContext) { DbgBarrierPreCmd(DbgTraceRays); @@ -11167,7 +11172,11 @@ void CmdBuffer::TraceRaysIndirectPerDevice( pInitConstants->indirectMode = (indirectArgType == GpuRt::ExecuteIndirectArgType::DispatchDimensions) ? 
0 : 1; - if (settings.rtFlattenThreadGroupSize == 0) + // NOTE: For CPS, we only support flatten thread group so far. + const uint32_t flattenThreadGroupSize = + pPipeline->CheckIsCps() ? settings.dispatchRaysThreadGroupSize : settings.rtFlattenThreadGroupSize; + + if (flattenThreadGroupSize == 0) { pInitConstants->dispatchDimSwizzleMode = 0; pInitConstants->rtThreadGroupSizeX = settings.rtThreadGroupSizeX; @@ -11177,7 +11186,7 @@ void CmdBuffer::TraceRaysIndirectPerDevice( else { pInitConstants->dispatchDimSwizzleMode = 1; - pInitConstants->rtThreadGroupSizeX = settings.rtFlattenThreadGroupSize; + pInitConstants->rtThreadGroupSizeX = flattenThreadGroupSize; pInitConstants->rtThreadGroupSizeY = 1; pInitConstants->rtThreadGroupSizeZ = 1; } @@ -11197,6 +11206,7 @@ void CmdBuffer::TraceRaysIndirectPerDevice( 0, pPipeline->GetShaderGroupCount() + 1, pPipeline->GetApiHash(), + userMarkerContext, &raygenShaderBindingTable, &missShaderBindingTable, &hitShaderBindingTable, @@ -11351,18 +11361,7 @@ void CmdBuffer::SetRayTracingPipelineStackSize( do { const uint32_t deviceIdx = deviceGroup.Index(); - - const RuntimeSettings& settings = m_pDevice->GetRuntimeSettings(); - Pal::CompilerStackSizes stackSizes = {}; - if (settings.llpcRaytracingMode >= RaytracingContinufy) - { - stackSizes.frontendSize = pipelineStackSize; - } - else - { - stackSizes.backendSize = pipelineStackSize; - } - UpdateLargestPipelineStackSizes(deviceIdx, stackSizes); + PerGpuState(deviceIdx)->dynamicPipelineStackSize = pipelineStackSize; } while (deviceGroup.IterateNext()); } @@ -11439,6 +11438,7 @@ void CmdBuffer::BindRayQueryConstants( depth * pOrigThreadgroupDims[2], 1, pPipeline->GetApiHash(), + GetUserMarkerContextValue(), nullptr, nullptr, nullptr, @@ -11455,6 +11455,7 @@ void CmdBuffer::BindRayQueryConstants( pOrigThreadgroupDims[2], 1, pPipeline->GetApiHash(), + GetUserMarkerContextValue(), nullptr, nullptr, nullptr, @@ -11518,18 +11519,22 @@ uint32_t CmdBuffer::GetPipelineScratchSize( { uint32_t 
scratchSize = 0; #if VKI_RAY_TRACING - const RuntimeSettings& settings = m_pDevice->GetRuntimeSettings(); - auto stackSizes = PerGpuState(deviceIdx)->maxPipelineStackSizes; - if ((settings.llpcRaytracingMode >= RaytracingContinufy) && - ((settings.cpsFlags & CpsFlagStackInGlobalMem) == 0)) + if (m_allGpuState.pRayTracingPipeline != nullptr) { - // Continuations with stack in scratch - scratchSize = stackSizes.backendSize + stackSizes.frontendSize; - } - else - { - // Non-continuations or continuations stack in global memory - scratchSize = stackSizes.backendSize; + const RuntimeSettings& settings = m_pDevice->GetRuntimeSettings(); + auto stackSizes = PerGpuState(deviceIdx)->maxPipelineStackSizes; + auto dynamicStackSize = PerGpuState(deviceIdx)->dynamicPipelineStackSize; + + if (m_allGpuState.pRayTracingPipeline->CheckIsCps() && ((settings.cpsFlags & CpsFlagStackInGlobalMem) == 0)) + { + // Continuations with stack in scratch + scratchSize = stackSizes.backendSize + Util::Max(stackSizes.frontendSize, dynamicStackSize); + } + else + { + // Non-continuations or continuations stack in global memory + scratchSize = Util::Max(stackSizes.backendSize, dynamicStackSize); + } } #endif return scratchSize; @@ -11669,6 +11674,22 @@ void CmdBuffer::ValidateGraphicsStates() } } + if (m_allGpuState.dirtyGraphics.msaa || m_allGpuState.dirtyGraphics.pipeline) + { + const GraphicsPipeline* pGraphicsPipeline = m_allGpuState.pGraphicsPipeline; + if (pGraphicsPipeline != nullptr) + { + const uint8 forceSampleRateShading = + (m_allGpuState.msaaCreateInfo.pixelShaderSamples > 1) && + (pGraphicsPipeline->GetPipelineFlags().sampleShadingEnable != 0); + if (m_allGpuState.msaaCreateInfo.flags.forceSampleRateShading != forceSampleRateShading) + { + m_allGpuState.msaaCreateInfo.flags.forceSampleRateShading = forceSampleRateShading; + m_allGpuState.dirtyGraphics.msaa = 1; + } + } + } + auto pDynamicState = &m_allGpuState.pipelineState[PipelineBindGraphics].dynamicBindInfo.gfxDynState; 
utils::IterateMask deviceGroup(m_cbBeginDeviceMask); @@ -13036,6 +13057,12 @@ PFN_vkCmdPushDescriptorSetWithTemplate2KHR CmdBuffer::GetCmdPushDescriptorSetWit return pFunc; } +// ===================================================================================================================== +uint64_t CmdBuffer::GetUserMarkerContextValue() const +{ + return (m_pSqttState != nullptr) ? m_pSqttState->GetUserMarkerContextValue() : 0; +} + // ===================================================================================================================== // Template instantiation needed for references entry.cpp. diff --git a/icd/api/vk_cmdbuffer_transfer.cpp b/icd/api/vk_cmdbuffer_transfer.cpp index 9310d62a..610636f3 100644 --- a/icd/api/vk_cmdbuffer_transfer.cpp +++ b/icd/api/vk_cmdbuffer_transfer.cpp @@ -420,14 +420,12 @@ void CmdBuffer::BlitImage( palCopyInfo.pRegions = pPalRegions; - // PAL does gamma correction whenever the destination is a SRGB image or treated as one. - // If the source image is an UNORM image that contains SRGB data, we need to set dstAsNorm - // so PAL doesn't end up doing gamma correction on values that are already in SRGB space. 
if (pSrcImage->TreatAsSrgb()) { - palCopyInfo.flags.dstAsNorm = true; + palCopyInfo.flags.srcAsSrgb = true; } - else if (pDstImage->TreatAsSrgb()) + + if (pDstImage->TreatAsSrgb()) { palCopyInfo.flags.dstAsSrgb = true; } @@ -444,11 +442,20 @@ void CmdBuffer::BlitImage( static_cast(region.srcOffsets[1].y - region.srcOffsets[0].y), static_cast(region.srcOffsets[1].z - region.srcOffsets[0].z) }; + const VkExtent3D dstExtent = + { + static_cast(region.dstOffsets[1].x - region.dstOffsets[0].x), + static_cast(region.dstOffsets[1].y - region.dstOffsets[0].y), + static_cast(region.dstOffsets[1].z - region.dstOffsets[0].z) + }; + const bool directCopyDepth = (srcExtent.depth == dstExtent.depth) || + ((srcExtent.depth == region.dstSubresource.layerCount) && + (dstExtent.depth == region.srcSubresource.layerCount)); if ((pSrcImage->GetFormat() == pDstImage->GetFormat()) && - (srcExtent.width == static_cast(region.dstOffsets[1].x - region.dstOffsets[0].x)) && - (srcExtent.height == static_cast(region.dstOffsets[1].y - region.dstOffsets[0].y)) && - (srcExtent.depth == static_cast(region.dstOffsets[1].z - region.dstOffsets[0].z))) + (srcExtent.width == dstExtent.width) && + (srcExtent.height == dstExtent.height) && + directCopyDepth) { const VkImageCopy imageCopy = { @@ -476,7 +483,7 @@ void CmdBuffer::BlitImage( (palCopyInfo.regionCount <= (regionBatch - MaxPalAspectsPerMask))) { VkToPalImageScaledCopyRegion(pRegions[regionIdx], srcFormat.format, pSrcImage->GetArraySize(), - dstFormat.format, pPalRegions, &palCopyInfo.regionCount); + dstFormat.format, pDstImage->GetArraySize(), pPalRegions, &palCopyInfo.regionCount); ++regionIdx; } diff --git a/icd/api/vk_compute_pipeline.cpp b/icd/api/vk_compute_pipeline.cpp index fbda6e8b..dca14a6e 100644 --- a/icd/api/vk_compute_pipeline.cpp +++ b/icd/api/vk_compute_pipeline.cpp @@ -751,19 +751,4 @@ void ComputePipeline::BindToCmdBuffer( } } -// 
===================================================================================================================== -void ComputePipeline::BindNullPipeline(CmdBuffer* pCmdBuffer) -{ - const uint32_t numGroupedCmdBuffers = pCmdBuffer->VkDevice()->NumPalDevices(); - - Pal::PipelineBindParams params = {}; - params.pipelineBindPoint = Pal::PipelineBindPoint::Compute; - params.apiPsoHash = Pal::InternalApiPsoHash; - - for (uint32_t deviceIdx = 0; deviceIdx < numGroupedCmdBuffers; deviceIdx++) - { - pCmdBuffer->PalCmdBuffer(deviceIdx)->CmdBindPipeline(params); - } -} - } // namespace vk diff --git a/icd/api/vk_conv.cpp b/icd/api/vk_conv.cpp index 24445a72..e89d0650 100644 --- a/icd/api/vk_conv.cpp +++ b/icd/api/vk_conv.cpp @@ -954,8 +954,8 @@ const char* VkResultName( errName = "VK_ERROR_PIPELINE_COMPILE_REQUIRED_EXT"; break; - case VkResult::VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT: - errName = "VK_ERROR_INCOMPATIBLE_SHADER_BINARY_EXT"; + case VkResult::VK_INCOMPATIBLE_SHADER_BINARY_EXT: + errName = "VK_INCOMPATIBLE_SHADER_BINARY_EXT"; break; default: @@ -1258,47 +1258,47 @@ VkResult InitializeUberFetchShaderFormatTable( INIT_UBER_FORMATINFO(R32_UINT, PalFmt_R(32, Uint), PalFmt_R(32, Uint), 1, 0, 1, 4, 4); INIT_UBER_FORMATINFO(R32G32_SFLOAT, - PalFmt_RG(32, 32, Float), PalFmt_RG(32, 32, Float), 1, 0, 2, 4, 4); + PalFmt_RG(32, 32, Float), PalFmt_R(32, Float), 1, 0, 2, 4, 4); INIT_UBER_FORMATINFO(R32G32_SINT, - PalFmt_RG(32, 32, Sint), PalFmt_RG(32, 32, Sint), 1, 0, 2, 4, 4); + PalFmt_RG(32, 32, Sint), PalFmt_R(32, Sint), 1, 0, 2, 4, 4); INIT_UBER_FORMATINFO(R32G32_UINT, - PalFmt_RG(32, 32, Uint), PalFmt_RG(32, 32, Uint), 1, 0, 2, 4, 4); + PalFmt_RG(32, 32, Uint), PalFmt_R(32, Uint), 1, 0, 2, 4, 4); INIT_UBER_FORMATINFO(R32G32B32_SFLOAT, - PalFmt_RGB(32, 32, 32, Float), PalFmt_RGB(32, 32, 32, Float), 1, 0, 3, 4, 4); + PalFmt_RGB(32, 32, 32, Float), PalFmt_R(32, Float), 1, 0, 3, 4, 4); INIT_UBER_FORMATINFO(R32G32B32_SINT, - PalFmt_RGB(32, 32, 32, Sint), PalFmt_RGB(32, 32, 
32, Sint), 1, 0, 3, 4, 4); + PalFmt_RGB(32, 32, 32, Sint), PalFmt_R(32, Sint), 1, 0, 3, 4, 4); INIT_UBER_FORMATINFO(R32G32B32_UINT, - PalFmt_RGB(32, 32, 32, Uint), PalFmt_RGB(32, 32, 32, Uint), 1, 0, 3, 4, 4); + PalFmt_RGB(32, 32, 32, Uint), PalFmt_R(32, Uint), 1, 0, 3, 4, 4); INIT_UBER_FORMATINFO(R32G32B32A32_SFLOAT, - PalFmt_RGBA(32, 32, 32, 32, Float), PalFmt_RGBA(32, 32, 32, 32, Float), 1, 0, 4, 4, 4); + PalFmt_RGBA(32, 32, 32, 32, Float), PalFmt_R(32, Float), 1, 0, 4, 4, 4); INIT_UBER_FORMATINFO(R32G32B32A32_SINT, - PalFmt_RGBA(32, 32, 32, 32, Sint), PalFmt_RGBA(32, 32, 32, 32, Sint), 1, 0, 4, 4, 4); + PalFmt_RGBA(32, 32, 32, 32, Sint), PalFmt_R(32, Sint), 1, 0, 4, 4, 4); INIT_UBER_FORMATINFO(R32G32B32A32_UINT, - PalFmt_RGBA(32, 32, 32, 32, Uint), PalFmt_RGBA(32, 32, 32, 32, Uint), 1, 0, 4, 4, 4); + PalFmt_RGBA(32, 32, 32, 32, Uint), PalFmt_R(32, Uint), 1, 0, 4, 4, 4); INIT_UBER_FORMATINFO(R64_SFLOAT, - PalFmt_RG(32, 32, Uint), PalFmt_RG(32, 32, Uint), 0, 0, 1, 8, 4); + PalFmt_RG(32, 32, Uint), PalFmt_R(32, Uint), 0, 0, 1, 8, 4); INIT_UBER_FORMATINFO(R64_SINT, - PalFmt_RG(32, 32, Uint), PalFmt_RG(32, 32, Uint), 0, 0, 1, 8, 4); + PalFmt_RG(32, 32, Uint), PalFmt_R(32, Uint), 0, 0, 1, 8, 4); INIT_UBER_FORMATINFO(R64_UINT, - PalFmt_RG(32, 32, Uint), PalFmt_RG(32, 32, Uint), 0, 0, 1, 8, 4); + PalFmt_RG(32, 32, Uint), PalFmt_R(32, Uint), 0, 0, 1, 8, 4); INIT_UBER_FORMATINFO(R64G64_SFLOAT, - PalFmt_RG(32, 32, Uint), PalFmt_RG(32, 32, Uint), 0, 0, 2, 8, 4); + PalFmt_RG(32, 32, Uint), PalFmt_R(32, Uint), 0, 0, 2, 8, 4); INIT_UBER_FORMATINFO(R64G64_SINT, - PalFmt_RG(32, 32, Uint), PalFmt_RG(32, 32, Uint), 0, 0, 2, 8, 4); + PalFmt_RG(32, 32, Uint), PalFmt_R(32, Uint), 0, 0, 2, 8, 4); INIT_UBER_FORMATINFO(R64G64_UINT, - PalFmt_RG(32, 32, Uint), PalFmt_RG(32, 32, Uint), 0, 0, 2, 8, 4); + PalFmt_RG(32, 32, Uint), PalFmt_R(32, Uint), 0, 0, 2, 8, 4); INIT_UBER_FORMATINFO(R64G64B64_SFLOAT, - PalFmt_RG(32, 32, Uint), PalFmt_RG(32, 32, Uint), 0, 0, 3, 8, 4); + PalFmt_RG(32, 32, 
Uint), PalFmt_R(32, Uint), 0, 0, 3, 8, 4); INIT_UBER_FORMATINFO(R64G64B64_SINT, - PalFmt_RG(32, 32, Uint), PalFmt_RG(32, 32, Uint), 0, 0, 3, 8, 4); + PalFmt_RG(32, 32, Uint), PalFmt_R(32, Uint), 0, 0, 3, 8, 4); INIT_UBER_FORMATINFO(R64G64B64_UINT, - PalFmt_RG(32, 32, Uint), PalFmt_RG(32, 32, Uint), 0, 0, 3, 8, 4); + PalFmt_RG(32, 32, Uint), PalFmt_R(32, Uint), 0, 0, 3, 8, 4); INIT_UBER_FORMATINFO(R64G64B64A64_SFLOAT, - PalFmt_RG(32, 32, Uint), PalFmt_RG(32, 32, Uint), 0, 0, 4, 8, 4); + PalFmt_RG(32, 32, Uint), PalFmt_R(32, Uint), 0, 0, 4, 8, 4); INIT_UBER_FORMATINFO(R64G64B64A64_SINT, - PalFmt_RG(32, 32, Uint), PalFmt_RG(32, 32, Uint), 0, 0, 4, 8, 4); + PalFmt_RG(32, 32, Uint), PalFmt_R(32, Uint), 0, 0, 4, 8, 4); INIT_UBER_FORMATINFO(R64G64B64A64_UINT, - PalFmt_RG(32, 32, Uint), PalFmt_RG(32, 32, Uint), 0, 0, 4, 8, 4); + PalFmt_RG(32, 32, Uint), PalFmt_R(32, Uint), 0, 0, 4, 8, 4); INIT_UBER_FORMATINFO(R8_SINT, PalFmt_R(8, Sint), PalFmt_R(8, Sint), 1, 0, 1, 1, 1); INIT_UBER_FORMATINFO(R8_SNORM, diff --git a/icd/api/vk_descriptor_pool.cpp b/icd/api/vk_descriptor_pool.cpp index f693848a..9225306a 100644 --- a/icd/api/vk_descriptor_pool.cpp +++ b/icd/api/vk_descriptor_pool.cpp @@ -564,53 +564,42 @@ VkResult DescriptorGpuMemHeap::Init( m_gpuMemAddrAlignment = pDevice->GetProperties().descriptorSizes.alignmentInDwords * sizeof(uint32_t); - if (pDevice->GetRuntimeSettings().pipelineLayoutMode == PipelineLayoutAngle) - { - for (uint32_t i = 0; i < pCreateInfo->poolSizeCount; ++i) - { - m_gpuMemSize += AngleDescPattern::DescriptorSetBindingStride * sizeof(uint32_t) * - pTypeCount[i].descriptorCount; - } - } - else - { - constexpr uint32_t InlineUniformGranularity = 4; + constexpr uint32_t InlineUniformGranularity = 4; - m_gpuMemSize += ((m_gpuMemAddrAlignment - InlineUniformGranularity) * maxInlineUniformBlockBindings); + m_gpuMemSize += ((m_gpuMemAddrAlignment - InlineUniformGranularity) * maxInlineUniformBlockBindings); - for (uint32_t i = 0; i < 
pCreateInfo->poolSizeCount; ++i) + for (uint32_t i = 0; i < pCreateInfo->poolSizeCount; ++i) + { + if (pTypeCount[i].type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT) { - if (pTypeCount[i].type == VK_DESCRIPTOR_TYPE_MUTABLE_EXT) + uint32_t maxSize = 0; + if ((pMutableDescriptorTypeCreateInfoEXT != nullptr) && + (pMutableDescriptorTypeCreateInfoEXT->pMutableDescriptorTypeLists != nullptr) && + (i < pMutableDescriptorTypeCreateInfoEXT->mutableDescriptorTypeListCount)) { - uint32_t maxSize = 0; - if ((pMutableDescriptorTypeCreateInfoEXT != nullptr) && - (pMutableDescriptorTypeCreateInfoEXT->pMutableDescriptorTypeLists != nullptr) && - (i < pMutableDescriptorTypeCreateInfoEXT->mutableDescriptorTypeListCount)) + const VkMutableDescriptorTypeListEXT& list = + pMutableDescriptorTypeCreateInfoEXT->pMutableDescriptorTypeLists[i]; + for (uint32_t j = 0; j < list.descriptorTypeCount; ++j) { - const VkMutableDescriptorTypeListEXT& list = - pMutableDescriptorTypeCreateInfoEXT->pMutableDescriptorTypeLists[i]; - for (uint32_t j = 0; j < list.descriptorTypeCount; ++j) - { - maxSize = Util::Max(maxSize, - DescriptorSetLayout::GetSingleDescStaticSize(pDevice, list.pDescriptorTypes[j])); - } + maxSize = Util::Max(maxSize, + DescriptorSetLayout::GetSingleDescStaticSize(pDevice, list.pDescriptorTypes[j])); } - - // If no mutable type list passed, assume largest - if (maxSize == 0) - { - maxSize = DescriptorSetLayout::GetSingleDescStaticSize( - pDevice, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); - } - - VK_ASSERT(maxSize > 0); - m_gpuMemSize += maxSize * pTypeCount[i].descriptorCount; } - else + + // If no mutable type list passed, assume largest + if (maxSize == 0) { - m_gpuMemSize += DescriptorSetLayout::GetSingleDescStaticSize(pDevice, pTypeCount[i].type) * - pTypeCount[i].descriptorCount; + maxSize = DescriptorSetLayout::GetSingleDescStaticSize( + pDevice, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER); } + + VK_ASSERT(maxSize > 0); + m_gpuMemSize += maxSize * pTypeCount[i].descriptorCount; + } 
+ else + { + m_gpuMemSize += DescriptorSetLayout::GetSingleDescStaticSize(pDevice, pTypeCount[i].type) * + pTypeCount[i].descriptorCount; } } diff --git a/icd/api/vk_descriptor_set_layout.cpp b/icd/api/vk_descriptor_set_layout.cpp index ed561e55..4227d2b3 100644 --- a/icd/api/vk_descriptor_set_layout.cpp +++ b/icd/api/vk_descriptor_set_layout.cpp @@ -619,11 +619,6 @@ VkResult DescriptorSetLayout::ConvertCreateInfo( // Determine the alignment requirement of descriptors in dwords. uint32 descAlignmentInDw = pDevice->GetProperties().descriptorSizes.alignmentInDwords; uint32 staDescAlignmentInDw = descAlignmentInDw; - if (pDevice->GetRuntimeSettings().pipelineLayoutMode == PipelineLayoutAngle) - { - VK_ASSERT(AngleDescPattern::DescriptorSetBindingStride % descAlignmentInDw == 0); - staDescAlignmentInDw = AngleDescPattern::DescriptorSetBindingStride; - } // If the last binding has the VARIABLE_DESCRIPTOR_COUNT_BIT set, write the varDescDwStride if ((bindingNumber == (pOut->count - 1)) && pBinding->bindingFlags.variableDescriptorCount) { diff --git a/icd/api/vk_device.cpp b/icd/api/vk_device.cpp index 74ee8655..94c113f7 100644 --- a/icd/api/vk_device.cpp +++ b/icd/api/vk_device.cpp @@ -76,6 +76,7 @@ #include "raytrace/ray_tracing_util.h" #include "raytrace/vk_ray_tracing_pipeline.h" #include "raytrace/vk_acceleration_structure.h" +#include "appopt/split_raytracing_layer.h" #endif #include "sqtt/sqtt_layer.h" @@ -552,7 +553,7 @@ VkResult Device::Create( { deviceFeatures.robustBufferAccessExtended = true; { - deviceFeatures.robustVertexBufferExtend = false; + deviceFeatures.robustVertexBufferExtend = true; } } @@ -1131,8 +1132,7 @@ VkResult Device::Initialize( } memcpy(&m_pQueues, pQueues, sizeof(m_pQueues)); - Pal::DeviceProperties deviceProps = {}; - result = PalToVkResult(PalDevice(DefaultDeviceIndex)->GetProperties(&deviceProps)); + const Pal::DeviceProperties& deviceProps = pPhysicalDevice->PalProperties(); if (result == VK_SUCCESS) { @@ -1482,9 +1482,17 @@ void 
Device::InitDispatchTable() } #if VKI_RAY_TRACING - if ((RayTrace() != nullptr) && (RayTrace()->GetBvhBatchLayer() != nullptr)) + RayTracingDevice* pRayTrace = RayTrace(); + if (pRayTrace != nullptr) { - RayTrace()->GetBvhBatchLayer()->OverrideDispatchTable(&m_dispatchTable); + if (pRayTrace->GetBvhBatchLayer() != nullptr) + { + pRayTrace->GetBvhBatchLayer()->OverrideDispatchTable(&m_dispatchTable); + } + if (pRayTrace->GetSplitRaytracingLayer() != nullptr) + { + pRayTrace->GetSplitRaytracingLayer()->OverrideDispatchTable(&m_dispatchTable); + } } #endif @@ -2152,7 +2160,6 @@ VkResult Device::CreateInternalPipelines() #endif if ((result == VK_SUCCESS) && - GetRuntimeSettings().useShaderLibraryForPipelineLibraryFastLink && GetEnabledFeatures().graphicsPipelineLibrary) { static constexpr uint8_t NullFragment[] = @@ -3207,7 +3214,14 @@ VkResult Device::ImportSemaphore( VkSemaphore semaphore, const ImportSemaphoreInfo& importInfo) { - return Semaphore::ObjectFromHandle(semaphore)->ImportSemaphore(this, importInfo); + VkResult result = VK_SUCCESS; + Semaphore* pSemaphore = Semaphore::ObjectFromHandle(semaphore); + + { + result = pSemaphore->ImportSemaphore(this, importInfo); + } + + return result; } // ===================================================================================================================== @@ -3661,7 +3675,7 @@ VkResult Device::CreateIndirectCommandsLayout( const VkAllocationCallbacks* pAllocator, VkIndirectCommandsLayoutNV* pIndirectCommandsLayout) { - return IndirectCommandsLayout::Create(this, pCreateInfo, pAllocator, pIndirectCommandsLayout); + return IndirectCommandsLayoutNV::Create(this, pCreateInfo, pAllocator, pIndirectCommandsLayout); } // ===================================================================================================================== @@ -5421,7 +5435,7 @@ VKAPI_ATTR void VKAPI_CALL vkGetGeneratedCommandsMemoryRequirementsNV( VkMemoryRequirements2* pMemoryRequirements) { const Device* pDevice = 
ApiDevice::ObjectFromHandle(device); - const IndirectCommandsLayout* pLayout = IndirectCommandsLayout::ObjectFromHandle(pInfo->indirectCommandsLayout); + const IndirectCommandsLayoutNV* pLayout = IndirectCommandsLayoutNV::ObjectFromHandle(pInfo->indirectCommandsLayout); pLayout->CalculateMemoryRequirements(pDevice, pMemoryRequirements); } @@ -5434,7 +5448,8 @@ VKAPI_ATTR VkResult VKAPI_CALL vkCreateIndirectCommandsLayoutNV( VkIndirectCommandsLayoutNV* pIndirectCommandsLayout) { Device* pDevice = ApiDevice::ObjectFromHandle(device); - const VkAllocationCallbacks* pAllocCB = pAllocator ? pAllocator : pDevice->VkInstance()->GetAllocCallbacks(); + const VkAllocationCallbacks* pAllocCB = (pAllocator != nullptr) ? pAllocator : + pDevice->VkInstance()->GetAllocCallbacks(); return pDevice->CreateIndirectCommandsLayout(pCreateInfo, pAllocCB, pIndirectCommandsLayout); } @@ -5448,9 +5463,10 @@ VKAPI_ATTR void VKAPI_CALL vkDestroyIndirectCommandsLayoutNV( if (indirectCommandsLayout != VK_NULL_HANDLE) { Device* pDevice = ApiDevice::ObjectFromHandle(device); - const VkAllocationCallbacks* pAllocCB = pAllocator ? pAllocator : pDevice->VkInstance()->GetAllocCallbacks(); + const VkAllocationCallbacks* pAllocCB = (pAllocator != nullptr) ? 
pAllocator : + pDevice->VkInstance()->GetAllocCallbacks(); - IndirectCommandsLayout::ObjectFromHandle(indirectCommandsLayout)->Destroy(pDevice, pAllocCB); + IndirectCommandsLayoutNV::ObjectFromHandle(indirectCommandsLayout)->Destroy(pDevice, pAllocCB); } } @@ -5466,3 +5482,6 @@ VkPipelineCreateFlags2KHR vk::Device::GetPipelineCreateFlags( const VkComputePipelineCreateInfo* pCreateInfo); +template +VkPipelineCreateFlags2KHR vk::Device::GetPipelineCreateFlags( + const VkGraphicsPipelineCreateInfo* pCreateInfo); diff --git a/icd/api/vk_dispatch.cpp b/icd/api/vk_dispatch.cpp index 1f2a2468..83db5be1 100644 --- a/icd/api/vk_dispatch.cpp +++ b/icd/api/vk_dispatch.cpp @@ -846,6 +846,7 @@ void DispatchTable::Init() INIT_DISPATCH_ENTRY(vkCmdSetRenderingAttachmentLocationsKHR ); INIT_DISPATCH_ENTRY(vkCmdSetRenderingInputAttachmentIndicesKHR ); + INIT_DISPATCH_ENTRY(vkCmdSetDepthBias2EXT ); } // ===================================================================================================================== diff --git a/icd/api/vk_event.cpp b/icd/api/vk_event.cpp index 9690bdb9..a26e6277 100644 --- a/icd/api/vk_event.cpp +++ b/icd/api/vk_event.cpp @@ -53,7 +53,7 @@ Event::Event( { if (useToken) { - m_syncToken = 0; + m_syncToken = {}; } else { @@ -77,17 +77,15 @@ VkResult Event::Create( Pal::Result palResult = Pal::Result::Success; bool useToken = false; - Pal::DeviceProperties info; - pDevice->PalDevice(DefaultDeviceIndex)->GetProperties(&info); - - const RuntimeSettings& settings = pDevice->GetRuntimeSettings(); + const Pal::DeviceProperties& info = pDevice->GetPalProperties(); + const RuntimeSettings& settings = pDevice->GetRuntimeSettings(); // If supportReleaseAcquireInterface is true, the ASIC provides new barrier interface CmdReleaseThenAcquire() // designed for Acquire/Release-based driver. This flag is currently enabled for gfx9 and above. 
// If supportSplitReleaseAcquire is true, the ASIC provides split CmdRelease() and CmdAcquire() to express barrier, // and CmdReleaseThenAcquire() is still valid. This flag is currently enabled for gfx10 and above. bool useSplitReleaseAcquire = info.gfxipProperties.flags.supportReleaseAcquireInterface && - info.gfxipProperties.flags.supportSplitReleaseAcquire && + info.queueProperties[0].flags.supportSplitReleaseAcquire && settings.useAcquireReleaseInterface; if (useSplitReleaseAcquire && settings.syncTokenEnabled && diff --git a/icd/api/vk_graphics_pipeline.cpp b/icd/api/vk_graphics_pipeline.cpp index 3e87bfa4..9e91a3f7 100644 --- a/icd/api/vk_graphics_pipeline.cpp +++ b/icd/api/vk_graphics_pipeline.cpp @@ -75,9 +75,10 @@ VkResult GraphicsPipeline::CreatePipelineBinaries( Vkgc::BinaryData* pPipelineBinaries, PipelineMetadata* pBinaryMetadata) { - VkResult result = VK_SUCCESS; - const uint32_t numPalDevices = pDevice->NumPalDevices(); - PipelineCompiler* pDefaultCompiler = pDevice->GetCompiler(DefaultDeviceIndex); + VkResult result = VK_SUCCESS; + const uint32_t numPalDevices = pDevice->NumPalDevices(); + PipelineCompiler* pDefaultCompiler = pDevice->GetCompiler(DefaultDeviceIndex); + bool storeBinaryToCache = true; // Load or create the pipeline binary PipelineBinaryCache* pPipelineBinaryCache = (pPipelineCache != nullptr) ? 
pPipelineCache->GetPipelineCache() @@ -233,7 +234,7 @@ VkResult GraphicsPipeline::CreatePipelineBinaries( binaryCreateInfoMGPU.stageFeedback); } - pDefaultCompiler->FreeGraphicsPipelineCreateInfo(pDevice, &binaryCreateInfoMGPU, false, false); + pDefaultCompiler->FreeGraphicsPipelineCreateInfo(pDevice, &binaryCreateInfoMGPU, false); } } } @@ -248,13 +249,10 @@ VkResult GraphicsPipeline::CreatePipelineBinaries( } // Add to any cache layer where missing - if (result == VK_SUCCESS) + if ((result == VK_SUCCESS) && storeBinaryToCache) { // Only store the optimized variant of the pipeline if deferCompileOptimizedPipeline is enabled - if ((pPipelineBinaries[deviceIdx].codeSize != 0) && - ((pDevice->GetRuntimeSettings().deferCompileOptimizedPipeline == false) || - ((pBinaryMetadata->enableEarlyCompile == false) && - (pBinaryMetadata->enableUberFetchShader == false)))) + if ((pPipelineBinaries[deviceIdx].codeSize != 0) && (pBinaryMetadata->enableUberFetchShader == false)) { pDevice->GetCompiler(deviceIdx)->CachePipelineBinary( &pCacheIds[deviceIdx], @@ -393,9 +391,11 @@ VkResult GraphicsPipeline::CreatePipelineObjects( pDevice->PalDevice(DefaultDeviceIndex)->GetGraphicsPipelineSize(pObjectCreateInfo->pipeline, &palResult); VK_ASSERT(palResult == Pal::Result::Success); + size_t allocationSize = sizeof(GraphicsPipeline) + (palSize * numPalDevices); + pSystemMem = pDevice->AllocApiObject( pAllocator, - sizeof(GraphicsPipeline) + (palSize * numPalDevices)); + allocationSize); if (pSystemMem == nullptr) { @@ -434,10 +434,11 @@ VkResult GraphicsPipeline::CreatePipelineObjects( const auto& info = pPalPipeline[deviceIdx]->GetInfo(); if ((info.ps.flags.perSampleShading == 1) || + (info.ps.flags.enablePops == 1) || ((info.ps.flags.usesSampleMask == 1) && (palProperties.gfxipProperties.flags.supportVrsWithDsExports == 0))) { - // Override the shader rate to 1x1 if SampleId used in shader or + // Override the shader rate to 1x1 if SampleId used in shader, or POPS is enabled, or // 
supportVrsWithDsExports is not supported and SampleMask used in shader. Device::SetDefaultVrsRateParams(&pObjectCreateInfo->immedInfo.vrsRateParams); @@ -498,6 +499,7 @@ VkResult GraphicsPipeline::CreatePipelineObjects( } result = PalToVkResult(palResult); + } // On success, wrap it up in a Vulkan object. @@ -569,6 +571,7 @@ VkResult GraphicsPipeline::CreatePipelineObjects( return result; } + // ===================================================================================================================== static bool IsGplFastLinkPossible( const Device* pDevice, @@ -738,11 +741,15 @@ VkResult GraphicsPipeline::Create( PipelineMetadata binaryMetadata = {}; PipelineLayout* pPipelineLayout = nullptr; bool enableFastLink = false; + bool binariesProvided = false; + bool gplProvided = false; + uint32 gplMask = 0; ShaderModuleHandle tempModules[ShaderStage::ShaderStageGfxCount] = {}; ShaderOptimizerKey shaderOptimizerKeys[ShaderStage::ShaderStageGfxCount] = {}; Vkgc::BinaryData pipelineBinaries[MaxPalDevices] = {}; Util::MetroHash::Hash cacheId[MaxPalDevices] = {}; const Pal::IShaderLibrary* shaderLibraries[GraphicsLibraryCount] = {}; + Util::MetroHash::Hash gplCacheId[GraphicsLibraryCount] = {}; uint32_t numShaderLibraries = 0; VK_ASSERT(pCreateInfo->layout != VK_NULL_HANDLE); @@ -751,89 +758,107 @@ VkResult GraphicsPipeline::Create( GraphicsPipelineLibraryInfo libInfo = {}; GraphicsPipelineCommon::ExtractLibraryInfo(pDevice, pCreateInfo, extStructs, flags, &libInfo); + if (libInfo.pPreRasterizationShaderLib != nullptr) + { + gplMask |= (1 << GraphicsLibraryPreRaster); + gplProvided = true; + } + if (libInfo.pFragmentShaderLib != nullptr) + { + gplMask |= (1 << GraphicsLibraryFragment); + gplProvided = true; + } + + // If this triggers, then the pPipelineBinaryInfoKHR handling code will need to handle the case where the binaries + // that are provided are GPL binaries, not monolithic binaries. + VK_ASSERT((gplProvided && binariesProvided) == false); + // 1. 
Check whether GPL fast link is possible - if (pDevice->GetRuntimeSettings().useShaderLibraryForPipelineLibraryFastLink) + // If pipeline only contains PreRasterizationShaderLib and no fragment shader is in the create info, + // we add a null fragment library in order to use fast link. + if ((libInfo.flags.isLibrary == false) && + ((libInfo.pPreRasterizationShaderLib != nullptr) && (libInfo.pFragmentShaderLib == nullptr))) { - // If pipeline only contains PreRasterizationShaderLib and no fragment shader is in the create info, - // we add a null fragment library in order to use fast link. - if ((libInfo.flags.isLibrary == false) && - ((libInfo.pPreRasterizationShaderLib != nullptr) && (libInfo.pFragmentShaderLib == nullptr))) + bool hasFragShader = false; + for (uint32_t i = 0; i < pCreateInfo->stageCount; ++i) { - bool hasFragShader = false; - for (uint32_t i = 0; i < pCreateInfo->stageCount; ++i) + if (ShaderFlagBitToStage(pCreateInfo->pStages[i].stage) == ShaderStageFragment) { - if (ShaderFlagBitToStage(pCreateInfo->pStages[i].stage) == ShaderStageFragment) - { - hasFragShader = true; - break; - } + hasFragShader = true; + break; } + } - if (hasFragShader == false) - { - libInfo.pFragmentShaderLib = pDevice->GetNullFragmentLib(); - } + if (hasFragShader == false) + { + libInfo.pFragmentShaderLib = pDevice->GetNullFragmentLib(); } + } + + if (IsGplFastLinkPossible(pDevice, libInfo, pPipelineLayout)) + { + result = pDevice->GetCompiler(DefaultDeviceIndex)->BuildGplFastLinkCreateInfo( + pDevice, pCreateInfo, extStructs, flags, libInfo, pPipelineLayout, &binaryMetadata, &binaryCreateInfo); - if (IsGplFastLinkPossible(pDevice, libInfo, pPipelineLayout)) + if (result == VK_SUCCESS) { - result = pDevice->GetCompiler(DefaultDeviceIndex)->BuildGplFastLinkCreateInfo( - pDevice, pCreateInfo, extStructs, flags, libInfo, pPipelineLayout, &binaryMetadata, &binaryCreateInfo); + const GraphicsPipelineBinaryCreateInfo& preRasterCreateInfo = + 
libInfo.pPreRasterizationShaderLib->GetPipelineBinaryCreateInfo(); + const GraphicsPipelineBinaryCreateInfo& fragmentCreateInfo = + libInfo.pFragmentShaderLib->GetPipelineBinaryCreateInfo(); - if (result == VK_SUCCESS) + shaderLibraries[numShaderLibraries++] = preRasterCreateInfo.pShaderLibraries[GraphicsLibraryPreRaster]; + shaderLibraries[numShaderLibraries++] = fragmentCreateInfo.pShaderLibraries[GraphicsLibraryFragment]; + + gplCacheId[GraphicsLibraryPreRaster] = + preRasterCreateInfo.earlyElfPackageHash[GraphicsLibraryPreRaster]; + gplCacheId[GraphicsLibraryFragment] = + fragmentCreateInfo.earlyElfPackageHash[GraphicsLibraryFragment]; + + if (binaryCreateInfo.pipelineInfo.enableColorExportShader) { - const GraphicsPipelineBinaryCreateInfo& preRasterCreateInfo = - libInfo.pPreRasterizationShaderLib->GetPipelineBinaryCreateInfo(); - const GraphicsPipelineBinaryCreateInfo& fragmentCreateInfo = - libInfo.pFragmentShaderLib->GetPipelineBinaryCreateInfo(); - - shaderLibraries[numShaderLibraries++] = preRasterCreateInfo.pShaderLibraries[GraphicsLibraryPreRaster]; - shaderLibraries[numShaderLibraries++] = fragmentCreateInfo.pShaderLibraries[GraphicsLibraryFragment]; - if (binaryCreateInfo.pipelineInfo.enableColorExportShader) + uint64_t colorExportTicks = Util::GetPerfCpuTime(); + Pal::IShaderLibrary* pColorExportLib = nullptr; + result = pDevice->GetCompiler(DefaultDeviceIndex)->CreateColorExportShaderLibrary(pDevice, + &binaryCreateInfo, + pAllocator, + &pColorExportLib); + if (result == VK_SUCCESS) { - uint64_t colorExportTicks = Util::GetPerfCpuTime(); - Pal::IShaderLibrary* pColorExportLib = nullptr; - result = pDevice->GetCompiler(DefaultDeviceIndex)->CreateColorExportShaderLibrary(pDevice, - &binaryCreateInfo, - pAllocator, - &pColorExportLib); - if (result == VK_SUCCESS) - { - shaderLibraries[numShaderLibraries++] = pColorExportLib; - binaryCreateInfo.pShaderLibraries[GraphicsLibraryColorExport] = pColorExportLib; - } - uint64_t durationTicks = 
Util::GetPerfCpuTime() - colorExportTicks; - colorExportDuration = vk::utils::TicksToNano(durationTicks); + shaderLibraries[numShaderLibraries++] = pColorExportLib; + binaryCreateInfo.pShaderLibraries[GraphicsLibraryColorExport] = pColorExportLib; } + uint64_t durationTicks = Util::GetPerfCpuTime() - colorExportTicks; + colorExportDuration = vk::utils::TicksToNano(durationTicks); } - else if (result == VK_ERROR_PIPELINE_COMPILE_REQUIRED_EXT) - { - flags |= VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT; - } + } + else if (result == VK_ERROR_PIPELINE_COMPILE_REQUIRED_EXT) + { + flags |= VK_PIPELINE_CREATE_2_LINK_TIME_OPTIMIZATION_BIT_EXT; + } - if (result == VK_SUCCESS) + if (result == VK_SUCCESS) + { + objectCreateInfo.pipeline.ppShaderLibraries = shaderLibraries; + objectCreateInfo.pipeline.numShaderLibraries = numShaderLibraries; + if ((pDevice->VkInstance()->GetDevModeMgr() != nullptr) || + pDevice->GetRuntimeSettings().enablePipelineDump || + pDevice->GetRuntimeSettings().logTagIdMask) { - objectCreateInfo.pipeline.ppShaderLibraries = shaderLibraries; - objectCreateInfo.pipeline.numShaderLibraries = numShaderLibraries; - if ((pDevice->VkInstance()->GetDevModeMgr() != nullptr) || - pDevice->GetRuntimeSettings().enablePipelineDump || - pDevice->GetRuntimeSettings().logTagIdMask) - { - BuildApiHash(pCreateInfo, - flags, - extStructs, - libInfo, - binaryCreateInfo, - &apiPsoHash, - &elfHash); - binaryCreateInfo.apiPsoHash = apiPsoHash; - } - enableFastLink = true; + BuildApiHash(pCreateInfo, + flags, + extStructs, + libInfo, + binaryCreateInfo, + &apiPsoHash, + &elfHash); + binaryCreateInfo.apiPsoHash = apiPsoHash; } + enableFastLink = true; } } - if (enableFastLink == false) + if ((enableFastLink == false) && (binariesProvided == false)) { // 2. Create Cache IDs result = GraphicsPipeline::CreateCacheId( @@ -875,7 +900,7 @@ VkResult GraphicsPipeline::Create( if (result == VK_SUCCESS) { - // 4. Build pipeline object create info + // 5. 
Build pipeline object create info BuildPipelineObjectCreateInfo( pDevice, pCreateInfo, @@ -889,10 +914,6 @@ VkResult GraphicsPipeline::Create( if (result == VK_SUCCESS) { - objectCreateInfo.immedInfo.checkDeferCompilePipeline = - pDevice->GetRuntimeSettings().deferCompileOptimizedPipeline && - (binaryMetadata.enableEarlyCompile || binaryMetadata.enableUberFetchShader); - #if VKI_RAY_TRACING objectCreateInfo.flags.hasRayTracing = binaryMetadata.rayQueryUsed; #endif @@ -905,7 +926,7 @@ VkResult GraphicsPipeline::Create( objectCreateInfo.dispatchRaysUserDataOffset = pPipelineLayout->GetDispatchRaysUserData(); #endif - // 5. Create pipeline objects + // 6. Create pipeline objects result = CreatePipelineObjects( pDevice, pCreateInfo, @@ -921,6 +942,7 @@ VkResult GraphicsPipeline::Create( apiPsoHash, &objectCreateInfo, pPipeline); + } } @@ -930,7 +952,7 @@ VkResult GraphicsPipeline::Create( // Free the created pipeline binaries now that the PAL Pipelines/PipelineBinaryInfo have read them. for (uint32_t deviceIdx = 0; deviceIdx < pDevice->NumPalDevices(); deviceIdx++) { - if (pipelineBinaries[deviceIdx].pCode != nullptr) + if ((binariesProvided == false) && (pipelineBinaries[deviceIdx].pCode != nullptr)) { pDevice->GetCompiler(deviceIdx)->FreeGraphicsPipelineBinary( binaryCreateInfo.compilerType, @@ -939,22 +961,6 @@ VkResult GraphicsPipeline::Create( } } - if (objectCreateInfo.immedInfo.checkDeferCompilePipeline) - { - GraphicsPipeline* pThis = GraphicsPipeline::ObjectFromHandle(*pPipeline); - result = pThis->BuildDeferCompileWorkload(pDevice, - pPipelineCache, - &binaryCreateInfo, - &shaderStageInfo, - &objectCreateInfo, - extStructs, - &cacheId[0]); - if (result == VK_SUCCESS) - { - pDefaultCompiler->ExecuteDeferCompile(&pThis->m_deferWorkload); - } - } - if (result == VK_SUCCESS) { const Device::DeviceFeatures& deviceFeatures = pDevice->GetEnabledFeatures(); @@ -1015,7 +1021,6 @@ VkResult GraphicsPipeline::Create( // i.e. 
we need keep temp buffer in binaryCreateInfo pDefaultCompiler->FreeGraphicsPipelineCreateInfo(pDevice, &binaryCreateInfo, - objectCreateInfo.immedInfo.checkDeferCompilePipeline, true); return result; @@ -1058,7 +1063,7 @@ static size_t GetVertexInputStructSize( // ===================================================================================================================== // Create cacheId for a graphics pipeline. VkResult GraphicsPipeline::CreateCacheId( - Device* pDevice, + const Device* pDevice, const VkGraphicsPipelineCreateInfo* pCreateInfo, const GraphicsPipelineExtStructs& extStructs, const GraphicsPipelineLibraryInfo& libInfo, @@ -1178,267 +1183,6 @@ static void CopyVertexInputStruct( } } -// ===================================================================================================================== -VkResult GraphicsPipeline::BuildDeferCompileWorkload( - Device* pDevice, - PipelineCache* pPipelineCache, - GraphicsPipelineBinaryCreateInfo* pBinaryCreateInfo, - GraphicsPipelineShaderStageInfo* pShaderStageInfo, - GraphicsPipelineObjectCreateInfo* pObjectCreateInfo, - const GraphicsPipelineExtStructs& extStructs, - Util::MetroHash::Hash* pCacheIds) -{ - VkResult result = VK_SUCCESS; - DeferGraphicsPipelineCreateInfo* pCreateInfo = nullptr; - - // Calculate payload size - size_t payloadSize = sizeof(DeferGraphicsPipelineCreateInfo) + sizeof(Util::Event); - for (uint32_t i = 0; i < ShaderStage::ShaderStageGfxCount; i++) - { - if (pShaderStageInfo->stages[i].pEntryPoint != nullptr) - { - payloadSize += strlen(pShaderStageInfo->stages[i].pEntryPoint) + 1; - if (pShaderStageInfo->stages[i].pSpecializationInfo != nullptr) - { - auto pSpecializationInfo = pShaderStageInfo->stages[i].pSpecializationInfo; - payloadSize += sizeof(VkSpecializationInfo); - payloadSize += sizeof(VkSpecializationMapEntry) * pSpecializationInfo->mapEntryCount; - payloadSize += pSpecializationInfo->dataSize; - } - } - } - - size_t vertexInputSize = 0; - if 
((pShaderStageInfo->stages[ShaderStage::ShaderStageVertex].pEntryPoint != nullptr) && - (pBinaryCreateInfo->pipelineInfo.pVertexInput != nullptr)) - { - vertexInputSize = GetVertexInputStructSize(pBinaryCreateInfo->pipelineInfo.pVertexInput); - payloadSize += vertexInputSize; - } - - size_t memOffset = 0; - Instance* pInstance = pDevice->VkInstance(); - void* pPayloadMem = pInstance->AllocMem(payloadSize, VK_DEFAULT_MEM_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - - if (pPayloadMem == nullptr) - { - result = VK_ERROR_OUT_OF_HOST_MEMORY; - } - - if (result == VK_SUCCESS) - { - memset(pPayloadMem, 0, payloadSize); - pCreateInfo = static_cast(pPayloadMem); - memOffset = sizeof(DeferGraphicsPipelineCreateInfo); - - // Fill create info and reset defer compile related options - pCreateInfo->pDevice = pDevice; - pCreateInfo->pPipelineCache = pPipelineCache; - pCreateInfo->pPipeline = this; - pCreateInfo->shaderStageInfo = *pShaderStageInfo; - pCreateInfo->binaryCreateInfo = *pBinaryCreateInfo; - pCreateInfo->objectCreateInfo = *pObjectCreateInfo; - - pCreateInfo->extStructs = extStructs; - memcpy(&pCreateInfo->cacheIds[0], pCacheIds, sizeof(pCreateInfo->cacheIds[0]) * MaxPalDevices); - - pCreateInfo->binaryCreateInfo.pipelineInfo.enableEarlyCompile = false; - pCreateInfo->binaryCreateInfo.pipelineInfo.enableUberFetchShader = false; - pCreateInfo->objectCreateInfo.immedInfo.checkDeferCompilePipeline = false; - - PipelineShaderInfo* pShaderInfo[] = - { - &pCreateInfo->binaryCreateInfo.pipelineInfo.task, - &pCreateInfo->binaryCreateInfo.pipelineInfo.vs, - &pCreateInfo->binaryCreateInfo.pipelineInfo.tcs, - &pCreateInfo->binaryCreateInfo.pipelineInfo.tes, - &pCreateInfo->binaryCreateInfo.pipelineInfo.gs, - &pCreateInfo->binaryCreateInfo.pipelineInfo.mesh, - &pCreateInfo->binaryCreateInfo.pipelineInfo.fs, - }; - - // Do deep copy for binaryCreateInfo members - for (uint32_t i = 0; i < ShaderStage::ShaderStageGfxCount; i++) - { - if (pShaderStageInfo->stages[i].pEntryPoint != 
nullptr) - { - size_t size = strlen(pShaderStageInfo->stages[i].pEntryPoint) + 1; - char* pEntryPoint = reinterpret_cast(Util::VoidPtrInc(pPayloadMem, memOffset)); - memcpy(pEntryPoint, pShaderStageInfo->stages[i].pEntryPoint, size); - pCreateInfo->shaderStageInfo.stages[i].pEntryPoint = pEntryPoint; - pShaderInfo[i]->pEntryTarget = pEntryPoint; - memOffset += size; - - if (pShaderStageInfo->stages[i].pSpecializationInfo != nullptr) - { - auto pSrcSpecInfo = pShaderStageInfo->stages[i].pSpecializationInfo; - auto pDestSpecInfo = reinterpret_cast(Util::VoidPtrInc(pPayloadMem, memOffset)); - *pDestSpecInfo = *pSrcSpecInfo; - memOffset += sizeof(VkSpecializationInfo); - - pDestSpecInfo->pMapEntries = reinterpret_cast(Util::VoidPtrInc(pPayloadMem, memOffset)); - memcpy(const_cast(pDestSpecInfo->pMapEntries), - pSrcSpecInfo->pMapEntries, - pSrcSpecInfo->mapEntryCount * sizeof(VkSpecializationMapEntry)); - memOffset += pSrcSpecInfo->mapEntryCount * sizeof(VkSpecializationMapEntry); - - pDestSpecInfo->pData = Util::VoidPtrInc(pPayloadMem, memOffset); - memcpy(const_cast(pDestSpecInfo->pData), - pSrcSpecInfo->pData, - pSrcSpecInfo->dataSize); - memOffset += pSrcSpecInfo->dataSize; - pCreateInfo->shaderStageInfo.stages[i].pSpecializationInfo = pDestSpecInfo; - pShaderInfo[i]->pSpecializationInfo = pDestSpecInfo; - } - } - } - - if (vertexInputSize != 0) - { - VkPipelineVertexInputStateCreateInfo* pVertexInput = - reinterpret_cast(Util::VoidPtrInc(pPayloadMem, memOffset)); - pCreateInfo->binaryCreateInfo.pipelineInfo.pVertexInput = pVertexInput; - CopyVertexInputStruct(pBinaryCreateInfo->pipelineInfo.pVertexInput, pVertexInput); - memOffset += vertexInputSize; - } - - // Copy pipeline optimizer key - memcpy( - pCreateInfo->shaderOptimizerKeys, - pBinaryCreateInfo->pPipelineProfileKey->pShaders, - sizeof(ShaderOptimizerKey)* pBinaryCreateInfo->pPipelineProfileKey->shaderCount); - pCreateInfo->pipelineOptimizerKey.pShaders = pCreateInfo->shaderOptimizerKeys; - 
pCreateInfo->pipelineOptimizerKey.shaderCount = pBinaryCreateInfo->pPipelineProfileKey->shaderCount; - pCreateInfo->binaryCreateInfo.pPipelineProfileKey = &pCreateInfo->pipelineOptimizerKey; - - // Copy binary metadata - pCreateInfo->binaryMetadata = *pBinaryCreateInfo->pBinaryMetadata; - pCreateInfo->binaryCreateInfo.pBinaryMetadata = &pCreateInfo->binaryMetadata; - - // Build defer workload - m_deferWorkload.pPayloads = pPayloadMem; - m_deferWorkload.pEvent = VK_PLACEMENT_NEW(Util::VoidPtrInc(pPayloadMem, memOffset))(Util::Event); - memOffset += sizeof(Util::Event); - VK_ASSERT(memOffset == payloadSize); - - EventCreateFlags flags = {}; - flags.manualReset = true; - m_deferWorkload.pEvent->Init(flags); - m_deferWorkload.Execute = ExecuteDeferCreateOptimizedPipeline; - } - - return result; -} - -// ===================================================================================================================== -void GraphicsPipeline::ExecuteDeferCreateOptimizedPipeline( - void *pPayload) -{ - DeferGraphicsPipelineCreateInfo* pCreateInfo = static_cast(pPayload); - pCreateInfo->pPipeline->DeferCreateOptimizedPipeline(pCreateInfo->pDevice, - pCreateInfo->pPipelineCache, - &pCreateInfo->binaryCreateInfo, - &pCreateInfo->shaderStageInfo, - &pCreateInfo->objectCreateInfo, - pCreateInfo->extStructs, - pCreateInfo->cacheIds); -} - -// ===================================================================================================================== -VkResult GraphicsPipeline::DeferCreateOptimizedPipeline( - Device* pDevice, - PipelineCache* pPipelineCache, - GraphicsPipelineBinaryCreateInfo* pBinaryCreateInfo, - GraphicsPipelineShaderStageInfo* pShaderStageInfo, - GraphicsPipelineObjectCreateInfo* pObjectCreateInfo, - const GraphicsPipelineExtStructs& extStructs, - Util::MetroHash::Hash* pCacheIds) -{ - VkResult result = VK_SUCCESS; - Vkgc::BinaryData pipelineBinaries[MaxPalDevices] = {}; - Pal::IPipeline* pPalPipeline[MaxPalDevices] = {}; - - Pal::Result palResult 
= Pal::Result::Success; - size_t palSize = - pDevice->PalDevice(DefaultDeviceIndex)->GetGraphicsPipelineSize(pObjectCreateInfo->pipeline, &palResult); - VK_ASSERT(palResult == Pal::Result::Success); - - uint32_t numPalDevices = pDevice->NumPalDevices(); - void* pSystemMem = pDevice->VkInstance()->AllocMem( - palSize, VK_DEFAULT_MEM_ALIGN, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - if (pSystemMem == nullptr) - { - result = VK_ERROR_OUT_OF_HOST_MEMORY; - } - - if (result == VK_SUCCESS) - { - GraphicsPipelineLibraryInfo libInfo = {}; - ExtractLibraryInfo(nullptr, nullptr, extStructs, 0, &libInfo); - result = CreatePipelineBinaries(pDevice, - nullptr, - extStructs, - libInfo, - 0, - pShaderStageInfo, - nullptr, - pBinaryCreateInfo->pPipelineProfileKey, - pBinaryCreateInfo, - pPipelineCache, - nullptr, - pCacheIds, - pipelineBinaries, - pBinaryCreateInfo->pBinaryMetadata); - } - - if (result == VK_SUCCESS) - { - result = CreatePalPipelineObjects(pDevice, - pPipelineCache, - pObjectCreateInfo, - pipelineBinaries, - pCacheIds, - pSystemMem, - pPalPipeline); - } - - if (result == VK_SUCCESS) - { - VK_ASSERT(pSystemMem == pPalPipeline[0]); - SetOptimizedPipeline(pPalPipeline); - } - - pDevice->GetCompiler(DefaultDeviceIndex)->FreeGraphicsPipelineCreateInfo(pDevice, pBinaryCreateInfo, false, true); - - for (uint32_t deviceIdx = 0; deviceIdx < pDevice->NumPalDevices(); deviceIdx++) - { - if (pipelineBinaries[deviceIdx].pCode != nullptr) - { - pDevice->GetCompiler(deviceIdx)->FreeGraphicsPipelineBinary( - pBinaryCreateInfo->compilerType, - pBinaryCreateInfo->freeCompilerBinary, - pipelineBinaries[deviceIdx]); - } - } - return result; -} - -// ===================================================================================================================== -void GraphicsPipeline::SetOptimizedPipeline( - Pal::IPipeline* pPalPipeline[MaxPalDevices]) -{ - const bool optimizedPipeline = true; - Util::MetroHash::Hash hash = {}; - Util::MetroHash64 palPipelineHasher; - 
palPipelineHasher.Update(PalPipelineHash()); - palPipelineHasher.Update(optimizedPipeline); - palPipelineHasher.Finalize(hash.bytes); - - Util::MutexAuto pipelineSwitchLock(&m_pipelineSwitchLock); - memcpy(m_pOptimizedPipeline, pPalPipeline, sizeof(m_pOptimizedPipeline)); - m_optimizedPipelineHash = hash.qwords[0]; -} - // ===================================================================================================================== GraphicsPipeline::GraphicsPipeline( Device* const pDevice, @@ -1472,9 +1216,6 @@ GraphicsPipeline::GraphicsPipeline( m_pInternalMem(pInternalMem), m_vbInfo(vbInfo), m_internalBufferInfo(*pInternalBuffer), - m_pOptimizedPipeline{}, - m_optimizedPipelineHash(0), - m_deferWorkload{}, m_flags(flags) { Pipeline::Init( @@ -1689,33 +1430,8 @@ VkResult GraphicsPipeline::Destroy( Device* pDevice, const VkAllocationCallbacks* pAllocator) { - if (m_deferWorkload.pEvent != nullptr) - { - auto result = m_deferWorkload.pEvent->Wait(10s); - if (result == Util::Result::Success) - { - Util::Destructor(m_deferWorkload.pEvent); - pDevice->VkInstance()->FreeMem(m_deferWorkload.pPayloads); - } - m_deferWorkload.pEvent = nullptr; - m_deferWorkload.pPayloads = nullptr; - } - DestroyStaticState(pAllocator); - if (m_pOptimizedPipeline[0] != nullptr) - { - void* pBaseMem = m_pOptimizedPipeline[0]; - for (uint32_t deviceIdx = 0; - (deviceIdx < m_pDevice->NumPalDevices()) && (m_pPalPipeline[deviceIdx] != nullptr); - deviceIdx++) - { - m_pOptimizedPipeline[deviceIdx]->Destroy(); - m_pOptimizedPipeline[deviceIdx] = nullptr; - } - pDevice->VkInstance()->FreeMem(pBaseMem); - } - if (m_pInternalMem != nullptr) { pDevice->MemMgr()->FreeGpuMem(m_pInternalMem); @@ -2007,6 +1723,8 @@ void GraphicsPipeline::BindToCmdBuffer( pRenderState->msaaCreateInfo.flags.enable1xMsaaSampleLocations = m_info.msaaCreateInfo.flags.enable1xMsaaSampleLocations; } + pRenderState->msaaCreateInfo.flags.forceSampleRateShading = + m_info.msaaCreateInfo.flags.forceSampleRateShading; } } else 
@@ -2132,13 +1850,12 @@ void GraphicsPipeline::BindToCmdBuffer( pRenderState->inputAssemblyState = m_info.inputAssemblyState; } - const bool useOptimizedPipeline = UseOptimizedPipeline(); const uint64_t oldHash = pRenderState->boundGraphicsPipelineHash; - const uint64_t newHash = useOptimizedPipeline ? m_optimizedPipelineHash : PalPipelineHash(); + const uint64_t newHash = PalPipelineHash(); const bool dynamicStateDirty = pGfxDynamicBindInfo->enable.u32All != dynamicGraphicsState.enable.u32All; - // Update pipleine dynamic state + // Update pipeline dynamic state pGfxDynamicBindInfo->enable.u32All = dynamicGraphicsState.enable.u32All; if (ContainsStaticState(DynamicStatesInternal::ColorWriteMask) ^ @@ -2337,7 +2054,7 @@ void GraphicsPipeline::BindToCmdBuffer( } } - if ((useOptimizedPipeline == false) && (m_internalBufferInfo.internalBufferCount > 0)) + if (m_internalBufferInfo.internalBufferCount > 0) { for (uint32_t i = 0; i < m_internalBufferInfo.internalBufferCount; i++) { @@ -2371,24 +2088,4 @@ void GraphicsPipeline::BindToCmdBuffer( } } -// ===================================================================================================================== -// Binds a null pipeline to PAL -void GraphicsPipeline::BindNullPipeline(CmdBuffer* pCmdBuffer) -{ - const uint32_t numDevices = pCmdBuffer->VkDevice()->NumPalDevices(); - - Pal::PipelineBindParams params = {}; - params.pipelineBindPoint = Pal::PipelineBindPoint::Graphics; - params.apiPsoHash = Pal::InternalApiPsoHash; - - for (uint32_t deviceIdx = 0; deviceIdx < numDevices; deviceIdx++) - { - Pal::ICmdBuffer* pPalCmdBuf = pCmdBuffer->PalCmdBuffer(deviceIdx); - - pPalCmdBuf->CmdBindPipeline(params); - pPalCmdBuf->CmdBindMsaaState(nullptr); - pPalCmdBuf->CmdBindColorBlendState(nullptr); - pPalCmdBuf->CmdBindDepthStencilState(nullptr); - } -} } // namespace vk diff --git a/icd/api/vk_graphics_pipeline_library.cpp b/icd/api/vk_graphics_pipeline_library.cpp index a166963f..6a9d4700 100644 --- 
a/icd/api/vk_graphics_pipeline_library.cpp +++ b/icd/api/vk_graphics_pipeline_library.cpp @@ -311,6 +311,7 @@ VkResult GraphicsPipelineLibrary::CreatePartialPipelineBinary( const Device* pDevice, PipelineCache* pPipelineCache, const VkGraphicsPipelineCreateInfo* pCreateInfo, + const GraphicsPipelineExtStructs& extStructs, const GraphicsPipelineLibraryInfo* pLibInfo, const GraphicsPipelineShaderStageInfo* pShaderStageInfo, GraphicsPipelineBinaryCreateInfo* pBinaryCreateInfo, @@ -359,7 +360,11 @@ VkResult GraphicsPipelineLibrary::CreatePartialPipelineBinary( if (canBuildShader) { result = pCompiler->CreateGraphicsShaderBinary( - pDevice, pPipelineCache, gplType, pBinaryCreateInfo, &pTempModuleStages[i]); + pDevice, + pPipelineCache, + gplType, + pBinaryCreateInfo, + &pTempModuleStages[i]); gplMask |= (1 << gplType); } @@ -384,7 +389,11 @@ VkResult GraphicsPipelineLibrary::CreatePartialPipelineBinary( VK_ASSERT(pTempModuleStages[TempIdx].elfPackage.codeSize == 0); result = pCompiler->CreateGraphicsShaderBinary( - pDevice, pPipelineCache, GraphicsLibraryPreRaster, pBinaryCreateInfo, &pTempModuleStages[TempIdx]); + pDevice, + pPipelineCache, + GraphicsLibraryPreRaster, + pBinaryCreateInfo, + &pTempModuleStages[TempIdx]); } if ((pLibInfo->libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_OUTPUT_INTERFACE_BIT_EXT) && @@ -395,51 +404,45 @@ VkResult GraphicsPipelineLibrary::CreatePartialPipelineBinary( VK_ASSERT(pTempModuleStages[TempIdx].elfPackage.codeSize == 0); - result = pCompiler->CreateGraphicsShaderBinary(pDevice, pPipelineCache, - GraphicsLibraryFragment, pBinaryCreateInfo, &pTempModuleStages[TempIdx]); + result = pCompiler->CreateGraphicsShaderBinary( + pDevice, + pPipelineCache, + GraphicsLibraryFragment, + pBinaryCreateInfo, + &pTempModuleStages[TempIdx]); } } // Create shader libraries for fast-link - if (pDevice->GetRuntimeSettings().useShaderLibraryForPipelineLibraryFastLink) + for (uint32_t stage = 0; (result == VK_SUCCESS) && (stage < 
ShaderStage::ShaderStageGfxCount); ++stage) { - for (uint32_t stage = 0; (result == VK_SUCCESS) && (stage < ShaderStage::ShaderStageGfxCount); ++stage) + GraphicsLibraryType gplType = GetGraphicsLibraryType(static_cast(stage)); + if ((pBinaryCreateInfo->earlyElfPackage[gplType].pCode != nullptr) && + (pBinaryCreateInfo->pShaderLibraries[gplType] == nullptr)) { - GraphicsLibraryType gplType = GetGraphicsLibraryType(static_cast(stage)); - if ((pBinaryCreateInfo->earlyElfPackage[gplType].pCode != nullptr) && - (pBinaryCreateInfo->pShaderLibraries[gplType] == nullptr)) - { - Vkgc::BinaryData palElfBinary = {}; + Vkgc::BinaryData palElfBinary = {}; - palElfBinary = pCompiler->GetSolution(pBinaryCreateInfo->compilerType)-> - ExtractPalElfBinary(pBinaryCreateInfo->earlyElfPackage[gplType]); - if (palElfBinary.codeSize > 0) - { - result = pCompiler->CreateGraphicsShaderLibrary(pDevice, - palElfBinary, - pAllocator, - &pBinaryCreateInfo->pShaderLibraries[gplType]); - pBinaryCreateInfo->earlyElfPackage[gplType].pCode = nullptr; - - if (pTempModuleStages[stage].elfPackage.codeSize > 0) - { - pDevice->VkInstance()->FreeMem(const_cast(pTempModuleStages[stage].elfPackage.pCode)); - pTempModuleStages[stage].elfPackage = {}; - } - } + palElfBinary = pCompiler->GetSolution(pBinaryCreateInfo->compilerType)-> + ExtractPalElfBinary(pBinaryCreateInfo->earlyElfPackage[gplType]); + if (palElfBinary.codeSize > 0) + { + result = pCompiler->CreateGraphicsShaderLibrary(pDevice, + palElfBinary, + pAllocator, + &pBinaryCreateInfo->pShaderLibraries[gplType]); } } + } - // If there is no fragment shader when create fragment library, we use a null pal graphics library. 
- if ((pLibInfo->libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) && - (pBinaryCreateInfo->pipelineInfo.fs.pModuleData == nullptr) && - (pShaderStageInfo->stages[ShaderStageFragment].codeHash.lower == 0) && - (pShaderStageInfo->stages[ShaderStageFragment].codeHash.upper == 0)) - { - const auto& fragmentCreateInfo = pDevice->GetNullFragmentLib()->GetPipelineBinaryCreateInfo(); - pBinaryCreateInfo->pShaderLibraries[GraphicsLibraryFragment] = - fragmentCreateInfo.pShaderLibraries[GraphicsLibraryFragment]; - } + // If there is no fragment shader when create fragment library, we use a null pal graphics library. + if ((pLibInfo->libFlags & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) && + (pBinaryCreateInfo->pipelineInfo.fs.pModuleData == nullptr) && + (pShaderStageInfo->stages[ShaderStageFragment].codeHash.lower == 0) && + (pShaderStageInfo->stages[ShaderStageFragment].codeHash.upper == 0)) + { + const auto& fragmentCreateInfo = pDevice->GetNullFragmentLib()->GetPipelineBinaryCreateInfo(); + pBinaryCreateInfo->pShaderLibraries[GraphicsLibraryFragment] = + fragmentCreateInfo.pShaderLibraries[GraphicsLibraryFragment]; } return result; @@ -476,22 +479,35 @@ VkResult GraphicsPipelineLibrary::Create( binaryCreateInfo.compilerType = PipelineCompilerTypeLlpc; } - // 1. Build shader stage infos + ShaderModuleHandle tempModules[ShaderStage::ShaderStageGfxCount] = {}; + PipelineOptimizerKey pipelineOptimizerKey = {}; + ShaderOptimizerKey shaderOptimizerKeys[ShaderStage::ShaderStageGfxCount] = {}; + uint64_t apiPsoHash = {}; + Util::MetroHash::Hash elfHash = {}; + + static_assert(VK_ARRAY_SIZE(shaderOptimizerKeys) == VK_ARRAY_SIZE(shaderStageInfo.stages), + "Please ensure stage count matches between gfx profile key and shader stage info."); + + // 1. 
Create Api PSO Hash and Elf Hash + result = CreateApiPsoHashAndElfHash( + pDevice, + pCreateInfo, + extStructs, + libInfo, + flags, + &shaderStageInfo, + &binaryCreateInfo, + shaderOptimizerKeys, + &pipelineOptimizerKey, + &apiPsoHash, + tempModules, + &elfHash); + + // 2. Initialize tempModuleStates if (result == VK_SUCCESS) { - ShaderModuleHandle tempModules[ShaderStage::ShaderStageGfxCount] = {}; - result = BuildShaderStageInfo(pDevice, - pCreateInfo->stageCount, - pCreateInfo->pStages, - [](const uint32_t inputIdx, const uint32_t stageIdx) - { - return stageIdx; - }, - shaderStageInfo.stages, - tempModules, - binaryCreateInfo.stageFeedback); - - // Initialize tempModuleStates + binaryCreateInfo.apiPsoHash = apiPsoHash; + for (uint32_t stage = 0; stage < ShaderStage::ShaderStageGfxCount; stage++) { if (shaderStageInfo.stages[stage].pModuleHandle != nullptr) @@ -510,39 +526,7 @@ VkResult GraphicsPipelineLibrary::Create( } } - // 2. Build ShaderOptimizer pipeline key - PipelineOptimizerKey pipelineOptimizerKey = {}; - ShaderOptimizerKey shaderOptimizerKeys[ShaderStage::ShaderStageGfxCount] = {}; - if (result == VK_SUCCESS) - { - static_assert( - VK_ARRAY_SIZE(shaderOptimizerKeys) == VK_ARRAY_SIZE(shaderStageInfo.stages), - "Please ensure stage count matches between gfx profile key and shader stage info."); - - GeneratePipelineOptimizerKey( - pDevice, - pCreateInfo, - extStructs, - libInfo, - flags, - &shaderStageInfo, - shaderOptimizerKeys, - &pipelineOptimizerKey); - } - - // 3. Build API and ELF hashes - uint64_t apiPsoHash = {}; - Util::MetroHash::Hash elfHash = {}; - BuildApiHash(pCreateInfo, - flags, - extStructs, - libInfo, - binaryCreateInfo, - &apiPsoHash, - &elfHash); - binaryCreateInfo.apiPsoHash = apiPsoHash; - - // 4. Get pipeline layout + // 3. 
Get pipeline layout const PipelineLayout* pPipelineLayout = PipelineLayout::ObjectFromHandle(pCreateInfo->layout); if (pPipelineLayout == nullptr) @@ -550,7 +534,7 @@ VkResult GraphicsPipelineLibrary::Create( pPipelineLayout = pDevice->GetNullPipelineLayout(); } - // 5. Populate binary create info + // 4. Populate binary create info PipelineMetadata binaryMetadata = {}; if (result == VK_SUCCESS) { @@ -567,23 +551,40 @@ VkResult GraphicsPipelineLibrary::Create( &binaryCreateInfo); } - GraphicsPipelineObjectCreateInfo objectCreateInfo = {}; if (result == VK_SUCCESS) { - // 6. Create partial pipeline binary for fast-link + // 5. Create partial pipeline binary for fast-link result = CreatePartialPipelineBinary( pDevice, pPipelineCache, pCreateInfo, + extStructs, &libInfo, &shaderStageInfo, &binaryCreateInfo, pAllocator, tempModuleStates); + + // Clean up temporary storage + for (uint32_t stage = 0; stage < ShaderStage::ShaderStageGfxCount; ++stage) + { + GraphicsLibraryType gplType = GetGraphicsLibraryType(static_cast(stage)); + + binaryCreateInfo.earlyElfPackage[gplType].pCode = nullptr; + + if (tempModuleStates[stage].elfPackage.codeSize > 0) + { + pDevice->VkInstance()->FreeMem(const_cast(tempModuleStates[stage].elfPackage.pCode)); + tempModuleStates[stage].elfPackage = {}; + } + } } // Cleanup temp memory in binaryCreateInfo. 
- pDevice->GetCompiler(DefaultDeviceIndex)->FreeGraphicsPipelineCreateInfo(pDevice, &binaryCreateInfo, false, true); + pDevice->GetCompiler(DefaultDeviceIndex)->FreeGraphicsPipelineCreateInfo(pDevice, &binaryCreateInfo, true); + + GraphicsPipelineObjectCreateInfo objectCreateInfo = {}; + size_t auxiliarySize = 0; if (result == VK_SUCCESS) { @@ -600,11 +601,11 @@ VkResult GraphicsPipelineLibrary::Create( &binaryCreateInfo); // Calculate object size - apiSize = sizeof(GraphicsPipelineLibrary); - size_t auxiliarySize = 0; + apiSize = sizeof(GraphicsPipelineLibrary); + auxiliarySize = 0; DumpGraphicsPipelineBinaryCreateInfo(&binaryCreateInfo, nullptr, &auxiliarySize); - const size_t objSize = apiSize + auxiliarySize; + size_t objSize = apiSize + auxiliarySize; // Allocate memory pSysMem = pDevice->AllocApiObject(pAllocator, objSize); @@ -667,6 +668,58 @@ VkResult GraphicsPipelineLibrary::Create( return result; } +// ===================================================================================================================== +VkResult GraphicsPipelineLibrary::CreateApiPsoHashAndElfHash( + const Device* pDevice, + const VkGraphicsPipelineCreateInfo* pCreateInfo, + const GraphicsPipelineExtStructs& extStructs, + const GraphicsPipelineLibraryInfo& libInfo, + VkPipelineCreateFlags2KHR flags, + GraphicsPipelineShaderStageInfo* pShaderStageInfo, + GraphicsPipelineBinaryCreateInfo* pBinaryCreateInfo, + ShaderOptimizerKey* pShaderOptimizerKeys, + PipelineOptimizerKey* pPipelineOptimizerKey, + uint64_t* pApiPsoHash, + ShaderModuleHandle* pTempModules, + Util::MetroHash::Hash* pElfHash) +{ + // 1. Build shader stage infos + VkResult result = BuildShaderStageInfo(pDevice, + pCreateInfo->stageCount, + pCreateInfo->pStages, + [](const uint32_t inputIdx, const uint32_t stageIdx) + { + return stageIdx; + }, + pShaderStageInfo->stages, + pTempModules, + pBinaryCreateInfo->stageFeedback); + + if (result == VK_SUCCESS) + { + // 2. 
Build ShaderOptimizer pipeline key + GeneratePipelineOptimizerKey(pDevice, + pCreateInfo, + extStructs, + libInfo, + flags, + pShaderStageInfo, + pShaderOptimizerKeys, + pPipelineOptimizerKey); + + // 3. Build API and ELF hashes + BuildApiHash(pCreateInfo, + flags, + extStructs, + libInfo, + *pBinaryCreateInfo, + pApiPsoHash, + pElfHash); + } + + return result; +} + // ===================================================================================================================== VkResult GraphicsPipelineLibrary::Destroy( Device* pDevice, diff --git a/icd/api/vk_image.cpp b/icd/api/vk_image.cpp index 4198c0c3..8cb71cad 100644 --- a/icd/api/vk_image.cpp +++ b/icd/api/vk_image.cpp @@ -260,8 +260,14 @@ void Image::ConvertImageCreateInfo( pPalCreateInfo->tilingOptMode = pDevice->GetTilingOptMode(); pPalCreateInfo->imageMemoryBudget = settings.imageMemoryBudget; - if ((pCreateInfo->imageType == VK_IMAGE_TYPE_3D) && - (pCreateInfo->usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT))) + const bool isSparse = (pCreateInfo->flags & SparseEnablingFlags) != 0; + + // Forcing non-default tiling preference for 3D PRT images would require us to expose custom block size support for + // these. But, we want to support residencyStandard3DBlockShape = true instead. + // Hence, imageTilingPreference3dGpuWritable can be used only for non-sparse images. 
+ if ((pCreateInfo->imageType == VK_IMAGE_TYPE_3D) && + (pCreateInfo->usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT)) && + (isSparse == false)) { pPalCreateInfo->tilingPreference = settings.imageTilingPreference3dGpuWritable; } diff --git a/icd/api/vk_indirect_commands_layout.cpp b/icd/api/vk_indirect_commands_layout.cpp index a3d7c8c2..80d85610 100644 --- a/icd/api/vk_indirect_commands_layout.cpp +++ b/icd/api/vk_indirect_commands_layout.cpp @@ -37,8 +37,8 @@ namespace vk { // ===================================================================================================================== // Creates an indirect commands layout object. -VkResult IndirectCommandsLayout::Create( - const Device* pDevice, +VkResult IndirectCommandsLayoutNV::Create( + Device* pDevice, const VkIndirectCommandsLayoutCreateInfoNV* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkIndirectCommandsLayoutNV* pLayout) @@ -46,14 +46,15 @@ VkResult IndirectCommandsLayout::Create( VkResult result = VK_SUCCESS; Pal::Result palResult; + IndirectCommandsLayoutNV* pObject = nullptr; + Pal::IndirectCmdGeneratorCreateInfo createInfo = {}; Pal::IndirectParam indirectParams[MaxIndirectTokenCount] = {}; createInfo.pParams = &indirectParams[0]; - Pal::IIndirectCmdGenerator* pGenerators[MaxPalDevices] = {}; - Pal::IGpuMemory* pGpuMemory[MaxPalDevices] = {}; + Pal::IIndirectCmdGenerator* pPalGenerator[MaxPalDevices] = {}; - const size_t apiSize = ObjectSize(pDevice); + const size_t apiSize = sizeof(IndirectCommandsLayoutNV); size_t totalSize = apiSize; size_t palSize = 0; @@ -140,7 +141,7 @@ VkResult IndirectCommandsLayout::Create( { palResult = pDevice->PalDevice(deviceIdx)->CreateIndirectCmdGenerator(createInfo, pPalMemory, - &pGenerators[deviceIdx]); + &pPalGenerator[deviceIdx]); } if (palResult == Pal::Result::Success) @@ -157,51 +158,89 @@ VkResult IndirectCommandsLayout::Create( if (result == VK_SUCCESS) { - result = BindGpuMemory(pDevice, pAllocator, pGenerators, 
pGpuMemory); + pObject = VK_PLACEMENT_NEW(pMemory) IndirectCommandsLayoutNV( + pDevice, + info, + pPalGenerator, + createInfo); + + result = pObject->Initialize(pDevice); } if (result == VK_SUCCESS) { - VK_PLACEMENT_NEW(pMemory) IndirectCommandsLayout( - pDevice, - info, - pGenerators, - pGpuMemory, - createInfo); + *pLayout = IndirectCommandsLayoutNV::HandleFromVoidPointer(pMemory); + } + else + { + for (uint32_t deviceIdx = 0; deviceIdx < pDevice->NumPalDevices(); deviceIdx++) + { + if (pPalGenerator[deviceIdx] != nullptr) + { + pPalGenerator[deviceIdx]->Destroy(); + } + } - *pLayout = IndirectCommandsLayout::HandleFromVoidPointer(pMemory); + Util::Destructor(pObject); + + pDevice->FreeApiObject(pAllocator, pMemory); } return result; } // ===================================================================================================================== -IndirectCommandsLayout::IndirectCommandsLayout( +IndirectCommandsLayoutNV::IndirectCommandsLayoutNV( const Device* pDevice, const IndirectCommandsInfo& info, - Pal::IIndirectCmdGenerator** pGenerators, - Pal::IGpuMemory** pGpuMemory, + Pal::IIndirectCmdGenerator** ppPalGenerator, const Pal::IndirectCmdGeneratorCreateInfo& palCreateInfo) : m_info(info), - m_palCreateInfo(palCreateInfo) + m_palCreateInfo(palCreateInfo), + m_internalMem() { - for (uint32_t deviceIdx = 0; deviceIdx < pDevice->NumPalDevices(); deviceIdx++) - { - m_perGpu[deviceIdx].pGenerator = pGenerators[deviceIdx]; - m_perGpu[deviceIdx].pGpuMemory = pGpuMemory[deviceIdx]; - } + memcpy(m_pPalGenerator, ppPalGenerator, sizeof(m_pPalGenerator)); } // ===================================================================================================================== -void IndirectCommandsLayout::BuildPalCreateInfo( +VkResult IndirectCommandsLayoutNV::Initialize( + Device* pDevice) +{ + VkResult result = VK_SUCCESS; + + constexpr bool ReadOnly = false; + constexpr bool RemoveInvisibleHeap = true; + constexpr bool PersistentMapped = false; + + // 
Allocate and bind GPU memory for the object + result = pDevice->MemMgr()->AllocAndBindGpuMem( + pDevice->NumPalDevices(), + reinterpret_cast(&m_pPalGenerator), + ReadOnly, + &m_internalMem, + pDevice->GetPalDeviceMask(), + RemoveInvisibleHeap, + PersistentMapped, + VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NV, + IndirectCommandsLayoutNV::IntValueFromHandle(IndirectCommandsLayoutNV::HandleFromObject(this))); + + return result; +} + +// ===================================================================================================================== +void IndirectCommandsLayoutNV::BuildPalCreateInfo( const Device* pDevice, const VkIndirectCommandsLayoutCreateInfoNV* pCreateInfo, Pal::IndirectParam* pIndirectParams, Pal::IndirectCmdGeneratorCreateInfo* pPalCreateInfo) { + uint32_t paramCount = 0; + uint32_t expectedOffset = 0; uint32_t bindingArgsSize = 0; + bool useNativeIndexType = true; + const bool isDispatch = (pCreateInfo->pTokens[pCreateInfo->tokenCount - 1].tokenType == VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_NV); @@ -209,59 +248,73 @@ void IndirectCommandsLayout::BuildPalCreateInfo( { const VkIndirectCommandsLayoutTokenNV& token = pCreateInfo->pTokens[i]; +#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 889 + // Set a padding operation to handle non tightly packed indirect arguments buffers + VK_ASSERT(token.offset >= expectedOffset); + if (token.offset > expectedOffset) + { + pIndirectParams[paramCount].type = Pal::IndirectParamType::Padding; + pIndirectParams[paramCount].sizeInBytes = token.offset - expectedOffset; + + bindingArgsSize += pIndirectParams[paramCount].sizeInBytes; + paramCount++; + } +#endif + switch (token.tokenType) { case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_NV: - pIndirectParams[i].type = Pal::IndirectParamType::Draw; - pIndirectParams[i].sizeInBytes = sizeof(Pal::DrawIndirectArgs); + pIndirectParams[paramCount].type = Pal::IndirectParamType::Draw; + pIndirectParams[paramCount].sizeInBytes = sizeof(Pal::DrawIndirectArgs); 
static_assert(sizeof(Pal::DrawIndirectArgs) == sizeof(VkDrawIndirectCommand)); break; case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_INDEXED_NV: - pIndirectParams[i].type = Pal::IndirectParamType::DrawIndexed; - pIndirectParams[i].sizeInBytes = sizeof(Pal::DrawIndexedIndirectArgs); + pIndirectParams[paramCount].type = Pal::IndirectParamType::DrawIndexed; + pIndirectParams[paramCount].sizeInBytes = sizeof(Pal::DrawIndexedIndirectArgs); static_assert(sizeof(Pal::DrawIndexedIndirectArgs) == sizeof(VkDrawIndexedIndirectCommand)); break; case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DISPATCH_NV: - pIndirectParams[i].type = Pal::IndirectParamType::Dispatch; - pIndirectParams[i].sizeInBytes = sizeof(Pal::DispatchIndirectArgs); + pIndirectParams[paramCount].type = Pal::IndirectParamType::Dispatch; + pIndirectParams[paramCount].sizeInBytes = sizeof(Pal::DispatchIndirectArgs); static_assert(sizeof(Pal::DispatchIndirectArgs) == sizeof(VkDispatchIndirectCommand)); break; case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV: - pIndirectParams[i].type = Pal::IndirectParamType::BindIndexData; - pIndirectParams[i].sizeInBytes = sizeof(Pal::BindIndexDataIndirectArgs); + pIndirectParams[paramCount].type = Pal::IndirectParamType::BindIndexData; + pIndirectParams[paramCount].sizeInBytes = sizeof(Pal::BindIndexDataIndirectArgs); + useNativeIndexType = (token.indexTypeCount == 0); static_assert(sizeof(Pal::BindIndexDataIndirectArgs) == sizeof(VkBindIndexBufferIndirectCommandNV)); break; case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV: - pIndirectParams[i].type = Pal::IndirectParamType::BindVertexData; - pIndirectParams[i].sizeInBytes = sizeof(Pal::BindVertexDataIndirectArgs); - pIndirectParams[i].vertexData.bufferId = token.vertexBindingUnit; - pIndirectParams[i].userDataShaderUsage = Pal::ApiShaderStageVertex; + pIndirectParams[paramCount].type = Pal::IndirectParamType::BindVertexData; + pIndirectParams[paramCount].sizeInBytes = sizeof(Pal::BindVertexDataIndirectArgs); + 
pIndirectParams[paramCount].vertexData.bufferId = token.vertexBindingUnit; + pIndirectParams[paramCount].userDataShaderUsage = Pal::ApiShaderStageVertex; static_assert(sizeof(Pal::BindVertexDataIndirectArgs) == sizeof(VkBindVertexBufferIndirectCommandNV)); break; case VK_INDIRECT_COMMANDS_TOKEN_TYPE_DRAW_MESH_TASKS_NV: - pIndirectParams[i].type = Pal::IndirectParamType::DispatchMesh; - pIndirectParams[i].sizeInBytes = sizeof(Pal::DispatchMeshIndirectArgs); + pIndirectParams[paramCount].type = Pal::IndirectParamType::DispatchMesh; + pIndirectParams[paramCount].sizeInBytes = sizeof(Pal::DispatchMeshIndirectArgs); static_assert(sizeof(Pal::DispatchMeshIndirectArgs) == sizeof(VkDrawMeshTasksIndirectCommandEXT)); break; case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV: { - const PipelineLayout* pPipelineLayout = PipelineLayout::ObjectFromHandle(token.pushconstantPipelineLayout); - const UserDataLayout& userDataLayout = pPipelineLayout->GetInfo().userDataLayout; + const PipelineLayout* pPipelineLayout = PipelineLayout::ObjectFromHandle(token.pushconstantPipelineLayout); + const UserDataLayout& userDataLayout = pPipelineLayout->GetInfo().userDataLayout; - uint32_t startInDwords = token.pushconstantOffset / sizeof(uint32_t); - uint32_t lengthInDwords = PipelineLayout::GetPushConstantSizeInDword(token.pushconstantSize); + uint32_t startInDwords = token.pushconstantOffset / sizeof(uint32_t); + uint32_t lengthInDwords = PipelineLayout::GetPushConstantSizeInDword(token.pushconstantSize); - pIndirectParams[i].type = Pal::IndirectParamType::SetUserData; - pIndirectParams[i].userData.entryCount = lengthInDwords; - pIndirectParams[i].sizeInBytes = sizeof(uint32_t) * lengthInDwords; - pIndirectParams[i].userData.firstEntry = userDataLayout.common.pushConstRegBase + startInDwords; - pIndirectParams[i].userDataShaderUsage = VkToPalShaderStageMask(token.pushconstantShaderStageFlags); + pIndirectParams[paramCount].type = Pal::IndirectParamType::SetUserData; + 
pIndirectParams[paramCount].userData.entryCount = lengthInDwords; + pIndirectParams[paramCount].sizeInBytes = sizeof(uint32_t) * lengthInDwords; + pIndirectParams[paramCount].userData.firstEntry = userDataLayout.common.pushConstRegBase + startInDwords; + pIndirectParams[paramCount].userDataShaderUsage = VkToPalShaderStageMask(token.pushconstantShaderStageFlags); break; } @@ -278,10 +331,20 @@ void IndirectCommandsLayout::BuildPalCreateInfo( } + // Override userDataShaderUsage to compute shader only for dispatch type + if (isDispatch) + { + pIndirectParams[paramCount].userDataShaderUsage = Pal::ShaderStageFlagBits::ApiShaderStageCompute; + } + if (i < (pCreateInfo->tokenCount - 1)) { - bindingArgsSize += pIndirectParams[i].sizeInBytes; + bindingArgsSize += pIndirectParams[paramCount].sizeInBytes; } + + // Calculate expected offset of the next token assuming indirect arguments buffers are tightly packed + expectedOffset = token.offset + pIndirectParams[paramCount].sizeInBytes; + paramCount++; } for (uint32_t i = 0; i < pCreateInfo->streamCount; ++i) @@ -290,20 +353,19 @@ void IndirectCommandsLayout::BuildPalCreateInfo( pPalCreateInfo->strideInBytes += stride; } - pPalCreateInfo->paramCount = pCreateInfo->tokenCount; + pPalCreateInfo->paramCount = paramCount; - // Override userDataShaderUsage to compute shader only for dispatch type - if (isDispatch) - { - for (uint32_t i = 0; i < pPalCreateInfo->paramCount; ++i) - { - pIndirectParams[i].userDataShaderUsage = Pal::ShaderStageFlagBits::ApiShaderStageCompute; - } - } + constexpr uint32_t DxgiIndexTypeUint8 = 62; + constexpr uint32_t DxgiIndexTypeUint16 = 57; + constexpr uint32_t DxgiIndexTypeUint32 = 42; + + pPalCreateInfo->indexTypeTokens[0] = useNativeIndexType ? VK_INDEX_TYPE_UINT8_KHR : DxgiIndexTypeUint8; + pPalCreateInfo->indexTypeTokens[1] = useNativeIndexType ? VK_INDEX_TYPE_UINT16 : DxgiIndexTypeUint16; + pPalCreateInfo->indexTypeTokens[2] = useNativeIndexType ? 
VK_INDEX_TYPE_UINT32 : DxgiIndexTypeUint32; } // ===================================================================================================================== -void IndirectCommandsLayout::CalculateMemoryRequirements( +void IndirectCommandsLayoutNV::CalculateMemoryRequirements( const Device* pDevice, VkMemoryRequirements2* pMemoryRequirements ) const @@ -315,8 +377,9 @@ void IndirectCommandsLayout::CalculateMemoryRequirements( Pal::GpuMemoryRequirements memReqs = {}; memReqs.flags.cpuAccess = 0; + memReqs.heapCount = 2; memReqs.heaps[0] = Pal::GpuHeap::GpuHeapInvisible; - memReqs.heapCount = 1; + memReqs.heaps[1] = Pal::GpuHeap::GpuHeapLocal; for (uint32_t i = 0; i < memReqs.heapCount; ++i) { @@ -330,127 +393,19 @@ void IndirectCommandsLayout::CalculateMemoryRequirements( } // ===================================================================================================================== -VkResult IndirectCommandsLayout::BindGpuMemory( - const Device* pDevice, - const VkAllocationCallbacks* pAllocator, - Pal::IIndirectCmdGenerator** pGenerators, - Pal::IGpuMemory** pGpuMemory) -{ - VkResult result = VK_SUCCESS; - Pal::Result palResult; - - Pal::GpuMemoryRequirements memReqs[MaxPalDevices] = {}; - Pal::GpuMemoryCreateInfo memCreateInfos[MaxPalDevices] = {}; - - size_t totalSize = 0; - - void* pMemory = nullptr; - - for (uint32_t deviceIdx = 0; deviceIdx < pDevice->NumPalDevices(); deviceIdx++) - { - pGenerators[deviceIdx]->GetGpuMemoryRequirements(&memReqs[deviceIdx]); - - memCreateInfos[deviceIdx].size = memReqs[deviceIdx].size; - memCreateInfos[deviceIdx].alignment = memReqs[deviceIdx].alignment; - memCreateInfos[deviceIdx].priority = Pal::GpuMemPriority::Normal; - memCreateInfos[deviceIdx].heapCount = memReqs[deviceIdx].heapCount; - - for (uint32 i = 0; i < memReqs[deviceIdx].heapCount; ++i) - { - memCreateInfos[deviceIdx].heaps[i] = memReqs[deviceIdx].heaps[i]; - } - - const size_t size = 
pDevice->PalDevice(deviceIdx)->GetGpuMemorySize(memCreateInfos[deviceIdx], - &palResult); - - if (palResult == Pal::Result::Success) - { - totalSize += size; - } - else - { - result = PalToVkResult(palResult); - break; - } - } - - if (result == VK_SUCCESS) - { - pMemory = pAllocator->pfnAllocation(pAllocator->pUserData, - totalSize, - VK_DEFAULT_MEM_ALIGN, - VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); - - if (pMemory == nullptr) - { - result = VK_ERROR_OUT_OF_HOST_MEMORY; - } - } - - if (result == VK_SUCCESS) - { - void* pPalMemory = pMemory; - - for (uint32_t deviceIdx = 0; deviceIdx < pDevice->NumPalDevices(); deviceIdx++) - { - const size_t size = pDevice->PalDevice(deviceIdx)->GetGpuMemorySize(memCreateInfos[deviceIdx], - &palResult); - - if (palResult == Pal::Result::Success) - { - palResult = pDevice->PalDevice(deviceIdx)->CreateGpuMemory(memCreateInfos[deviceIdx], - pPalMemory, - &pGpuMemory[deviceIdx]); - } - - if (palResult == Pal::Result::Success) - { - // Gpu memory binding for IndirectCmdGenerator to build SRD containing properties and parameter data. 
- palResult = pGenerators[deviceIdx]->BindGpuMemory(pGpuMemory[deviceIdx], 0); - } - else - { - result = PalToVkResult(palResult); - break; - } - - if (palResult == Pal::Result::Success) - { - pPalMemory = Util::VoidPtrInc(pPalMemory, size); - } - else - { - result = PalToVkResult(palResult); - break; - } - } - } - - return result; -} - -// ===================================================================================================================== -VkResult IndirectCommandsLayout::Destroy( +VkResult IndirectCommandsLayoutNV::Destroy( Device* pDevice, const VkAllocationCallbacks* pAllocator) { for (uint32_t deviceIdx = 0; deviceIdx < pDevice->NumPalDevices(); deviceIdx++) { - if (m_perGpu[deviceIdx].pGenerator != nullptr) + if (m_pPalGenerator[deviceIdx] != nullptr) { - m_perGpu[deviceIdx].pGenerator->Destroy(); - } - - if (m_perGpu[deviceIdx].pGpuMemory != nullptr) - { - m_perGpu[deviceIdx].pGpuMemory->Destroy(); + m_pPalGenerator[deviceIdx]->Destroy(); } } - if (m_perGpu[DefaultDeviceIndex].pGpuMemory != nullptr) - { - pAllocator->pfnFree(pAllocator->pUserData, m_perGpu[DefaultDeviceIndex].pGpuMemory); - } + pDevice->MemMgr()->FreeGpuMem(&m_internalMem); Util::Destructor(this); diff --git a/icd/api/vk_instance.cpp b/icd/api/vk_instance.cpp index 98b14635..af3e7ad3 100644 --- a/icd/api/vk_instance.cpp +++ b/icd/api/vk_instance.cpp @@ -458,8 +458,8 @@ VkResult Instance::Init( if (status == VK_SUCCESS) { PhysicalDevice* pPhysicalDevice = ApiPhysicalDevice::ObjectFromHandle(devices[DefaultDeviceIndex]); - Pal::DeviceProperties info; - pPhysicalDevice->PalDevice()->GetProperties(&info); + const Pal::DeviceProperties& info = pPhysicalDevice->PalProperties(); + if (pPhysicalDevice->GetRuntimeSettings().enableSpp && info.gfxipProperties.flags.supportSpp) { wchar_t executableName[PATH_MAX]; diff --git a/icd/api/vk_physical_device.cpp b/icd/api/vk_physical_device.cpp index eb313c7e..13c38323 100644 --- a/icd/api/vk_physical_device.cpp +++ 
b/icd/api/vk_physical_device.cpp @@ -551,12 +551,7 @@ static void GetFormatFeatureFlags( retFlags &= ~VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT; retFlags &= ~VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT; -#if VKI_BUILD_GFX11 - if (palProps.gfxLevel < Pal::GfxIpLevel::GfxIp11_0) -#endif - { - retFlags &= ~VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; - } + retFlags &= ~VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT; } else { @@ -2159,7 +2154,8 @@ VkResult PhysicalDevice::GetImageFormatProperties( (((usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) != 0) && ((supportedFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) == 0)) || (((usage & VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT) != 0) && - ((supportedFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT) == 0)) || + ((supportedFeatures & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | + VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) == 0)) || (((usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR) != 0) && ((supportedFeatures & VK_FORMAT_FEATURE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR) == 0))) { @@ -2571,12 +2567,12 @@ VkResult PhysicalDevice::GetPhysicalDeviceCalibrateableTimeDomainsEXT( uint32_t* pTimeDomainCount, VkTimeDomainEXT* pTimeDomains) { - Pal::DeviceProperties deviceProperties = {}; - VkResult result = PalToVkResult(m_pPalDevice->GetProperties(&deviceProperties)); - VK_ASSERT(result == VK_SUCCESS); + const Pal::DeviceProperties& deviceProperties = PalProperties(); uint32_t timeDomainCount = Util::CountSetBits(deviceProperties.osProperties.timeDomains.u32All); + VkResult result = VK_SUCCESS; + if (pTimeDomains == nullptr) { *pTimeDomainCount = timeDomainCount; @@ -2713,6 +2709,7 @@ void PhysicalDevice::GetDeviceProperties( memcpy(pProperties->deviceName, palProps.gpuName, Util::Min(Pal::MaxDeviceName, Pal::uint32(VK_MAX_PHYSICAL_DEVICE_NAME_SIZE))); + pProperties->deviceName[VK_MAX_PHYSICAL_DEVICE_NAME_SIZE - 1] = 0; pProperties->limits = GetLimits(); @@ -2745,14 +2742,15 @@ bool PhysicalDevice::QueueSupportsPresents( { // Do we have 
any of this engine type and, if so, does it support a queueType that supports presents? const Pal::EngineType palEngineType = m_queueFamilies[queueFamilyIndex].palEngineType; + const Pal::QueueType palQueueType = m_queueFamilies[queueFamilyIndex].palQueueType; const auto& engineProps = m_properties.engineProperties[palEngineType]; + const auto& queueProperties = m_properties.queueProperties[palQueueType]; Pal::PresentMode presentMode = - (platform == VK_ICD_WSI_PLATFORM_DISPLAY)? Pal::PresentMode::Fullscreen : Pal::PresentMode::Windowed; + (platform == VK_ICD_WSI_PLATFORM_DISPLAY) ? Pal::PresentMode::Fullscreen : Pal::PresentMode::Windowed; - return (engineProps.engineCount > 0) && + return (engineProps.engineCount > 0) && queueProperties.flags.supportsSwapChainPresents && (m_pPalDevice->GetSupportedSwapChainModes(VkToPalWsiPlatform(platform), presentMode) != 0); - } // ===================================================================================================================== @@ -3805,8 +3803,8 @@ VkResult PhysicalDevice::GetSurfaceFormats( { // The w/a here will be removed once more presentable format is supported on base driver side. 
const VkSurfaceFormatKHR formatList[] = { - { VK_FORMAT_B8G8R8A8_UNORM, VK_COLORSPACE_SRGB_NONLINEAR_KHR }, - { VK_FORMAT_B8G8R8A8_SRGB, VK_COLORSPACE_SRGB_NONLINEAR_KHR } + { VK_FORMAT_B8G8R8A8_UNORM, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR }, + { VK_FORMAT_B8G8R8A8_SRGB, VK_COLOR_SPACE_SRGB_NONLINEAR_KHR } }; const uint32_t formatCount = sizeof(formatList) / sizeof(formatList[0]); @@ -4412,7 +4410,6 @@ DeviceExtensions::Supported PhysicalDevice::GetAvailableExtensions( } bool exposeNvComputeShaderDerivatives = false; - if ((pPhysicalDevice == nullptr) || (pPhysicalDevice->GetRuntimeSettings().exportNvComputeShaderDerivatives)) { exposeNvComputeShaderDerivatives = true; @@ -6218,8 +6215,8 @@ size_t PhysicalDevice::GetFeatures2( if (updateFeatures) { - pExtInfo->fragmentShaderSampleInterlock = VK_FALSE; - pExtInfo->fragmentShaderPixelInterlock = VK_FALSE; + pExtInfo->fragmentShaderSampleInterlock = VK_TRUE; + pExtInfo->fragmentShaderPixelInterlock = VK_TRUE; pExtInfo->fragmentShaderShadingRateInterlock = VK_FALSE; } @@ -7034,6 +7031,19 @@ size_t PhysicalDevice::GetFeatures2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_7_FEATURES_KHR: + { + auto* pExtInfo = reinterpret_cast(pHeader); + + if (updateFeatures) + { + pExtInfo->maintenance7 = VK_TRUE; + } + + structSize = sizeof(*pExtInfo); + break; + } + #if VKI_RAY_TRACING #endif @@ -8328,6 +8338,21 @@ void PhysicalDevice::GetDeviceProperties2( break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_7_PROPERTIES_KHR: + { + auto* pProps = static_cast(pNext); + + pProps->robustFragmentShadingRateAttachmentAccess = true; + pProps->separateDepthStencilAttachmentAccess = true; + pProps->maxDescriptorSetTotalUniformBuffersDynamic = MaxDynamicUniformDescriptors; + pProps->maxDescriptorSetTotalStorageBuffersDynamic = MaxDynamicStorageDescriptors; + pProps->maxDescriptorSetTotalBuffersDynamic = MaxDynamicDescriptors; + pProps->maxDescriptorSetUpdateAfterBindTotalUniformBuffersDynamic = 
MaxDynamicUniformDescriptors; + pProps->maxDescriptorSetUpdateAfterBindTotalStorageBuffersDynamic = MaxDynamicStorageDescriptors; + pProps->maxDescriptorSetUpdateAfterBindTotalBuffersDynamic = MaxDynamicDescriptors; + break; + } + #if VKI_RAY_TRACING #endif diff --git a/icd/api/vk_physical_device_manager.cpp b/icd/api/vk_physical_device_manager.cpp index eb308126..6c2ba504 100644 --- a/icd/api/vk_physical_device_manager.cpp +++ b/icd/api/vk_physical_device_manager.cpp @@ -424,9 +424,7 @@ VkResult PhysicalDeviceManager::UpdateLockedPhysicalDeviceList(void) { PhysicalDevice* pPhysicalDevice = ApiPhysicalDevice::ObjectFromHandle(deviceList[currentDeviceIndex]); - Pal::DeviceProperties info; - - pPhysicalDevice->PalDevice()->GetProperties(&info); + const Pal::DeviceProperties& info = pPhysicalDevice->PalProperties(); PerfIndex perf; diff --git a/icd/api/vk_pipeline.cpp b/icd/api/vk_pipeline.cpp index 0a793d0b..6ff46e2c 100644 --- a/icd/api/vk_pipeline.cpp +++ b/icd/api/vk_pipeline.cpp @@ -942,9 +942,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetShaderInfoAMD( ConvertShaderInfoStatistics(palStats, pStats); - Pal::DeviceProperties info; - - pDevice->VkPhysicalDevice(DefaultDeviceIndex)->PalDevice()->GetProperties(&info); + const Pal::DeviceProperties& info = pDevice->GetPalProperties(); pStats->numPhysicalVgprs = info.gfxipProperties.shaderCore.vgprsPerSimd; pStats->numPhysicalSgprs = info.gfxipProperties.shaderCore.sgprsPerSimd; diff --git a/icd/api/vk_pipeline_cache.cpp b/icd/api/vk_pipeline_cache.cpp index bdc484a2..98938439 100644 --- a/icd/api/vk_pipeline_cache.cpp +++ b/icd/api/vk_pipeline_cache.cpp @@ -121,6 +121,8 @@ VkResult PipelineCache::Create( } vk::PhysicalDevice* pDefaultPhysicalDevice = pDevice->VkPhysicalDevice(DefaultDeviceIndex); + + // This call to PipelineBinaryCache::Create must use the VkInstance allocation callbacks to avoid issues. 
pBinaryCache = PipelineBinaryCache::Create( pDefaultPhysicalDevice->VkInstance()->GetAllocCallbacks(), pDefaultPhysicalDevice->GetPlatformKey(), diff --git a/icd/api/vk_pipeline_layout.cpp b/icd/api/vk_pipeline_layout.cpp index 29d73be6..dc8c46a5 100644 --- a/icd/api/vk_pipeline_layout.cpp +++ b/icd/api/vk_pipeline_layout.cpp @@ -56,7 +56,6 @@ static bool IsUberFetchShaderEnabled(const Device* pDevice) bool enabled = false; if (pDevice->GetRuntimeSettings().enableUberFetchShader || - (pDevice->GetRuntimeSettings().enableEarlyCompile && (scheme == PipelineLayoutScheme::Compact)) || pDevice->IsExtensionEnabled(DeviceExtensions::EXT_GRAPHICS_PIPELINE_LIBRARY) || pDevice->IsExtensionEnabled(DeviceExtensions::EXT_VERTEX_INPUT_DYNAMIC_STATE) ) @@ -235,34 +234,45 @@ void PipelineLayout::ProcessPushConstantsInfo( // ===================================================================================================================== // Checks if GpuRT resource mappings will need to be added to this pipeline layout bool PipelineLayout::HasRayTracing( + const Device* pDevice, const VkPipelineLayoutCreateInfo* pIn) { bool rtFound = false; - for (uint32_t setIndex = 0; (setIndex < pIn->setLayoutCount) && (rtFound == false); ++setIndex) + if (pDevice->RayTrace() != nullptr) { - if (pIn->pSetLayouts[setIndex] != VK_NULL_HANDLE) + if (pIn->setLayoutCount == 0) { - const auto pSetLayout = DescriptorSetLayout::ObjectFromHandle(pIn->pSetLayouts[setIndex]); - - // Test if the set layout supports the RayGen stage (required for RT pipelines) - // Without this check, compilation fails for RT pipelines that don't utilize trace ray - if (Util::TestAnyFlagSet(pSetLayout->Info().activeStageMask, VK_SHADER_STAGE_RAYGEN_BIT_KHR) == true) - { - rtFound = true; - break; - } + // If layout is empty, we will reserve raytracing node. Otherwise the resources that gpurt requires + // will not be bound. 
+ rtFound = true; + } - // Test if an acceleration structure descriptor binding is present (necessary for pipelines with ray query) - for (uint32_t bindingIndex = 0; bindingIndex < pSetLayout->Info().count; ++bindingIndex) + for (uint32_t setIndex = 0; (setIndex < pIn->setLayoutCount) && (rtFound == false); ++setIndex) + { + if (pIn->pSetLayouts[setIndex] != VK_NULL_HANDLE) { - const DescriptorSetLayout::BindingInfo& binding = pSetLayout->Binding(bindingIndex); + const auto pSetLayout = DescriptorSetLayout::ObjectFromHandle(pIn->pSetLayouts[setIndex]); - if (binding.info.descriptorType == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) + // Test if the set layout supports the RayGen stage (required for RT pipelines) + // Without this check, compilation fails for RT pipelines that don't utilize trace ray + if (Util::TestAnyFlagSet(pSetLayout->Info().activeStageMask, VK_SHADER_STAGE_RAYGEN_BIT_KHR) == true) { rtFound = true; break; } + + // Test if an acceleration structure descriptor binding is present (necessary for pipelines with ray query) + for (uint32_t bindingIndex = 0; bindingIndex < pSetLayout->Info().count; ++bindingIndex) + { + const DescriptorSetLayout::BindingInfo& binding = pSetLayout->Binding(bindingIndex); + + if (binding.info.descriptorType == VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR) + { + rtFound = true; + break; + } + } } } } @@ -308,24 +318,7 @@ VkResult PipelineLayout::BuildCompactSchemeInfo( memset(&(pInfo->userDataLayout), 0, sizeof(UserDataLayout)); pInfo->userDataLayout.scheme = PipelineLayoutScheme::Compact; - if (settings.enableEarlyCompile) - { - // Early compile mode will enable uber-fetch shader and spec constant buffer on vertex shader and - // fragment shader implicitly. So we need three reserved node. - // Each buffer consume 2 user data register now. 
- pPipelineInfo->numUserDataNodes += 3; - pInfo->userDataRegCount += 3 * InternalConstBufferRegCount; - - pUserDataLayout->uberFetchConstBufRegBase = FetchShaderInternalBufferOffset; - pUserDataLayout->specConstBufVertexRegBase = SpecConstBufferVertexOffset; - pUserDataLayout->specConstBufFragmentRegBase = SpecConstBufferFragmentOffset; - } - else - { - pUserDataLayout->uberFetchConstBufRegBase = InvalidReg; - pUserDataLayout->specConstBufVertexRegBase = InvalidReg; - pUserDataLayout->specConstBufFragmentRegBase = InvalidReg; - } + pUserDataLayout->uberFetchConstBufRegBase = InvalidReg; VK_ASSERT(pIn->setLayoutCount <= MaxDescriptorSets); @@ -345,15 +338,14 @@ VkResult PipelineLayout::BuildCompactSchemeInfo( } #if VKI_RAY_TRACING - if (HasRayTracing(pIn)) + if (HasRayTracing(pDevice, pIn)) { gfxReservedCount += (InternalConstBufferRegCount + MaxTraceRayUserDataRegCount); } #endif // the user data entries for uber-fetch shader const buffer - if (IsUberFetchShaderEnabled(pDevice) && - (settings.enableEarlyCompile == false)) + if (IsUberFetchShaderEnabled(pDevice)) { gfxReservedCount += SetPtrRegCount; } @@ -374,8 +366,7 @@ VkResult PipelineLayout::BuildCompactSchemeInfo( // If uber-fetch shader is not enabled for early compile, the user data entries for uber-fetch shader const // buffer is appended at the bottom of user data table. Just following vertex buffer table. - if (IsUberFetchShaderEnabled(pDevice) && - (pDevice->GetRuntimeSettings().enableEarlyCompile == false)) + if (IsUberFetchShaderEnabled(pDevice)) { VK_ASSERT(pUserDataLayout->uberFetchConstBufRegBase == InvalidReg); @@ -411,7 +402,7 @@ VkResult PipelineLayout::BuildCompactSchemeInfo( &pUserDataLayout->threadGroupReversalRegBase); #if VKI_RAY_TRACING - if (HasRayTracing(pIn)) + if (HasRayTracing(pDevice, pIn)) { // Reserve one node for indirect RT capture replay. 
pPipelineInfo->numUserDataNodes += 1; @@ -545,17 +536,6 @@ VkResult PipelineLayout::BuildCompactSchemeInfo( // Add the number of user data regs used by this set to the total count for the whole layout pInfo->userDataRegCount += pSetUserData->totalRegCount; - if (settings.pipelineLayoutMode == PipelineLayoutAngle) - { - // Force next set firstRegOffset align to AngleDescPattern. - if ((i + 1) < Util::ArrayLen(AngleDescPattern::DescriptorSetOffset)) - { - if (pInfo->userDataRegCount < AngleDescPattern::DescriptorSetOffset[i + 1]) - { - pInfo->userDataRegCount = AngleDescPattern::DescriptorSetOffset[i + 1]; - } - } - } } // Calculate total number of user data regs used for active descriptor set data @@ -609,8 +589,6 @@ VkResult PipelineLayout::BuildIndirectSchemeInfo( VK_ASSERT(pIn->setLayoutCount <= MaxDescriptorSets); const RuntimeSettings& settings = pDevice->GetRuntimeSettings(); - VK_ASSERT(settings.pipelineLayoutMode != PipelineLayoutAngle); - VK_ASSERT(settings.enableEarlyCompile == false); VkResult result = VK_SUCCESS; @@ -675,7 +653,7 @@ VkResult PipelineLayout::BuildIndirectSchemeInfo( &pUserDataLayout->threadGroupReversalRegBase); #if VKI_RAY_TRACING - if (HasRayTracing(pIn)) + if (HasRayTracing(pDevice, pIn)) { pUserDataLayout->dispatchRaysArgsPtrRegBase = pInfo->userDataRegCount; pPipelineInfo->numUserDataNodes += MaxTraceRayUserDataNodeCount; @@ -1271,8 +1249,6 @@ VkResult PipelineLayout::BuildCompactSchemeLlpcPipelineMapping( const auto& userDataLayout = m_info.userDataLayout.compact; const auto& commonUserDataLayout = m_info.userDataLayout.common; - const bool enableEarlyCompile = m_pDevice->GetRuntimeSettings().enableEarlyCompile; - Vkgc::ResourceMappingRootNode* pUserDataNodes = static_cast(pBuffer); Vkgc::ResourceMappingNode* pResourceNodes = reinterpret_cast(pUserDataNodes + m_pipelineInfo.numUserDataNodes); @@ -1283,44 +1259,10 @@ VkResult PipelineLayout::BuildCompactSchemeLlpcPipelineMapping( uint32_t mappingNodeCount = 0; // Number of consumed 
ResourceMappingNodes (only sub-nodes) uint32_t descriptorRangeCount = 0; // Number of consumed StaticResourceValues - if (enableEarlyCompile) - { - VK_ASSERT(userDataLayout.specConstBufVertexRegBase == SpecConstBufferVertexOffset); - VK_ASSERT(userDataLayout.specConstBufFragmentRegBase == SpecConstBufferFragmentOffset); - - constexpr uint32_t MaxInternalSpecConstBuffSize = UINT16_MAX; - if (stageMask & Vkgc::ShaderStageVertexBit) - { - BuildLlpcInternalInlineBufferMapping( - Vkgc::ShaderStageVertexBit, - userDataLayout.specConstBufVertexRegBase, - MaxInternalSpecConstBuffSize, - static_cast(Vkgc::SpecConstInternalBufferBindingId) + - static_cast(ShaderStage::ShaderStageVertex), - &pUserDataNodes[userDataNodeCount], - &userDataNodeCount, - &pResourceNodes[mappingNodeCount], - &mappingNodeCount); - } - - if (stageMask & Vkgc::ShaderStageFragmentBit) - { - BuildLlpcInternalInlineBufferMapping( - Vkgc::ShaderStageFragmentBit, - userDataLayout.specConstBufFragmentRegBase, - MaxInternalSpecConstBuffSize, - static_cast(Vkgc::SpecConstInternalBufferBindingId) + - static_cast(ShaderStage::ShaderStageFragment), - &pUserDataNodes[userDataNodeCount], - &userDataNodeCount, - &pResourceNodes[mappingNodeCount], - &mappingNodeCount); - } - } if (pVbInfo != nullptr) { const uint32_t tailingVertexBufferRegCount = - (appendFetchShaderCb && (enableEarlyCompile == false)) ? + appendFetchShaderCb ? 
(VbTablePtrRegCount + InternalConstBufferRegCount) : VbTablePtrRegCount; if ((m_info.userDataRegCount + tailingVertexBufferRegCount) <= @@ -1336,8 +1278,6 @@ VkResult PipelineLayout::BuildCompactSchemeLlpcPipelineMapping( if (appendFetchShaderCb) { - VK_ASSERT((enableEarlyCompile == false) || - (userDataLayout.uberFetchConstBufRegBase == FetchShaderInternalBufferOffset)); const uint32_t MaxUberFetchConstBufSize = PipelineCompiler::GetMaxUberFetchShaderInternalDataSize(); // Append node for uber fetch shader constant buffer BuildLlpcInternalInlineBufferMapping( diff --git a/icd/api/vk_query.cpp b/icd/api/vk_query.cpp index a187ad82..d16685b6 100644 --- a/icd/api/vk_query.cpp +++ b/icd/api/vk_query.cpp @@ -712,20 +712,18 @@ VkResult TimestampQueryPool::GetResults( volatile const uint64_t* pTimestamp = reinterpret_cast(Util::VoidPtrInc(pSrcData, srcSlotOffset)); - // Test if the timestamp query is available - uint64_t value = *pTimestamp; - bool ready = (value != TimestampNotReady); - // Wait until the timestamp query has become available if ((flags & VK_QUERY_RESULT_WAIT_BIT) != 0) { - while (!ready) + while ((*pTimestamp) == TimestampNotReady) { - value = *pTimestamp; - ready = (value != TimestampNotReady); + Util::YieldThread(); } } + // Test if the timestamp query is available + const bool ready = ((*pTimestamp) != TimestampNotReady); + // Get a pointer to the start of this slot's data void* pSlotData = Util::VoidPtrInc(pData, static_cast(dstSlot * stride)); @@ -737,7 +735,7 @@ VkResult TimestampQueryPool::GetResults( if (ready) { - pSlot[0] = value; + pSlot[0] = (*pTimestamp); } if ((flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) != 0) @@ -751,7 +749,8 @@ VkResult TimestampQueryPool::GetResults( if (ready) { - pSlot[0] = static_cast(value); // Note: 32-bit results are allowed to wrap + // Note: 32-bit results are allowed to wrap + pSlot[0] = static_cast(*pTimestamp); } if ((flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) != 0) @@ -788,24 +787,18 @@ void 
TimestampQueryPool::Reset( queryCount = Util::Min(queryCount, m_entryCount - startQuery); // Query pool size needs to be reset in qwords. - const uint32_t queryDataSize = (m_slotSize * queryCount) / sizeof(uint64_t); + const uint32_t queryDataSize = (m_slotSize * queryCount); + for (uint32_t deviceIdx = 0; deviceIdx < pDevice->NumPalDevices(); deviceIdx++) { void* pMappedAddr = nullptr; if (m_internalMem.Map(deviceIdx, &pMappedAddr) == Pal::Result::Success) { - uint64_t* pQueryData = static_cast(Util::VoidPtrInc(pMappedAddr, - (m_slotSize * startQuery))); + void* pQueryData = Util::VoidPtrInc(pMappedAddr, (m_slotSize * startQuery)); - for (uint32_t idx = 0; idx < queryDataSize; idx++) - { - pQueryData[idx] = TimestampNotReady; - } + memset(pQueryData, NotReadyByte, queryDataSize); - if (pMappedAddr != nullptr) - { - m_internalMem.Unmap(deviceIdx); - } + m_internalMem.Unmap(deviceIdx); } } } diff --git a/icd/api/vk_queue.cpp b/icd/api/vk_queue.cpp index 79196bd8..73def9fc 100644 --- a/icd/api/vk_queue.cpp +++ b/icd/api/vk_queue.cpp @@ -1304,7 +1304,8 @@ VkResult Queue::Submit( palSubmitInfo.gpuMemRefCount = 0; palSubmitInfo.pGpuMemoryRefs = nullptr; - const uint32_t deviceCount = (pDeviceGroupInfo == nullptr) ? 1 : m_pDevice->NumPalDevices(); + const uint32_t deviceCount = ((pDeviceGroupInfo == nullptr) && (isSynchronization2 == false)) ? 
+ 1 : m_pDevice->NumPalDevices(); for (uint32_t deviceIdx = 0; (deviceIdx < deviceCount) && (result == VK_SUCCESS); deviceIdx++) { Pal::Result palResult = Pal::Result::Success; @@ -1341,7 +1342,18 @@ VkResult Queue::Submit( for (uint32_t i = 0; i < cmdBufferCount; ++i) { - if ((deviceCount > 1) && + if (isSynchronization2) + { + const VkSubmitInfo2KHR* pSubmitInfoKhr = + reinterpret_cast(&pSubmits[submitIdx]); + + if ((pSubmitInfoKhr->pCommandBufferInfos[i].deviceMask != 0) && + ((pSubmitInfoKhr->pCommandBufferInfos[i].deviceMask & deviceMask) == 0)) + { + continue; + } + } + else if ((pDeviceGroupInfo != nullptr) && (pDeviceGroupInfo->pCommandBufferDeviceMasks != nullptr) && (pDeviceGroupInfo->pCommandBufferDeviceMasks[i] & deviceMask) == 0) { @@ -1644,12 +1656,17 @@ VkResult Queue::Submit( VkResult Queue::WaitIdle(void) { Pal::Result palResult = Pal::Result::Success; + const RuntimeSettings* pSettings = &m_pDevice->VkPhysicalDevice(DefaultDeviceIndex)->GetRuntimeSettings(); for (uint32_t deviceIdx = 0; (deviceIdx < m_pDevice->NumPalDevices()) && (palResult == Pal::Result::Success); deviceIdx++) { - palResult = PalQueue(deviceIdx)->WaitIdle(); + do + { + palResult = PalQueue(deviceIdx)->WaitIdle(); + } + while ((pSettings->infiniteDeviceWaitIdle) && (palResult == Pal::Result::Timeout)); } return PalToVkResult(palResult); @@ -1989,16 +2006,6 @@ VkResult Queue::Present( } } - // Ensure metadata is available before post processing. - if (pSwapChain->GetFullscreenMgr() != nullptr) - { - Pal::Result palResult = m_pDevice->PalDevice(DefaultDeviceIndex)->PollFullScreenFrameMetadataControl( - pSwapChain->GetVidPnSourceId(), - &m_palFrameMetadataControl); - - VK_ASSERT(palResult == Pal::Result::Success); - } - // Fill in present information and obtain the PAL memory of the presentable image. 
Pal::IGpuMemory* pGpuMemory = pSwapChain->UpdatePresentInfo(presentationDeviceIdx, imageIndex, diff --git a/icd/api/vk_sampler.cpp b/icd/api/vk_sampler.cpp index 6c85c71a..7999d937 100644 --- a/icd/api/vk_sampler.cpp +++ b/icd/api/vk_sampler.cpp @@ -233,8 +233,7 @@ VkResult Sampler::Create( // Figure out how big a sampler SRD is. This is not the most efficient way of doing // things, so we could cache the SRD size. - Pal::DeviceProperties props; - pDevice->PalDevice(DefaultDeviceIndex)->GetProperties(&props); + const Pal::DeviceProperties& props = pDevice->GetPalProperties(); const uint32 apiSize = sizeof(Sampler); const uint32 palSize = props.gfxipProperties.srdSizes.sampler; diff --git a/icd/api/vk_semaphore.cpp b/icd/api/vk_semaphore.cpp index 4223459a..e1e5022f 100644 --- a/icd/api/vk_semaphore.cpp +++ b/icd/api/vk_semaphore.cpp @@ -281,7 +281,6 @@ VkResult Semaphore::ImportSemaphore( PAL_ASSERT((handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) || (handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)); palOpenInfo.flags.isReference = (handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT); - #endif //Todo: Check whether pDevice is the same as the one created the semaphore. 
diff --git a/icd/api/vk_shader.cpp b/icd/api/vk_shader.cpp index 32d7fd76..c6378aef 100644 --- a/icd/api/vk_shader.cpp +++ b/icd/api/vk_shader.cpp @@ -217,11 +217,6 @@ VkResult ShaderModule::Init( m_handle.codeHash = m_codeHash; - if (result == VK_SUCCESS) - { - pCompiler->TryEarlyCompileShaderModule(pDevice, &m_handle); - } - return result; } diff --git a/icd/res/ver.h b/icd/res/ver.h index 86be3368..36011de9 100644 --- a/icd/res/ver.h +++ b/icd/res/ver.h @@ -36,7 +36,7 @@ #define VERSION_MAJOR_STR MAKE_VERSION_STRING(VULKAN_ICD_MAJOR_VERSION) "\0" // Bump up after each promotion to mainline -#define VULKAN_ICD_BUILD_VERSION 310 +#define VULKAN_ICD_BUILD_VERSION 316 // String version is needed with leading zeros and extra termination (unicode) #define VERSION_NUMBER_MINOR VULKAN_ICD_BUILD_VERSION @@ -45,7 +45,7 @@ // These values specify the driver ID and driver info string #define VULKAN_DRIVER_ID VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR // "AMDOPEN" #define VULKAN_DRIVER_NAME_STR "AMD open-source driver" -#define VULKAN_DRIVER_INFO_STR "2024.Q2.3" +#define VULKAN_DRIVER_INFO_STR "2024.Q3.1" #define VULKAN_DRIVER_INFO_STR_LLPC "(LLPC)" // These values tell which version of the conformance test the driver is compliant against diff --git a/icd/settings/settings.cpp b/icd/settings/settings.cpp index 0a9d8a6c..d493c70e 100644 --- a/icd/settings/settings.cpp +++ b/icd/settings/settings.cpp @@ -42,7 +42,6 @@ #include "palPlatform.h" #include "devDriverServer.h" -#include "protocols/ddSettingsService.h" #include "dd_settings_service.h" #include "experimentsLoader.h" @@ -117,6 +116,16 @@ static ExpShaderWaveSize XglSwsToExpSws( return wsOut; } +// ===================================================================================================================== +static void OverrideVkd3dCommonSettings( + RuntimeSettings* pSettings) +{ + pSettings->exportNvComputeShaderDerivatives = true; + pSettings->exportNvDeviceGeneratedCommands = true; + 
pSettings->exportImageCompressionControl = true; + pSettings->disableSingleMipAnisoOverride = false; +} + // ===================================================================================================================== // Constructor for the SettingsLoader object. VulkanSettingsLoader::VulkanSettingsLoader( @@ -409,1287 +418,1309 @@ VkResult VulkanSettingsLoader::OverrideProfiledSettings( Pal::PalPublicSettings* pPalSettings = m_pDevice->GetPublicSettings(); - { - - // By allowing the enable/disable to be set by environment variable, any third party platform owners - // can enable or disable the feature based on their internal feedback and not have to wait for a driver - // update to catch issues + // By allowing the enable/disable to be set by environment variable, any third party platform owners + // can enable or disable the feature based on their internal feedback and not have to wait for a driver + // update to catch issues - const char* pPipelineCacheEnvVar = getenv(m_settings.pipelineCachingEnvironmentVariable); + const char* pPipelineCacheEnvVar = getenv(m_settings.pipelineCachingEnvironmentVariable); - if (pPipelineCacheEnvVar != nullptr) - { - m_settings.usePalPipelineCaching = (atoi(pPipelineCacheEnvVar) != 0); - } + if (pPipelineCacheEnvVar != nullptr) + { + m_settings.usePalPipelineCaching = (atoi(pPipelineCacheEnvVar) != 0); + } - const char* pEnableInternalCacheToDisk = getenv("AMD_VK_ENABLE_INTERNAL_PIPELINECACHING_TO_DISK"); - if (pEnableInternalCacheToDisk != nullptr) - { - m_settings.enableInternalPipelineCachingToDisk = (atoi(pEnableInternalCacheToDisk) != 0); - } + const char* pEnableInternalCacheToDisk = getenv("AMD_VK_ENABLE_INTERNAL_PIPELINECACHING_TO_DISK"); + if (pEnableInternalCacheToDisk != nullptr) + { + m_settings.enableInternalPipelineCachingToDisk = (atoi(pEnableInternalCacheToDisk) != 0); + } + { // In general, DCC is very beneficial for color attachments, 2D, 3D shader storage resources that have BPP>=32. 
// If this is completely offset, maybe by increased shader read latency or partial writes of DCC blocks, it should // be debugged on a case by case basis. - m_settings.forceEnableDcc = (ForceDccForColorAttachments | - ForceDccFor2DShaderStorage | - ForceDccFor3DShaderStorage | - ForceDccFor32BppShaderStorage | - ForceDccFor64BppShaderStorage); + m_settings.forceEnableDcc = (ForceDccForColorAttachments | + ForceDccFor2DShaderStorage | + ForceDccFor3DShaderStorage | + ForceDccFor32BppShaderStorage | + ForceDccFor64BppShaderStorage); + } - m_settings.optImgMaskToApplyShaderReadUsageForTransferSrc |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + m_settings.optImgMaskToApplyShaderReadUsageForTransferSrc |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; #if VKI_RAY_TRACING - const char* pMaxInlinedShadersEnvVar = getenv("AMD_VK_MAX_INLINED_SHADERS"); + const char* pMaxInlinedShadersEnvVar = getenv("AMD_VK_MAX_INLINED_SHADERS"); - if (pMaxInlinedShadersEnvVar != nullptr) - { - m_settings.maxUnifiedNonRayGenShaders = static_cast<uint32_t>(atoi(pMaxInlinedShadersEnvVar)); - } + if (pMaxInlinedShadersEnvVar != nullptr) + { + m_settings.maxUnifiedNonRayGenShaders = static_cast<uint32_t>(atoi(pMaxInlinedShadersEnvVar)); + } #if VKI_BUILD_GFX11 - // Default optimized RT settings for Navi31 / 32, - // which has physical VGPR 1536 per SIMD - if (pInfo->gfxipProperties.shaderCore.vgprsPerSimd == 1536) - { - // 1.2% faster - Corresponds to 1.5x VGPR feature - m_settings.rtIndirectVgprLimit = 120; + // Default optimized RT settings for Navi31 / 32, + // which has physical VGPR 1536 per SIMD + if (pInfo->gfxipProperties.shaderCore.vgprsPerSimd == 1536) + { + // 1.2% faster - Corresponds to 1.5x VGPR feature + m_settings.rtIndirectVgprLimit = 120; - // 1% faster using indirectCallTargetOccupancyPerSimd of 0.75 - m_settings.indirectCallTargetOccupancyPerSimd = 0.75; - } + // 1% faster using indirectCallTargetOccupancyPerSimd of 0.75 + m_settings.indirectCallTargetOccupancyPerSimd = 0.75; + } #endif #endif - if 
(pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) - { - // Enable NGG culling by default for Navi2x. - m_settings.nggEnableBackfaceCulling = true; - m_settings.nggEnableSmallPrimFilter = true; + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) + { + // Enable NGG culling by default for Navi2x. + m_settings.nggEnableBackfaceCulling = true; + m_settings.nggEnableSmallPrimFilter = true; - // Enable NGG compactionless mode for Navi2x - m_settings.nggCompactVertex = false; + // Enable NGG compactionless mode for Navi2x + m_settings.nggCompactVertex = false; - } + } - { - m_settings.disableImplicitInvariantExports = false; - } + { + m_settings.disableImplicitInvariantExports = false; + } #if VKI_BUILD_GFX11 - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) - { - // Enable NGG compactionless mode for Navi3x - m_settings.nggCompactVertex = false; + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + { + // Enable NGG compactionless mode for Navi3x + m_settings.nggCompactVertex = false; - // Hardcode wave sizes per shader stage until the ML model is trained and perf lab testing is done - m_settings.csWaveSize = 64; - m_settings.fsWaveSize = 64; - } + // Hardcode wave sizes per shader stage until the ML model is trained and perf lab testing is done + m_settings.csWaveSize = 64; + m_settings.fsWaveSize = 64; + } #endif - // Put command buffers in local for large/resizable BAR systems with > 7 GBs of local heap - constexpr gpusize _1GB = 1024ull * 1024ull * 1024ull; + // Put command buffers in local for large/resizable BAR systems with > 7 GBs of local heap + constexpr gpusize _1GB = 1024ull * 1024ull * 1024ull; - if (pInfo->gpuMemoryProperties.barSize > (7ull * _1GB)) + if (pInfo->gpuMemoryProperties.barSize > (7ull * _1GB)) + { + if ((appProfile != AppProfile::WorldWarZ) + && (appProfile != AppProfile::XPlane) + && (appProfile != AppProfile::SeriousSam4)) { - if ((appProfile != AppProfile::WorldWarZ) - && (appProfile != AppProfile::XPlane) - ) - { - 
m_settings.cmdAllocatorDataHeap = Pal::GpuHeapLocal; - m_settings.cmdAllocatorEmbeddedHeap = Pal::GpuHeapLocal; - } - - if ((appProfile == AppProfile::DoomEternal) || - (appProfile == AppProfile::SniperElite5) || - (appProfile == AppProfile::CSGO)) - { - m_settings.overrideHeapChoiceToLocal = OverrideChoiceForGartUswc; - } + m_settings.cmdAllocatorDataHeap = Pal::GpuHeapLocal; + m_settings.cmdAllocatorEmbeddedHeap = Pal::GpuHeapLocal; } - // Allow device memory overallocation for <= 2GBs of VRAM including APUs. - if (pInfo->gpuMemoryProperties.maxLocalMemSize <= (2ull * _1GB)) + if ((appProfile == AppProfile::DoomEternal) || + (appProfile == AppProfile::SniperElite5) || + (appProfile == AppProfile::CSGO)) { - m_settings.memoryDeviceOverallocationAllowed = true; + m_settings.overrideHeapChoiceToLocal = OverrideChoiceForGartUswc; } + } - if (appProfile == AppProfile::Doom) - { - m_settings.enableSpvPerfOptimal = true; + // Allow device memory overallocation for <= 2GBs of VRAM including APUs. + if (pInfo->gpuMemoryProperties.maxLocalMemSize <= (2ull * _1GB)) + { + m_settings.memoryDeviceOverallocationAllowed = true; + } - m_settings.optColorTargetUsageDoesNotContainResolveLayout = true; + if (appProfile == AppProfile::Doom) + { + m_settings.enableSpvPerfOptimal = true; - m_settings.barrierFilterOptions = SkipStrayExecutionDependencies | - SkipImageLayoutUndefined | - SkipDuplicateResourceBarriers; + m_settings.optColorTargetUsageDoesNotContainResolveLayout = true; - m_settings.modifyResourceKeyForAppProfile = true; - m_settings.forceImageSharingMode = ForceImageSharingMode::ForceImageSharingModeExclusive; + m_settings.barrierFilterOptions = SkipStrayExecutionDependencies | + SkipImageLayoutUndefined | + SkipDuplicateResourceBarriers; - // id games are known to query instance-level functions with vkGetDeviceProcAddr illegally thus we - // can't do any better than returning a non-null function pointer for them. 
- m_settings.lenientInstanceFuncQuery = true; - } + m_settings.modifyResourceKeyForAppProfile = true; + m_settings.forceImageSharingMode = ForceImageSharingMode::ForceImageSharingModeExclusive; - if (appProfile == AppProfile::DoomVFR) - { - // id games are known to query instance-level functions with vkGetDeviceProcAddr illegally thus we - // can't do any better than returning a non-null function pointer for them. - m_settings.lenientInstanceFuncQuery = true; + // id games are known to query instance-level functions with vkGetDeviceProcAddr illegally thus we + // can't do any better than returning a non-null function pointer for them. + m_settings.lenientInstanceFuncQuery = true; + } - // This works around a crash at app startup. - m_settings.ignoreSuboptimalSwapchainSize = true; + if (appProfile == AppProfile::DoomVFR) + { + // id games are known to query instance-level functions with vkGetDeviceProcAddr illegally thus we + // can't do any better than returning a non-null function pointer for them. + m_settings.lenientInstanceFuncQuery = true; - m_settings.forceEnableDcc = ForceDccDefault; + // This works around a crash at app startup. 
+ m_settings.ignoreSuboptimalSwapchainSize = true; - if (pInfo->revision == Pal::AsicRevision::Navi14) - { - m_settings.barrierFilterOptions = SkipImageLayoutUndefined; - } - } + m_settings.forceEnableDcc = ForceDccDefault; - if (appProfile == AppProfile::WolfensteinII) + if (pInfo->revision == Pal::AsicRevision::Navi14) { - m_settings.zeroInitIlRegs = true; + m_settings.barrierFilterOptions = SkipImageLayoutUndefined; + } + } - m_settings.disableSingleMipAnisoOverride = false; + if (appProfile == AppProfile::WolfensteinII) + { + m_settings.zeroInitIlRegs = true; - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) - { - // Mall no alloc settings give a 2.91% gain - m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; - m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; - } + m_settings.disableSingleMipAnisoOverride = false; - // Don't enable DCC for color attachments aside from those listed in the app_resource_optimizer - m_settings.forceEnableDcc = ForceDccDefault; + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) + { + // Mall no alloc settings give a 2.91% gain + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; + m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; } - if (appProfile == AppProfile::WolfensteinYoungblood) - { - m_settings.overrideHeapGartCacheableToUswc = true; + // Don't enable DCC for color attachments aside from those listed in the app_resource_optimizer + m_settings.forceEnableDcc = ForceDccDefault; + } - if (pInfo->gpuType == Pal::GpuType::Discrete) - { - m_settings.cmdAllocatorDataHeap = Pal::GpuHeapLocal; - m_settings.cmdAllocatorEmbeddedHeap = Pal::GpuHeapLocal; - } + if (appProfile == AppProfile::WolfensteinYoungblood) + { + m_settings.overrideHeapGartCacheableToUswc = true; - // Don't enable DCC for color attachments aside from those listed in the app_resource_optimizer - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) - { - m_settings.forceEnableDcc = ForceDccDefault; - } + if (pInfo->gpuType == 
Pal::GpuType::Discrete) + { + m_settings.cmdAllocatorDataHeap = Pal::GpuHeapLocal; + m_settings.cmdAllocatorEmbeddedHeap = Pal::GpuHeapLocal; + } + + // Don't enable DCC for color attachments aside from those listed in the app_resource_optimizer + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) + { + m_settings.forceEnableDcc = ForceDccDefault; + } - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) + { + if (pInfo->revision == Pal::AsicRevision::Navi21) { - if (pInfo->revision == Pal::AsicRevision::Navi21) - { - } } + } #if VKI_BUILD_GFX11 - else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) - { + else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + { #if VKI_BUILD_NAVI31 - if (pInfo->revision == Pal::AsicRevision::Navi31) + if (pInfo->revision == Pal::AsicRevision::Navi31) + { { - { - m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; - m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; - } + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; + m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; } -#endif } #endif } +#endif + } - if ((appProfile == AppProfile::WolfensteinII) || - (appProfile == AppProfile::WolfensteinYoungblood)) - { - m_settings.enableSpvPerfOptimal = true; + if ((appProfile == AppProfile::WolfensteinII) || + (appProfile == AppProfile::WolfensteinYoungblood)) + { + m_settings.enableSpvPerfOptimal = true; - m_settings.optColorTargetUsageDoesNotContainResolveLayout = true; + m_settings.optColorTargetUsageDoesNotContainResolveLayout = true; - m_settings.barrierFilterOptions = SkipStrayExecutionDependencies | - SkipImageLayoutUndefined; + m_settings.barrierFilterOptions = SkipStrayExecutionDependencies | + SkipImageLayoutUndefined; - m_settings.modifyResourceKeyForAppProfile = true; - m_settings.forceImageSharingMode = ForceImageSharingMode::ForceImageSharingModeExclusive; + m_settings.modifyResourceKeyForAppProfile = true; + m_settings.forceImageSharingMode = 
ForceImageSharingMode::ForceImageSharingModeExclusive; - m_settings.asyncComputeQueueLimit = 1; + m_settings.asyncComputeQueueLimit = 1; - // id games are known to query instance-level functions with vkGetDeviceProcAddr illegally thus we - // can't do any better than returning a non-null function pointer for them. - m_settings.lenientInstanceFuncQuery = true; - } + // id games are known to query instance-level functions with vkGetDeviceProcAddr illegally thus we + // can't do any better than returning a non-null function pointer for them. + m_settings.lenientInstanceFuncQuery = true; + } + + if (((appProfile == AppProfile::WolfensteinII) || + (appProfile == AppProfile::WolfensteinYoungblood) || + (appProfile == AppProfile::Doom)) && + ((pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) || + (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3))) + { + m_settings.asyncComputeQueueMaxWavesPerCu = 20; + m_settings.nggSubgroupSizing = NggSubgroupExplicit; + m_settings.nggVertsPerSubgroup = 254; + m_settings.nggPrimsPerSubgroup = 128; + } + + if (appProfile == AppProfile::WorldWarZ) + { + // This application oversubscribes on 4 GB cards during ALT+TAB + m_settings.memoryDeviceOverallocationAllowed = true; + + m_settings.reportSuboptimalPresentAsOutOfDate = true; - if (((appProfile == AppProfile::WolfensteinII) || - (appProfile == AppProfile::WolfensteinYoungblood) || - (appProfile == AppProfile::Doom)) && - ((pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) || - (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3))) + if (pInfo->revision != Pal::AsicRevision::Navi21) { - m_settings.asyncComputeQueueMaxWavesPerCu = 20; - m_settings.nggSubgroupSizing = NggSubgroupExplicit; - m_settings.nggVertsPerSubgroup = 254; - m_settings.nggPrimsPerSubgroup = 128; + m_settings.optimizeCmdbufMode = EnableOptimizeCmdbuf; } - if (appProfile == AppProfile::WorldWarZ) { - // This application oversubscribes on 4 GB cards during ALT+TAB - m_settings.memoryDeviceOverallocationAllowed = true; + 
m_settings.forceEnableDcc = (ForceDccFor64BppShaderStorage | + ForceDccForNonColorAttachmentShaderStorage | + ForceDccForColorAttachments | + ForceDccFor2DShaderStorage); + } - m_settings.reportSuboptimalPresentAsOutOfDate = true; + // Mall no alloc setting gives a ~0.82% gain + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) + { + m_settings.csWaveSize = 64; + m_settings.fsWaveSize = 64; - if (pInfo->revision != Pal::AsicRevision::Navi21) + if (pInfo->revision == Pal::AsicRevision::Navi21) { - m_settings.optimizeCmdbufMode = EnableOptimizeCmdbuf; - } + m_settings.forceEnableDcc = (ForceDccFor64BppShaderStorage | + ForceDccFor32BppShaderStorage | + ForceDccForNonColorAttachmentShaderStorage | + ForceDccForColorAttachments | + ForceDccFor3DShaderStorage); - // WWZ performs worse with DCC forced on, so just let the PAL heuristics decide what's best for now. - m_settings.forceEnableDcc = (ForceDccFor64BppShaderStorage | - ForceDccForNonColorAttachmentShaderStorage | - ForceDccForColorAttachments | - ForceDccFor2DShaderStorage); + m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; + } - // Mall no alloc setting gives a ~0.82% gain - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) + if (pInfo->revision == Pal::AsicRevision::Navi22) { - m_settings.csWaveSize = 64; - m_settings.fsWaveSize = 64; - - if (pInfo->revision == Pal::AsicRevision::Navi21) - { - m_settings.forceEnableDcc = (ForceDccFor64BppShaderStorage | - ForceDccFor32BppShaderStorage | - ForceDccForNonColorAttachmentShaderStorage | - ForceDccForColorAttachments | - ForceDccFor3DShaderStorage); - - m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; - } - - if (pInfo->revision == Pal::AsicRevision::Navi22) - { - m_settings.forceEnableDcc = (ForceDccFor3DShaderStorage | - ForceDccForColorAttachments | - ForceDccForNonColorAttachmentShaderStorage | - ForceDccFor64BppShaderStorage); + m_settings.forceEnableDcc = (ForceDccFor3DShaderStorage | + ForceDccForColorAttachments | + 
ForceDccForNonColorAttachmentShaderStorage | + ForceDccFor64BppShaderStorage); - m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; - } + m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; + } - if (pInfo->revision == Pal::AsicRevision::Navi23) - { - m_settings.forceEnableDcc = (ForceDccFor32BppShaderStorage | - ForceDccForNonColorAttachmentShaderStorage | - ForceDccForColorAttachments | - ForceDccFor3DShaderStorage); - } + if (pInfo->revision == Pal::AsicRevision::Navi23) + { + m_settings.forceEnableDcc = (ForceDccFor32BppShaderStorage | + ForceDccForNonColorAttachmentShaderStorage | + ForceDccForColorAttachments | + ForceDccFor3DShaderStorage); + } - if (pInfo->revision == Pal::AsicRevision::Navi24) - { - m_settings.forceEnableDcc = (ForceDccFor64BppShaderStorage | - ForceDccForNonColorAttachmentShaderStorage | - ForceDccForColorAttachments | - ForceDccFor3DShaderStorage); + if (pInfo->revision == Pal::AsicRevision::Navi24) + { + m_settings.forceEnableDcc = (ForceDccFor64BppShaderStorage | + ForceDccForNonColorAttachmentShaderStorage | + ForceDccForColorAttachments | + ForceDccFor3DShaderStorage); - m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; - } + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; } + } #if VKI_BUILD_GFX11 - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) - { - m_settings.resourceBarrierOptions &= ~ResourceBarrierOptions::SkipDstCacheInv; - } + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + { + m_settings.resourceBarrierOptions &= ~ResourceBarrierOptions::SkipDstCacheInv; + } #endif - m_settings.implicitExternalSynchronization = false; - } + m_settings.implicitExternalSynchronization = false; + } - if (appProfile == AppProfile::WolfensteinCyberpilot) + if (appProfile == AppProfile::WolfensteinCyberpilot) + { + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) { - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) - { - m_settings.barrierFilterOptions = SkipImageLayoutUndefined; + 
m_settings.barrierFilterOptions = SkipImageLayoutUndefined; - } } + } - if (appProfile == AppProfile::IdTechEngine) - { - m_settings.enableSpvPerfOptimal = true; + if (appProfile == AppProfile::IdTechEngine) + { + m_settings.enableSpvPerfOptimal = true; - // id games are known to query instance-level functions with vkGetDeviceProcAddr illegally thus we - // can't do any better than returning a non-null function pointer for them. - m_settings.lenientInstanceFuncQuery = true; - } + // id games are known to query instance-level functions with vkGetDeviceProcAddr illegally thus we + // can't do any better than returning a non-null function pointer for them. + m_settings.lenientInstanceFuncQuery = true; + } - if (appProfile == AppProfile::Dota2) - { - pPalSettings->fastDepthStencilClearMode = Pal::FastDepthStencilClearMode::Graphics; + if (appProfile == AppProfile::Dota2) + { + pPalSettings->fastDepthStencilClearMode = Pal::FastDepthStencilClearMode::Graphics; - m_settings.disableSmallSurfColorCompressionSize = 511; + m_settings.disableSmallSurfColorCompressionSize = 511; - m_settings.preciseAnisoMode = DisablePreciseAnisoAll; - m_settings.useAnisoThreshold = true; - m_settings.anisoThreshold = 1.0f; + m_settings.preciseAnisoMode = DisablePreciseAnisoAll; + m_settings.useAnisoThreshold = true; + m_settings.anisoThreshold = 1.0f; - m_settings.disableMsaaStencilShaderRead = true; + m_settings.disableMsaaStencilShaderRead = true; - // Disable image type checking on Navi10 to avoid 2% loss. - m_settings.disableImageResourceTypeCheck = true; + // Disable image type checking on Navi10 to avoid 2% loss. 
+ m_settings.disableImageResourceTypeCheck = true; - } + } - if (appProfile == AppProfile::CSGO) + if (appProfile == AppProfile::CSGO) + { + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) { - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) - { - - if (pInfo->revision == Pal::AsicRevision::Navi21) - { - m_settings.csWaveSize = 32; - m_settings.fsWaveSize = 32; - m_settings.mallNoAllocCtPolicy = MallNoAllocCtPolicy::MallNoAllocCtAsSnsr; - m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrPolicy::MallNoAllocSsrAsSnsr; - } + if (pInfo->revision == Pal::AsicRevision::Navi21) + { + m_settings.csWaveSize = 32; + m_settings.fsWaveSize = 32; - if (pInfo->revision == Pal::AsicRevision::Navi22) - { - m_settings.mallNoAllocDsPolicy = MallNoAllocDsPolicy::MallNoAllocDsAsSnsr; - m_settings.mallNoAllocCtPolicy = MallNoAllocCtPolicy::MallNoAllocCtAsSnsr; - } + m_settings.mallNoAllocCtPolicy = MallNoAllocCtPolicy::MallNoAllocCtAsSnsr; + m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrPolicy::MallNoAllocSsrAsSnsr; + } - if (pInfo->revision == Pal::AsicRevision::Navi23) - { - m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrPolicy::MallNoAllocCtSsrAsSnsr; - m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrPolicy::MallNoAllocSsrAsSnsr; - } + if (pInfo->revision == Pal::AsicRevision::Navi22) + { + m_settings.mallNoAllocDsPolicy = MallNoAllocDsPolicy::MallNoAllocDsAsSnsr; + m_settings.mallNoAllocCtPolicy = MallNoAllocCtPolicy::MallNoAllocCtAsSnsr; + } - if (pInfo->revision == Pal::AsicRevision::Navi24) - { - m_settings.csWaveSize = 64; - m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrPolicy::MallNoAllocSsrAsSnsr; - } + if (pInfo->revision == Pal::AsicRevision::Navi23) + { + m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrPolicy::MallNoAllocCtSsrAsSnsr; + m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrPolicy::MallNoAllocSsrAsSnsr; } -#if VKI_BUILD_GFX11 - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + if (pInfo->revision == Pal::AsicRevision::Navi24) { + 
m_settings.csWaveSize = 64; m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrPolicy::MallNoAllocSsrAsSnsr; - m_settings.ac01WaNotNeeded = true; + } + } - if (pInfo->gpuType == Pal::GpuType::Discrete) - { - m_settings.rpmViewsBypassMall = RpmViewBypassMall::RpmViewBypassMallOnCbDbWrite | - RpmViewBypassMall::RpmViewBypassMallOnRead; - } +#if VKI_BUILD_GFX11 + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + { + m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrPolicy::MallNoAllocSsrAsSnsr; + m_settings.ac01WaNotNeeded = true; + + if (pInfo->gpuType == Pal::GpuType::Discrete) + { + m_settings.rpmViewsBypassMall = RpmViewBypassMall::RpmViewBypassMallOnCbDbWrite | + RpmViewBypassMall::RpmViewBypassMallOnRead; + } #if VKI_BUILD_NAVI31 - if (pInfo->revision == Pal::AsicRevision::Navi31) - { - m_settings.mallNoAllocCtPolicy = MallNoAllocCtPolicy::MallNoAllocCtAsSnsr; - } + if (pInfo->revision == Pal::AsicRevision::Navi31) + { + m_settings.mallNoAllocCtPolicy = MallNoAllocCtPolicy::MallNoAllocCtAsSnsr; + } #endif #if VKI_BUILD_NAVI32 - if (pInfo->revision == Pal::AsicRevision::Navi32) - { - m_settings.mallNoAllocCtPolicy = MallNoAllocCtPolicy::MallNoAllocCtAsSnsr; - } + if (pInfo->revision == Pal::AsicRevision::Navi32) + { + m_settings.mallNoAllocCtPolicy = MallNoAllocCtPolicy::MallNoAllocCtAsSnsr; + } #endif #if VKI_BUILD_NAVI33 - if (pInfo->revision == Pal::AsicRevision::Navi33) - { - m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrPolicy::MallNoAllocCtSsrAsSnsr; - } -#endif + if (pInfo->revision == Pal::AsicRevision::Navi33) + { + m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrPolicy::MallNoAllocCtSsrAsSnsr; } #endif - - m_settings.enableUberFetchShader = true; } +#endif - if (appProfile == AppProfile::Source2Engine) - { - pPalSettings->fastDepthStencilClearMode = Pal::FastDepthStencilClearMode::Graphics; + m_settings.enableUberFetchShader = true; + } - m_settings.disableSmallSurfColorCompressionSize = 511; + if (appProfile == AppProfile::Source2Engine) + { + 
pPalSettings->fastDepthStencilClearMode = Pal::FastDepthStencilClearMode::Graphics; - m_settings.preciseAnisoMode = DisablePreciseAnisoAll; - m_settings.useAnisoThreshold = true; - m_settings.anisoThreshold = 1.0f; + m_settings.disableSmallSurfColorCompressionSize = 511; - m_settings.disableMsaaStencilShaderRead = true; - } + m_settings.preciseAnisoMode = DisablePreciseAnisoAll; + m_settings.useAnisoThreshold = true; + m_settings.anisoThreshold = 1.0f; - if (appProfile == AppProfile::Talos) - { - m_settings.preciseAnisoMode = DisablePreciseAnisoAll; - m_settings.optImgMaskToApplyShaderReadUsageForTransferSrc = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; + m_settings.disableMsaaStencilShaderRead = true; + } - m_settings.forceDepthClampBasedOnZExport = true; + if (appProfile == AppProfile::Talos) + { + m_settings.preciseAnisoMode = DisablePreciseAnisoAll; + m_settings.optImgMaskToApplyShaderReadUsageForTransferSrc = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT; - m_settings.clampMaxImageSize = 16384u; - } + m_settings.forceDepthClampBasedOnZExport = true; - if (appProfile == AppProfile::SeriousSamFusion) - { - m_settings.preciseAnisoMode = DisablePreciseAnisoAll; - m_settings.useAnisoThreshold = true; - m_settings.anisoThreshold = 1.0f; + m_settings.clampMaxImageSize = 16384u; + } - m_settings.clampMaxImageSize = 16384u; - } + if (appProfile == AppProfile::SeriousSamFusion) + { + m_settings.preciseAnisoMode = DisablePreciseAnisoAll; + m_settings.useAnisoThreshold = true; + m_settings.anisoThreshold = 1.0f; - if ((appProfile == AppProfile::TalosVR) || - (appProfile == AppProfile::SeriousSamVrTheLastHope) || - (appProfile == AppProfile::SedpEngine)) - { - m_settings.clampMaxImageSize = 16384u; - } + m_settings.clampMaxImageSize = 16384u; + } + + if ((appProfile == AppProfile::TalosVR) || + (appProfile == AppProfile::SeriousSamVrTheLastHope) || + (appProfile == AppProfile::SedpEngine)) + { + m_settings.clampMaxImageSize = 16384u; + } + + if (appProfile == AppProfile::SeriousSam4) + { + 
m_settings.preciseAnisoMode = DisablePreciseAnisoAll; - if (appProfile == AppProfile::SeriousSam4) + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) { - m_settings.preciseAnisoMode = DisablePreciseAnisoAll; + m_settings.forceEnableDcc = ForceDccDefault; + } - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) - { - m_settings.forceEnableDcc = ForceDccDefault; - } + m_settings.enableWgpMode = Vkgc::ShaderStageBit::ShaderStageComputeBit; - m_settings.enableWgpMode = Vkgc::ShaderStageBit::ShaderStageComputeBit; + m_settings.clampMaxImageSize = 16384u; + } - m_settings.clampMaxImageSize = 16384u; + if (appProfile == AppProfile::KnockoutCity) + { + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) + { + m_settings.forceEnableDcc = (ForceDccFor3DShaderStorage | + ForceDccForColorAttachments | + ForceDccForNonColorAttachmentShaderStorage| + ForceDccFor32BppShaderStorage); } - if (appProfile == AppProfile::KnockoutCity) + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) { - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) - { - m_settings.forceEnableDcc = (ForceDccFor3DShaderStorage | - ForceDccForColorAttachments | - ForceDccForNonColorAttachmentShaderStorage| - ForceDccFor32BppShaderStorage); - } + m_settings.forceEnableDcc = (ForceDccFor3DShaderStorage | + ForceDccForColorAttachments | + ForceDccForNonColorAttachmentShaderStorage | + ForceDccFor32BppShaderStorage | + ForceDccFor64BppShaderStorage); - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) + if (pInfo->revision == Pal::AsicRevision::Navi22) { - m_settings.forceEnableDcc = (ForceDccFor3DShaderStorage | - ForceDccForColorAttachments | - ForceDccForNonColorAttachmentShaderStorage | - ForceDccFor32BppShaderStorage | - ForceDccFor64BppShaderStorage); - - if (pInfo->revision == Pal::AsicRevision::Navi22) - { - m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; - m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; - } + m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; + 
m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; } } - if (appProfile == AppProfile::EvilGenius2) - { - m_settings.preciseAnisoMode = DisablePreciseAnisoAll; - m_settings.csWaveSize = 64; - m_settings.fsWaveSize = 64; - - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) - { - if (pInfo->revision == Pal::AsicRevision::Navi21) - { - m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; - m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; - m_settings.enableWgpMode = Vkgc::ShaderStageBit::ShaderStageComputeBit; - } - } + } + if (appProfile == AppProfile::EvilGenius2) + { + m_settings.preciseAnisoMode = DisablePreciseAnisoAll; + m_settings.csWaveSize = 64; + m_settings.fsWaveSize = 64; - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) + { + if (pInfo->revision == Pal::AsicRevision::Navi21) { + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; + m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; m_settings.enableWgpMode = Vkgc::ShaderStageBit::ShaderStageComputeBit; } } - if (appProfile == AppProfile::QuakeEnhanced) + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) { - // Originally applied to QuakeRemastered - this setting applies to QuakeEnhanced now since it's an update - // to the same game. - m_settings.disableDisplayDcc = DisplayableDcc::DisplayableDccDisabled; + m_settings.enableWgpMode = Vkgc::ShaderStageBit::ShaderStageComputeBit; + } + } + + if (appProfile == AppProfile::QuakeEnhanced) + { + // Originally applied to QuakeRemastered - this setting applies to QuakeEnhanced now since it's an update + // to the same game. 
+ m_settings.disableDisplayDcc = DisplayableDcc::DisplayableDccDisabled; #if VKI_BUILD_GFX11 - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) - { - m_settings.forcePwsMode = PwsMode::NoLateAcquirePoint; - } -#endif + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + { + m_settings.forcePwsMode = PwsMode::NoLateAcquirePoint; } +#endif + } + + if (appProfile == AppProfile::SedpEngine) + { + m_settings.preciseAnisoMode = DisablePreciseAnisoAll; + } - if (appProfile == AppProfile::SedpEngine) + if (appProfile == AppProfile::StrangeBrigade) + { + + if (pInfo->gpuType == Pal::GpuType::Discrete) { - m_settings.preciseAnisoMode = DisablePreciseAnisoAll; + m_settings.overrideHeapChoiceToLocal = OverrideChoiceForGartUswc; + m_settings.cmdAllocatorDataHeap = Pal::GpuHeapLocal; + m_settings.cmdAllocatorEmbeddedHeap = Pal::GpuHeapLocal; } - if (appProfile == AppProfile::StrangeBrigade) + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) { + m_settings.forceEnableDcc = (ForceDccFor2DShaderStorage | + ForceDccFor3DShaderStorage | + ForceDccForColorAttachments | + ForceDccFor64BppShaderStorage); - if (pInfo->gpuType == Pal::GpuType::Discrete) - { - m_settings.overrideHeapChoiceToLocal = OverrideChoiceForGartUswc; - m_settings.cmdAllocatorDataHeap = Pal::GpuHeapLocal; - m_settings.cmdAllocatorEmbeddedHeap = Pal::GpuHeapLocal; - } - - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) - { - m_settings.forceEnableDcc = (ForceDccFor2DShaderStorage | - ForceDccFor3DShaderStorage | - ForceDccForColorAttachments | - ForceDccFor64BppShaderStorage); - - m_settings.enableNgg = 0x0; - } + m_settings.enableNgg = 0x0; + } - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) - { + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) + { - m_settings.disableDisplayDcc = DisplayableDcc::DisplayableDccDisabledForMgpu; + m_settings.disableDisplayDcc = DisplayableDcc::DisplayableDccDisabledForMgpu; - m_settings.overrideWgpMode = WgpMode::WgpModeWgp; - m_settings.csWaveSize = 64; - 
m_settings.fsWaveSize = 64; - m_settings.forceEnableDcc = (ForceDccFor3DShaderStorage | - ForceDccForColorAttachments | - ForceDccFor32BppShaderStorage); - } + m_settings.overrideWgpMode = WgpMode::WgpModeWgp; + m_settings.csWaveSize = 64; + m_settings.fsWaveSize = 64; + m_settings.forceEnableDcc = (ForceDccFor3DShaderStorage | + ForceDccForColorAttachments | + ForceDccFor32BppShaderStorage); + } #if VKI_BUILD_GFX11 - else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) - { + else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + { #if VKI_BUILD_NAVI31 - if (pInfo->revision == Pal::AsicRevision::Navi31) - { - m_settings.mallNoAllocDsPolicy = MallNoAllocDsAsSnsr; - m_settings.pipelineBinningMode = PipelineBinningModeEnable; - } -#endif + if (pInfo->revision == Pal::AsicRevision::Navi31) + { + m_settings.mallNoAllocDsPolicy = MallNoAllocDsAsSnsr; + m_settings.pipelineBinningMode = PipelineBinningModeEnable; } #endif } +#endif + } - if (appProfile == AppProfile::ZombieArmy4) - { - - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) - { - m_settings.forceEnableDcc = (ForceDccFor2DShaderStorage | - ForceDccForColorAttachments | - ForceDccFor64BppShaderStorage); - - m_settings.enableNgg = 0x0; - m_settings.enableWgpMode = Vkgc::ShaderStageBit::ShaderStageComputeBit; - } - else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) - { - m_settings.enableNgg = 0x3; - m_settings.nggEnableFrustumCulling = true; - m_settings.enableWgpMode = Vkgc::ShaderStageBit::ShaderStageComputeBit; - } - } + if (appProfile == AppProfile::ZombieArmy4) + { - if (appProfile == AppProfile::MadMax) + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) { - m_settings.preciseAnisoMode = DisablePreciseAnisoAll; - m_settings.useAnisoThreshold = true; - m_settings.anisoThreshold = 1.0f; - m_settings.disableResetReleaseResources = true; - m_settings.implicitExternalSynchronization = false; - } + m_settings.forceEnableDcc = (ForceDccFor2DShaderStorage | + ForceDccForColorAttachments | + 
ForceDccFor64BppShaderStorage); - if (appProfile == AppProfile::F1_2017) - { - // F1 2017 performs worse with DCC forced on, so just let the PAL heuristics decide what's best for now. - m_settings.forceEnableDcc = ForceDccDefault; + m_settings.enableNgg = 0x0; + m_settings.enableWgpMode = Vkgc::ShaderStageBit::ShaderStageComputeBit; } - - if (appProfile == AppProfile::ThronesOfBritannia) + else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) { - m_settings.disableHtileBasedMsaaRead = true; - m_settings.enableFullCopyDstOnly = true; + m_settings.enableNgg = 0x3; + m_settings.nggEnableFrustumCulling = true; + m_settings.enableWgpMode = Vkgc::ShaderStageBit::ShaderStageComputeBit; } + } - if (appProfile == AppProfile::DiRT4) - { - // DiRT 4 performs worse with DCC forced on, so just let the PAL heuristics decide what's best for now. - m_settings.forceEnableDcc = ForceDccDefault; - - m_settings.forceDepthClampBasedOnZExport = true; - } + if (appProfile == AppProfile::MadMax) + { + m_settings.preciseAnisoMode = DisablePreciseAnisoAll; + m_settings.useAnisoThreshold = true; + m_settings.anisoThreshold = 1.0f; + m_settings.disableResetReleaseResources = true; + m_settings.implicitExternalSynchronization = false; + } - if (appProfile == AppProfile::WarHammerII) - { - // WarHammer II performs worse with DCC forced on, so just let the PAL heuristics decide - // what's best for now. - m_settings.forceEnableDcc = ForceDccDefault; + if (appProfile == AppProfile::F1_2017) + { + // F1 2017 performs worse with DCC forced on, so just let the PAL heuristics decide what's best for now. 
+ m_settings.forceEnableDcc = ForceDccDefault; + } - m_settings.ac01WaNotNeeded = true; - } + if (appProfile == AppProfile::ThronesOfBritannia) + { + m_settings.disableHtileBasedMsaaRead = true; + m_settings.enableFullCopyDstOnly = true; + } - if (appProfile == AppProfile::WarHammerIII) - { - m_settings.ac01WaNotNeeded = true; - } + if (appProfile == AppProfile::DiRT4) + { + // DiRT 4 performs worse with DCC forced on, so just let the PAL heuristics decide what's best for now. + m_settings.forceEnableDcc = ForceDccDefault; - if (appProfile == AppProfile::RainbowSixSiege) - { - m_settings.preciseAnisoMode = DisablePreciseAnisoAll; - m_settings.useAnisoThreshold = true; - m_settings.anisoThreshold = 1.0f; + m_settings.forceDepthClampBasedOnZExport = true; + } - // Ignore suboptimal swapchain size to fix crash on task switch - m_settings.ignoreSuboptimalSwapchainSize = true; + if (appProfile == AppProfile::WarHammerII) + { + // WarHammer II performs worse with DCC forced on, so just let the PAL heuristics decide + // what's best for now. 
+ m_settings.forceEnableDcc = ForceDccDefault; - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) - { - m_settings.forceEnableDcc = (ForceDccFor2DShaderStorage | - ForceDccForColorAttachments | - ForceDccForNonColorAttachmentShaderStorage); - } - else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) - { - m_settings.nggEnableBackfaceCulling = false; - m_settings.nggEnableSmallPrimFilter = false; + m_settings.ac01WaNotNeeded = true; + } - if (pInfo->revision == Pal::AsicRevision::Navi23) - { - m_settings.overrideLocalHeapSizeInGBs = 8; - m_settings.memoryDeviceOverallocationAllowed = true; - } + if (appProfile == AppProfile::WarHammerIII) + { + m_settings.ac01WaNotNeeded = true; + } - if (pInfo->revision == Pal::AsicRevision::Navi24) - { - m_settings.forceEnableDcc = (ForceDccFor3DShaderStorage | - ForceDccForColorAttachments | - ForceDccForNonColorAttachmentShaderStorage | - ForceDccFor32BppShaderStorage); + if (appProfile == AppProfile::RainbowSixSiege) + { + m_settings.preciseAnisoMode = DisablePreciseAnisoAll; + m_settings.useAnisoThreshold = true; + m_settings.anisoThreshold = 1.0f; - m_settings.overrideLocalHeapSizeInGBs = 8; - m_settings.memoryDeviceOverallocationAllowed = true; - } - } + // Ignore suboptimal swapchain size to fix crash on task switch + m_settings.ignoreSuboptimalSwapchainSize = true; + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) + { + m_settings.forceEnableDcc = (ForceDccFor2DShaderStorage | + ForceDccForColorAttachments | + ForceDccForNonColorAttachmentShaderStorage); } - - if (appProfile == AppProfile::Rage2) + else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) { - //PM4 optimizations give us another 1.5% perf increase - m_settings.optimizeCmdbufMode = OptimizeCmdbufMode::EnableOptimizeCmdbuf; + m_settings.nggEnableBackfaceCulling = false; + m_settings.nggEnableSmallPrimFilter = false; - m_settings.enableAceShaderPrefetch = false; - - // Rage 2 currently has all it's images set to VK_SHARING_MODE_CONCURRENT. 
- // Forcing these images to use VK_SHARING_MODE_EXCLUSIVE gives us around 5% perf increase. - m_settings.forceImageSharingMode = ForceImageSharingMode::ForceImageSharingModeExclusiveForNonColorAttachments; - - // Disable image type checking to avoid 1% loss. - m_settings.disableImageResourceTypeCheck = true; - - m_settings.implicitExternalSynchronization = false; - - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) + if (pInfo->revision == Pal::AsicRevision::Navi23) { - // Rage 2 performs worse with DCC forced on, so just let the PAL heuristics decide what's best for now. - m_settings.forceEnableDcc = ForceDccDefault; - + m_settings.overrideLocalHeapSizeInGBs = 8; + m_settings.memoryDeviceOverallocationAllowed = true; } - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) + if (pInfo->revision == Pal::AsicRevision::Navi24) { - m_settings.pipelineBinningMode = PipelineBinningModeDisable; - m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; - m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; - m_settings.forceEnableDcc = (ForceDccFor3DShaderStorage | ForceDccForColorAttachments | ForceDccForNonColorAttachmentShaderStorage | - ForceDccFor32BppShaderStorage | - ForceDccFor64BppShaderStorage); - - if (pInfo->revision != Pal::AsicRevision::Navi21) - { - m_settings.forceEnableDcc |= ForceDccFor2DShaderStorage; - m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; - } + ForceDccFor32BppShaderStorage); - { - m_settings.csWaveSize = 64; - m_settings.fsWaveSize = 64; - } + m_settings.overrideLocalHeapSizeInGBs = 8; + m_settings.memoryDeviceOverallocationAllowed = true; } + } + } + + if (appProfile == AppProfile::RainbowSixExtraction) + { #if VKI_BUILD_GFX11 - else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) - { - m_settings.pipelineBinningMode = PipelineBinningModeDisable; - m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; - m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + { + 
m_settings.forceEnableDcc = (ForceDccFor2DShaderStorage | + ForceDccForColorAttachments | + ForceDccForNonColorAttachmentShaderStorage | + ForceDccFor32BppShaderStorage | + ForceDccFor64BppShaderStorage); - m_settings.forceEnableDcc = (ForceDccFor3DShaderStorage | - ForceDccForColorAttachments | - ForceDccForNonColorAttachmentShaderStorage | - ForceDccFor32BppShaderStorage | - ForceDccFor64BppShaderStorage); - } -#endif + m_settings.disableLoopUnrolls = true; + m_settings.forceCsThreadIdSwizzling = true; } +#endif + } - if (appProfile == AppProfile::RedDeadRedemption2) - { - m_settings.enableAcquireBeforeSignal = true; + if (appProfile == AppProfile::Rage2) + { + //PM4 optimizations give us another 1.5% perf increase + m_settings.optimizeCmdbufMode = OptimizeCmdbufMode::EnableOptimizeCmdbuf; - m_settings.limitSampleCounts = VK_SAMPLE_COUNT_1_BIT | - VK_SAMPLE_COUNT_2_BIT | - VK_SAMPLE_COUNT_4_BIT; + m_settings.enableAceShaderPrefetch = false; - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) - { - } + // Rage 2 currently has all it's images set to VK_SHARING_MODE_CONCURRENT. + // Forcing these images to use VK_SHARING_MODE_EXCLUSIVE gives us around 5% perf increase. + m_settings.forceImageSharingMode = ForceImageSharingMode::ForceImageSharingModeExclusiveForNonColorAttachments; - // Force exclusive sharing mode - 2% gain - m_settings.forceImageSharingMode = ForceImageSharingMode::ForceImageSharingModeExclusive; - m_settings.implicitExternalSynchronization = false; + // Disable image type checking to avoid 1% loss. 
+ m_settings.disableImageResourceTypeCheck = true; - if (pInfo->gfxLevel >= Pal::GfxIpLevel::GfxIp10_3) - { - m_settings.pipelineBinningMode = PipelineBinningModeDisable; - m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; - m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; - m_settings.forceEnableDcc = (ForceDccFor2DShaderStorage | - ForceDccFor3DShaderStorage | - ForceDccForColorAttachments | - ForceDccFor64BppShaderStorage); + m_settings.implicitExternalSynchronization = false; -#if VKI_BUILD_GFX11 - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) - { - m_settings.forceEnableDcc |= ForceDccForNonColorAttachmentShaderStorage; - } -#endif - } + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) + { + // Rage 2 performs worse with DCC forced on, so just let the PAL heuristics decide what's best for now. + m_settings.forceEnableDcc = ForceDccDefault; - m_settings.ac01WaNotNeeded = true; } - if (appProfile == AppProfile::GhostReconBreakpoint) + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) { + m_settings.pipelineBinningMode = PipelineBinningModeDisable; + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; + m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; - // Override the PAL default for 3D color attachments and storage images to match GFX9's, SW_R/z-slice order. 
- m_settings.imageTilingPreference3dGpuWritable = Pal::ImageTilingPattern::YMajor; + m_settings.forceEnableDcc = (ForceDccFor3DShaderStorage | + ForceDccForColorAttachments | + ForceDccForNonColorAttachmentShaderStorage | + ForceDccFor32BppShaderStorage | + ForceDccFor64BppShaderStorage); - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) + if (pInfo->revision != Pal::AsicRevision::Navi21) { - m_settings.forceEnableDcc = (ForceDccFor2DShaderStorage | - ForceDccFor3DShaderStorage | - ForceDccForColorAttachments | - ForceDccForNonColorAttachmentShaderStorage | - ForceDccFor64BppShaderStorage); + m_settings.forceEnableDcc |= ForceDccFor2DShaderStorage; + m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; } - m_settings.implicitExternalSynchronization = false; - } - - if (appProfile == AppProfile::BaldursGate3) - { - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) { m_settings.csWaveSize = 64; m_settings.fsWaveSize = 64; } + } - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) - { - m_settings.csWaveSize = 64; - m_settings.fsWaveSize = 64; +#if VKI_BUILD_GFX11 + else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + { + m_settings.pipelineBinningMode = PipelineBinningModeDisable; + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; + m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; - if (pInfo->revision == Pal::AsicRevision::Navi21) - { - m_settings.mallNoAllocDsPolicy = MallNoAllocDsAsSnsr; - m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; - m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; - } - else if (pInfo->revision == Pal::AsicRevision::Navi22) - { - m_settings.forceEnableDcc = (ForceDccFor2DShaderStorage | - ForceDccFor3DShaderStorage | - ForceDccForColorAttachments); + m_settings.forceEnableDcc = (ForceDccFor3DShaderStorage | + ForceDccForColorAttachments | + ForceDccForNonColorAttachmentShaderStorage | + ForceDccFor32BppShaderStorage | + ForceDccFor64BppShaderStorage); + } +#endif + } - 
m_settings.mallNoAllocDsPolicy = MallNoAllocDsAsSnsr; - m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; - m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; - } - else if (pInfo->revision == Pal::AsicRevision::Navi23) - { - m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; - m_settings.mallNoAllocDsPolicy = MallNoAllocDsAsSnsr; - } - else if (pInfo->revision == Pal::AsicRevision::Navi24) - { - m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; - m_settings.mallNoAllocDsPolicy = MallNoAllocDsAsSnsr; + if (appProfile == AppProfile::RedDeadRedemption2) + { + m_settings.enableAcquireBeforeSignal = true; - m_settings.memoryDeviceOverallocationAllowed = true; - } - } + m_settings.limitSampleCounts = VK_SAMPLE_COUNT_1_BIT | + VK_SAMPLE_COUNT_2_BIT | + VK_SAMPLE_COUNT_4_BIT; + + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) + { + } + + // Force exclusive sharing mode - 2% gain + m_settings.forceImageSharingMode = ForceImageSharingMode::ForceImageSharingModeExclusive; + m_settings.implicitExternalSynchronization = false; + + if (pInfo->gfxLevel >= Pal::GfxIpLevel::GfxIp10_3) + { + m_settings.pipelineBinningMode = PipelineBinningModeDisable; + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; + m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; + m_settings.forceEnableDcc = (ForceDccFor2DShaderStorage | + ForceDccFor3DShaderStorage | + ForceDccForColorAttachments | + ForceDccFor64BppShaderStorage); #if VKI_BUILD_GFX11 if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) { - m_settings.forcePwsMode = PwsMode::NoLateAcquirePoint; + m_settings.forceEnableDcc |= ForceDccForNonColorAttachmentShaderStorage; } #endif } -#if VKI_RAY_TRACING - if (appProfile == AppProfile::Quake2RTX) - { - m_settings.memoryDeviceOverallocationAllowed = true; + m_settings.ac01WaNotNeeded = true; + } - if (pInfo->gfxLevel >= Pal::GfxIpLevel::GfxIp10_3) - { - m_settings.disableDisplayDcc = DisplayableDcc::DisplayableDccDisabled; + if (appProfile == 
AppProfile::GhostReconBreakpoint) + { - m_settings.rtTriangleCompressionMode = NoTriangleCompression; + // Override the PAL default for 3D color attachments and storage images to match GFX9's, SW_R/z-slice order. + m_settings.imageTilingPreference3dGpuWritable = Pal::ImageTilingPattern::YMajor; - m_settings.useFlipHint = false; + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) + { + m_settings.forceEnableDcc = (ForceDccFor2DShaderStorage | + ForceDccFor3DShaderStorage | + ForceDccForColorAttachments | + ForceDccForNonColorAttachmentShaderStorage | + ForceDccFor64BppShaderStorage); + } - m_settings.maxTotalSizeOfUnifiedShaders = UINT_MAX; + m_settings.implicitExternalSynchronization = false; + } + + if (appProfile == AppProfile::BaldursGate3) + { + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) + { + m_settings.csWaveSize = 64; + m_settings.fsWaveSize = 64; + } + + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) + { + m_settings.csWaveSize = 64; + m_settings.fsWaveSize = 64; + if (pInfo->revision == Pal::AsicRevision::Navi21) + { + m_settings.mallNoAllocDsPolicy = MallNoAllocDsAsSnsr; + m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; + m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; + } + else if (pInfo->revision == Pal::AsicRevision::Navi22) + { m_settings.forceEnableDcc = (ForceDccFor2DShaderStorage | ForceDccFor3DShaderStorage | - ForceDccForColorAttachments | - ForceDccFor64BppShaderStorage); + ForceDccForColorAttachments); + m_settings.mallNoAllocDsPolicy = MallNoAllocDsAsSnsr; + m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; + m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; } - -#if VKI_BUILD_GFX11 - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + else if (pInfo->revision == Pal::AsicRevision::Navi23) { - // Gives ~0.5% gain at 4k - m_settings.enableAceShaderPrefetch = false; + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; + m_settings.mallNoAllocDsPolicy = MallNoAllocDsAsSnsr; + } + else if 
(pInfo->revision == Pal::AsicRevision::Navi24) + { + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; + m_settings.mallNoAllocDsPolicy = MallNoAllocDsAsSnsr; + + m_settings.memoryDeviceOverallocationAllowed = true; } -#endif } - if (appProfile == AppProfile::ControlDX12) +#if VKI_BUILD_GFX11 + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) { - if (pInfo->gfxLevel >= Pal::GfxIpLevel::GfxIp10_3) - { - m_settings.rtEnableCompilePipelineLibrary = false; - } + m_settings.forcePwsMode = PwsMode::NoLateAcquirePoint; + } +#endif + } + +#if VKI_RAY_TRACING + if (appProfile == AppProfile::Quake2RTX) + { + m_settings.memoryDeviceOverallocationAllowed = true; + + if (pInfo->gfxLevel >= Pal::GfxIpLevel::GfxIp10_3) + { + m_settings.disableDisplayDcc = DisplayableDcc::DisplayableDccDisabled; + + m_settings.rtTriangleCompressionMode = NoTriangleCompression; + + m_settings.useFlipHint = false; + + m_settings.maxTotalSizeOfUnifiedShaders = UINT_MAX; + + m_settings.forceEnableDcc = (ForceDccFor2DShaderStorage | + ForceDccFor3DShaderStorage | + ForceDccForColorAttachments | + ForceDccFor64BppShaderStorage); + + } #if VKI_BUILD_GFX11 - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) - { - // Gives ~2.22% gain at 1080p - m_settings.enableAceShaderPrefetch = false; - } + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + { + // Gives ~0.5% gain at 4k + m_settings.enableAceShaderPrefetch = false; + } #endif + } + + if (appProfile == AppProfile::ControlDX12) + { + if (pInfo->gfxLevel >= Pal::GfxIpLevel::GfxIp10_3) + { + m_settings.rtEnableCompilePipelineLibrary = false; } - if (appProfile == AppProfile::RayTracingWeekends) +#if VKI_BUILD_GFX11 + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) { + // Gives ~2.22% gain at 1080p + m_settings.enableAceShaderPrefetch = false; + } +#endif + } + + if (appProfile == AppProfile::RayTracingWeekends) + { #if VKI_BUILD_GFX11 - if ((pInfo->revision != Pal::AsicRevision::Navi31) + if ((pInfo->revision != Pal::AsicRevision::Navi31) #if 
VKI_BUILD_NAVI32 - && (pInfo->revision != Pal::AsicRevision::Navi32) + && (pInfo->revision != Pal::AsicRevision::Navi32) #endif - ) + ) #endif + { { - { - m_settings.rtUnifiedVgprLimit = 64; - } + m_settings.rtUnifiedVgprLimit = 64; } } + } #endif - if (appProfile == AppProfile::DoomEternal) - { - m_settings.barrierFilterOptions = SkipStrayExecutionDependencies; - - m_settings.modifyResourceKeyForAppProfile = true; - m_settings.forceImageSharingMode = ForceImageSharingMode::ForceImageSharingModeExclusive; + if (appProfile == AppProfile::DoomEternal) + { + m_settings.barrierFilterOptions = SkipStrayExecutionDependencies; - // PM4 optimizations give us 1% gain - m_settings.optimizeCmdbufMode = EnableOptimizeCmdbuf; + m_settings.modifyResourceKeyForAppProfile = true; + m_settings.forceImageSharingMode = ForceImageSharingMode::ForceImageSharingModeExclusive; - m_settings.enableSpvPerfOptimal = true; + // PM4 optimizations give us 1% gain + m_settings.optimizeCmdbufMode = EnableOptimizeCmdbuf; - // id games are known to query instance-level functions with vkGetDeviceProcAddr illegally thus we - // can't do any better than returning a non-null function pointer for them. - m_settings.lenientInstanceFuncQuery = true; + m_settings.enableSpvPerfOptimal = true; - m_settings.backgroundFullscreenIgnorePresentErrors = true; + // id games are known to query instance-level functions with vkGetDeviceProcAddr illegally thus we + // can't do any better than returning a non-null function pointer for them. 
+ m_settings.lenientInstanceFuncQuery = true; - m_settings.implicitExternalSynchronization = false; + m_settings.backgroundFullscreenIgnorePresentErrors = true; - m_settings.alwaysReportHdrFormats = true; + m_settings.implicitExternalSynchronization = false; -#if VKI_RAY_TRACING - m_settings.indirectCallConvention = IndirectConvention0; -#endif + m_settings.alwaysReportHdrFormats = true; - if (pInfo->gpuType == Pal::GpuType::Discrete) - { - m_settings.cmdAllocatorDataHeap = Pal::GpuHeapLocal; - m_settings.cmdAllocatorEmbeddedHeap = Pal::GpuHeapLocal; - } + if (pInfo->gpuType == Pal::GpuType::Discrete) + { + m_settings.cmdAllocatorDataHeap = Pal::GpuHeapLocal; + m_settings.cmdAllocatorEmbeddedHeap = Pal::GpuHeapLocal; + } - // Coarse optimizations that apply to multiple GFXIPs go below - if (Util::IsPowerOfTwo(pInfo->gpuMemoryProperties.performance.vramBusBitWidth) == false) - { - m_settings.resourceBarrierOptions &= ~ResourceBarrierOptions::AvoidCpuMemoryCoher; - } + // Coarse optimizations that apply to multiple GFXIPs go below + if (Util::IsPowerOfTwo(pInfo->gpuMemoryProperties.performance.vramBusBitWidth) == false) + { + m_settings.resourceBarrierOptions &= ~ResourceBarrierOptions::AvoidCpuMemoryCoher; + } - if (pInfo->gfxLevel >= Pal::GfxIpLevel::GfxIp10_3) - { + if (pInfo->gfxLevel >= Pal::GfxIpLevel::GfxIp10_3) + { #if VKI_RAY_TRACING - m_settings.rtBvhBuildModeFastTrace = BvhBuildModeLinear; - m_settings.rtEnableTopDownBuild = false; - m_settings.plocRadius = 4; + m_settings.rtBvhBuildModeFastTrace = BvhBuildModeLinear; + m_settings.rtEnableTopDownBuild = false; + m_settings.plocRadius = 4; - // 13% Gain @ 4k - Allows overlapping builds - m_settings.enableAceShaderPrefetch = false; + // 13% Gain @ 4k - Allows overlapping builds + m_settings.enableAceShaderPrefetch = false; #endif - m_settings.enableWgpMode = Vkgc::ShaderStageBit::ShaderStageComputeBit; - } - - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) - { - // Doom Eternal performs better when DCC 
is not forced on. 2% gain on 4k. - m_settings.forceEnableDcc = ForceDccDefault; + m_settings.enableWgpMode = Vkgc::ShaderStageBit::ShaderStageComputeBit; + } - // Doom Eternal performs better with NGG disabled (3% gain on 4k), likely because idTech runs it's own - // triangle culling and there are no options in the game to turn it off making NGG somewhat redundant. - m_settings.enableNgg = false; + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_1) + { + // Doom Eternal performs better when DCC is not forced on. 2% gain on 4k. + m_settings.forceEnableDcc = ForceDccDefault; - m_settings.asyncComputeQueueMaxWavesPerCu = 20; + // Doom Eternal performs better with NGG disabled (3% gain on 4k), likely because idTech runs it's own + // triangle culling and there are no options in the game to turn it off making NGG somewhat redundant. + m_settings.enableNgg = false; - m_settings.enableWgpMode = Vkgc::ShaderStageBit::ShaderStageComputeBit; + m_settings.asyncComputeQueueMaxWavesPerCu = 20; - m_settings.csWaveSize = 64; - } - else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) - { - m_settings.asyncComputeQueueMaxWavesPerCu = 20; - m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; + m_settings.enableWgpMode = Vkgc::ShaderStageBit::ShaderStageComputeBit; - if (pInfo->revision != Pal::AsicRevision::Navi21) - { - m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; - m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; - } + m_settings.csWaveSize = 64; + } + else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) + { + m_settings.asyncComputeQueueMaxWavesPerCu = 20; + m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; - m_settings.csWaveSize = 64; + if (pInfo->revision != Pal::AsicRevision::Navi21) + { + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; + m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; } + + m_settings.csWaveSize = 64; + } #if VKI_BUILD_GFX11 - else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) - { - // Navi31 Mall 
and Tiling Settings - if ((pInfo->revision == Pal::AsicRevision::Navi31) + else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + { + // Navi31 Mall and Tiling Settings + if ((pInfo->revision == Pal::AsicRevision::Navi31) #if VKI_BUILD_NAVI32 - || (pInfo->revision == Pal::AsicRevision::Navi32) + || (pInfo->revision == Pal::AsicRevision::Navi32) #endif - ) - { - // Mall no alloc settings give a ~1% gain - m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; - m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; - m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; + ) + { + // Mall no alloc settings give a ~1% gain + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; + m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; + m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; - // This provides ~6% gain at 4k - m_settings.imageTilingPreference3dGpuWritable = Pal::ImageTilingPattern::YMajor; - } + // This provides ~6% gain at 4k + m_settings.imageTilingPreference3dGpuWritable = Pal::ImageTilingPattern::YMajor; } -#endif } +#endif + } - if (appProfile == AppProfile::IdTechLauncher) - { - m_settings.enableInternalPipelineCachingToDisk = false; - } + if (appProfile == AppProfile::IdTechLauncher) + { + m_settings.enableInternalPipelineCachingToDisk = false; + } - if (appProfile == AppProfile::SaschaWillemsExamples) - { - m_settings.forceDepthClampBasedOnZExport = true; - } + if (appProfile == AppProfile::SaschaWillemsExamples) + { + m_settings.forceDepthClampBasedOnZExport = true; + } - if ((appProfile == AppProfile::DxvkHaloInfiniteLauncher) || - (appProfile == AppProfile::DxvkTf2) + if ((appProfile == AppProfile::DxvkHaloInfiniteLauncher) || + (appProfile == AppProfile::DxvkTf2) #ifndef ICD_X64_BUILD - || (appProfile == AppProfile::DXVK) + || (appProfile == AppProfile::DXVK) #endif - ) - { - // DXVK Tropic4, GTA4, Halo Infinite Launcher page fault when GPL is enabled. - // It looks incorrect pipeline layout is used. 
Force indirect can make optimized pipeline layout compatible - // with fast-linked pipeline. - m_settings.pipelineLayoutSchemeSelectionStrategy = PipelineLayoutSchemeSelectionStrategy::ForceIndirect; + ) + { + // DXVK Tropic4, GTA4, Halo Infinite Launcher page fault when GPL is enabled. + // It looks incorrect pipeline layout is used. Force indirect can make optimized pipeline layout compatible + // with fast-linked pipeline. + m_settings.pipelineLayoutSchemeSelectionStrategy = PipelineLayoutSchemeSelectionStrategy::ForceIndirect; + + // It results from incorrect behavior of DXVK. Incompatible push constant size leads to Gpu page fault + // during fast link in pipeline creation. + m_settings.pipelineLayoutPushConstantCompatibilityCheck = true; + } - // It results from incorrect behavior of DXVK. Incompatible push constant size leads to Gpu page fault - // during fast link in pipeline creation. - m_settings.pipelineLayoutPushConstantCompatibilityCheck = true; - } + if (appProfile == AppProfile::AshesOfTheSingularity) + { + // Disable image type checking on Navi10 to avoid 2.5% loss in Ashes + m_settings.disableImageResourceTypeCheck = true; + m_settings.overrideUndefinedLayoutToTransferSrcOptimal = true; + } - if (appProfile == AppProfile::AshesOfTheSingularity) + if (appProfile == AppProfile::DetroitBecomeHuman) + { + // Disable image type checking on Navi10 to avoid 1.5% loss in Detroit + m_settings.disableImageResourceTypeCheck = true; + + // This restores previous driver behavior where depth compression was disabled for VK_IMAGE_LAYOUT_GENERAL. + // There is an image memory barrier missing to synchronize DB metadata and L2 causing hair corruption in + // some scenes. 
+ m_settings.forceResolveLayoutForDepthStencilTransferUsage = true; + + if (Util::IsPowerOfTwo(pInfo->gpuMemoryProperties.performance.vramBusBitWidth) == false) { - // Disable image type checking on Navi10 to avoid 2.5% loss in Ashes - m_settings.disableImageResourceTypeCheck = true; - m_settings.overrideUndefinedLayoutToTransferSrcOptimal = true; + m_settings.resourceBarrierOptions &= ~ResourceBarrierOptions::AvoidCpuMemoryCoher; } + m_settings.skipUnMapMemory = true; + } - if (appProfile == AppProfile::DetroitBecomeHuman) - { - // Disable image type checking on Navi10 to avoid 1.5% loss in Detroit - m_settings.disableImageResourceTypeCheck = true; + if (appProfile == AppProfile::WarThunder) + { + // A larger minImageCount can get a huge performance gain for game WarThunder. + m_settings.forceMinImageCount = 3; - // This restores previous driver behavior where depth compression was disabled for VK_IMAGE_LAYOUT_GENERAL. - // There is an image memory barrier missing to synchronize DB metadata and L2 causing hair corruption in - // some scenes. - m_settings.forceResolveLayoutForDepthStencilTransferUsage = true; + m_settings.enableDumbTransitionSync = false; + m_settings.forceDisableGlobalBarrierCacheSync = true; + } - if (Util::IsPowerOfTwo(pInfo->gpuMemoryProperties.performance.vramBusBitWidth) == false) - { - m_settings.resourceBarrierOptions &= ~ResourceBarrierOptions::AvoidCpuMemoryCoher; - } - m_settings.skipUnMapMemory = true; - } + if (appProfile == AppProfile::MetroExodus) + { + // A larger minImageCount can get a performance gain for game Metro Exodus. + m_settings.forceMinImageCount = 3; - if (appProfile == AppProfile::WarThunder) +#if VKI_BUILD_GFX11 + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) { - // A larger minImageCount can get a huge performance gain for game WarThunder. 
- m_settings.forceMinImageCount = 3; + // Gives ~0.9% gain at 1080p + m_settings.enableAceShaderPrefetch = false; + } +#endif + } - m_settings.enableDumbTransitionSync = false; - m_settings.forceDisableGlobalBarrierCacheSync = true; + if (appProfile == AppProfile::X4Foundations) + { + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) + { + m_settings.disableHtileBasedMsaaRead = true; } + } + + if (appProfile == AppProfile::SHARK) + { + m_settings.initializeVramToZero = false; + } + + if (appProfile == AppProfile::Valheim) + { + m_settings.disableDisplayDcc = DisplayableDcc::DisplayableDccDisabled; - if (appProfile == AppProfile::MetroExodus) + if (pInfo->gfxLevel >= Pal::GfxIpLevel::GfxIp10_3) { - // A larger minImageCount can get a performance gain for game Metro Exodus. - m_settings.forceMinImageCount = 3; + m_settings.csWaveSize = 32; + m_settings.fsWaveSize = 64; -#if VKI_BUILD_GFX11 - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + if (pInfo->revision == Pal::AsicRevision::Navi21) { - // Gives ~0.9% gain at 1080p - m_settings.enableAceShaderPrefetch = false; + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; } -#endif - } + else if (pInfo->revision == Pal::AsicRevision::Navi22) + { + m_settings.forceEnableDcc = (ForceDccFor2DShaderStorage | + ForceDccForColorAttachments | + ForceDccFor3DShaderStorage | + ForceDccForNonColorAttachmentShaderStorage); - if (appProfile == AppProfile::X4Foundations) - { - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; + } + else if (pInfo->revision == Pal::AsicRevision::Navi23) { - m_settings.disableHtileBasedMsaaRead = true; + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; + m_settings.mallNoAllocDsPolicy = MallNoAllocDsAsSnsr; } } + } - if (appProfile == AppProfile::SHARK) - { - m_settings.initializeVramToZero = false; - } + if (appProfile == AppProfile::SniperElite5) + { + m_settings.alwaysReportHdrFormats = true; - if (appProfile == 
AppProfile::Valheim) + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) { - m_settings.disableDisplayDcc = DisplayableDcc::DisplayableDccDisabled; + m_settings.csWaveSize = 64; + m_settings.fsWaveSize = 64; - if (pInfo->gfxLevel >= Pal::GfxIpLevel::GfxIp10_3) + if (pInfo->revision == Pal::AsicRevision::Navi21) { - m_settings.csWaveSize = 32; - m_settings.fsWaveSize = 64; - - if (pInfo->revision == Pal::AsicRevision::Navi21) - { - m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; - } - else if (pInfo->revision == Pal::AsicRevision::Navi22) - { - m_settings.forceEnableDcc = (ForceDccFor2DShaderStorage | - ForceDccForColorAttachments | - ForceDccFor3DShaderStorage | - ForceDccForNonColorAttachmentShaderStorage); - - m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; - } - else if (pInfo->revision == Pal::AsicRevision::Navi23) - { - m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; - m_settings.mallNoAllocDsPolicy = MallNoAllocDsAsSnsr; - } + m_settings.pipelineBinningMode = PipelineBinningModeDisable; } - } - - if (appProfile == AppProfile::SniperElite5) - { - m_settings.alwaysReportHdrFormats = true; - - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) + else if (pInfo->revision == Pal::AsicRevision::Navi22) { - m_settings.csWaveSize = 64; - m_settings.fsWaveSize = 64; - - if (pInfo->revision == Pal::AsicRevision::Navi21) - { - m_settings.pipelineBinningMode = PipelineBinningModeDisable; - } - else if (pInfo->revision == Pal::AsicRevision::Navi22) - { - m_settings.forceEnableDcc = (ForceDccFor2DShaderStorage | - ForceDccFor32BppShaderStorage); - } - else if (pInfo->revision == Pal::AsicRevision::Navi23) - { - m_settings.pipelineBinningMode = PipelineBinningModeDisable; - } - else if (pInfo->revision == Pal::AsicRevision::Navi24) - { - m_settings.pipelineBinningMode = PipelineBinningModeDisable; - m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; - m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; - m_settings.mallNoAllocSsrPolicy = 
MallNoAllocSsrAsSnsr; - } + m_settings.forceEnableDcc = (ForceDccFor2DShaderStorage | + ForceDccFor32BppShaderStorage); } -#if VKI_BUILD_GFX11 - else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + else if (pInfo->revision == Pal::AsicRevision::Navi23) + { + m_settings.pipelineBinningMode = PipelineBinningModeDisable; + } + else if (pInfo->revision == Pal::AsicRevision::Navi24) { + m_settings.pipelineBinningMode = PipelineBinningModeDisable; + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; + m_settings.mallNoAllocCtSsrPolicy = MallNoAllocCtSsrAsSnsr; + m_settings.mallNoAllocSsrPolicy = MallNoAllocSsrAsSnsr; + } + } +#if VKI_BUILD_GFX11 + else if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + { #if VKI_BUILD_NAVI31 - if (pInfo->revision == Pal::AsicRevision::Navi31) - { - // This provides ~4.2% gain at 4k - m_settings.imageTilingPreference3dGpuWritable = Pal::ImageTilingPattern::YMajor; - m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; - } + if (pInfo->revision == Pal::AsicRevision::Navi31) + { + // This provides ~4.2% gain at 4k + m_settings.imageTilingPreference3dGpuWritable = Pal::ImageTilingPattern::YMajor; + m_settings.mallNoAllocCtPolicy = MallNoAllocCtAsSnsr; + } #endif #if VKI_BUILD_NAVI33 - if (pInfo->revision == Pal::AsicRevision::Navi33) + if (pInfo->revision == Pal::AsicRevision::Navi33) + { { - { - m_settings.forceCsThreadIdSwizzling = true; - } + m_settings.forceCsThreadIdSwizzling = true; } -#endif } #endif } +#endif + } - if (appProfile == AppProfile::MetalGearSolid5) - { - m_settings.padVertexBuffers = true; - } + if (appProfile == AppProfile::MetalGearSolid5) + { + m_settings.padVertexBuffers = true; + } - if (appProfile == AppProfile::MetalGearSolid5Online) - { - m_settings.padVertexBuffers = true; - } + if (appProfile == AppProfile::MetalGearSolid5Online) + { + m_settings.padVertexBuffers = true; + } - if (appProfile == AppProfile::YamagiQuakeII) + if (appProfile == AppProfile::YamagiQuakeII) + { + 
m_settings.forceImageSharingMode = + ForceImageSharingMode::ForceImageSharingModeExclusiveForNonColorAttachments; + } + + if (appProfile == AppProfile::XPlane) + { + + if (pInfo->gfxLevel >= Pal::GfxIpLevel::GfxIp10_3) { - m_settings.forceImageSharingMode = - ForceImageSharingMode::ForceImageSharingModeExclusiveForNonColorAttachments; + m_settings.disableHtileBasedMsaaRead = true; } - if (appProfile == AppProfile::XPlane) - { + m_settings.padVertexBuffers = true; + } - if (pInfo->gfxLevel >= Pal::GfxIpLevel::GfxIp10_3) - { - m_settings.disableHtileBasedMsaaRead = true; - } + if (appProfile == AppProfile::Battlefield1) + { + m_settings.forceDisableAnisoFilter = true; + } - m_settings.padVertexBuffers = true; - } + if (appProfile == AppProfile::DDraceNetwork) + { + m_settings.ignorePreferredPresentMode = true; + } - if (appProfile == AppProfile::Battlefield1) - { - m_settings.forceDisableAnisoFilter = true; - } + if (appProfile == AppProfile::SaintsRowV) + { + m_settings.barrierFilterOptions = BarrierFilterOptions::FlushOnHostMask; - if (appProfile == AppProfile::DDraceNetwork) - { - m_settings.ignorePreferredPresentMode = true; - } + } - if (appProfile == AppProfile::SaintsRowV) + if ((appProfile == AppProfile::HalfLifeAlyx) || + (appProfile == AppProfile::Satisfactory)) + { +#if VKI_BUILD_GFX11 + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) { - m_settings.barrierFilterOptions = BarrierFilterOptions::FlushOnHostMask; - + m_settings.forcePwsMode = PwsMode::NoLateAcquirePoint; } - - if ((appProfile == AppProfile::HalfLifeAlyx) || - (appProfile == AppProfile::Satisfactory)) - { -#if VKI_BUILD_GFX11 - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) - { - m_settings.forcePwsMode = PwsMode::NoLateAcquirePoint; - } #endif - } + } - if (appProfile == AppProfile::RomeRemastered) - { + if (appProfile == AppProfile::RomeRemastered) + { #if VKI_BUILD_GFX11 - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) - { - m_settings.forcePwsMode = PwsMode::NoLateAcquirePoint; 
- } -#endif - } - - if (appProfile == AppProfile::SpidermanRemastered) + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) { - m_settings.supportMutableDescriptors = false; + m_settings.forcePwsMode = PwsMode::NoLateAcquirePoint; } +#endif + } - if (appProfile == AppProfile::Enscape) - { - m_settings.enableSpvPerfOptimal = true; - m_settings.optimizeCmdbufMode = EnableOptimizeCmdbuf; - m_settings.enableAceShaderPrefetch = false; + if (appProfile == AppProfile::SpidermanRemastered) + { + m_settings.supportMutableDescriptors = false; + } -#if VKI_RAY_TRACING - m_settings.plocRadius = 4; - m_settings.rtBvhBuildModeFastTrace = BvhBuildModeLinear; - m_settings.rtEnableTopDownBuild = false; + if (appProfile == AppProfile::Enscape) + { + m_settings.enableSpvPerfOptimal = true; + m_settings.optimizeCmdbufMode = EnableOptimizeCmdbuf; + m_settings.enableAceShaderPrefetch = false; - if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) - { - m_settings.asyncComputeQueueMaxWavesPerCu = 20; - m_settings.csWaveSize = 64; - } -#endif - } +#if VKI_RAY_TRACING + m_settings.plocRadius = 4; + m_settings.rtBvhBuildModeFastTrace = BvhBuildModeLinear; + m_settings.rtEnableTopDownBuild = false; - if (appProfile == AppProfile::Vkd3dEngine) + if (pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp10_3) { - m_settings.exportNvComputeShaderDerivatives = true; - m_settings.exportNvDeviceGeneratedCommands = true; - m_settings.exportImageCompressionControl = true; + m_settings.asyncComputeQueueMaxWavesPerCu = 20; + m_settings.csWaveSize = 64; } +#endif + } + + if (appProfile == AppProfile::Starfield) + { + OverrideVkd3dCommonSettings(&m_settings); - if ((appProfile == AppProfile::DXVK) || - (appProfile == AppProfile::Vkd3dEngine)) +#if VKI_BUILD_GFX11 + if ((pInfo->gfxLevel == Pal::GfxIpLevel::GfxIp11_0) + ) { - m_settings.disableSingleMipAnisoOverride = false; + m_settings.fsWaveSize = 32; } +#endif + } + + if (appProfile == AppProfile::Vkd3dEngine) + { + OverrideVkd3dCommonSettings(&m_settings); + } + if 
(appProfile == AppProfile::DXVK) + { + m_settings.disableSingleMipAnisoOverride = false; } return result; @@ -1779,13 +1810,6 @@ VkResult VulkanSettingsLoader::ProcessSettings( // We need to override debug file paths settings to absolute paths as per system info OverrideSettingsBySystemInfo(); - // Modify defaults based on application profile and panel settings - if ((*pAppProfile == AppProfile::AngleEngine) && m_settings.deferCompileOptimizedPipeline) - { - m_settings.enableEarlyCompile = true; - m_settings.pipelineLayoutMode = PipelineLayoutMode::PipelineLayoutAngle; - } - DumpAppProfileChanges(*pAppProfile); auto pSettingsRpcService = m_pPlatform->GetSettingsRpcService(); @@ -1923,12 +1947,6 @@ void VulkanSettingsLoader::ValidateSettings() m_settings.enableRaytracingSupport = false; } - // When using continuations, always set thread group size to 32 x 1 x 1, that's what we only support. - if (m_settings.llpcRaytracingMode == RaytracingContinuations) - { - m_settings.rtFlattenThreadGroupSize = 32; - } - #if VKI_BUILD_GFX11 // RTIP 2.0+ is always expected to support hardware traversal stack VK_ASSERT((rayTracingIpLevel <= Pal::RayTracingIpLevel::RtIp1_1) || @@ -2032,6 +2050,11 @@ void VulkanSettingsLoader::UpdatePalSettings() pPalSettings->ac01WaNotNeeded = true; } + if (m_settings.expandHiZRangeForResummarize) + { + pPalSettings->expandHiZRangeForResummarize = true; + } + } // ===================================================================================================================== diff --git a/icd/settings/settings_xgl.json b/icd/settings/settings_xgl.json index 689d0128..86935719 100644 --- a/icd/settings/settings_xgl.json +++ b/icd/settings/settings_xgl.json @@ -840,34 +840,6 @@ "Scope": "Driver", "Type": "enum" }, - { - "Name": "PipelineLayoutMode", - "Description": "Control the pipeline descriptor layout for early compile", - "Tags": [ - "Pipeline Options" - ], - "Defaults": { - "Default": "PipelineLayoutDefault" - }, - "ValidValues": { - 
"IsEnum": true, - "Values": [ - { - "Name": "PipelineLayoutDefault", - "Value": 0, - "Description": "Build descritptor layout with default layout" - }, - { - "Name": "PipelineLayoutAngle", - "Value": 1, - "Description": "Build descriptor layout compatible with angle base app" - } - ], - "Name": "PipelineLayoutMode" - }, - "Scope": "Driver", - "Type": "enum" - }, { "Name": "PipelineLayoutSchemeSelectionStrategy", "Description": "Decide how to choose the scheme of pipeline layout", @@ -1226,18 +1198,6 @@ "Type": "bool", "Scope": "Driver" }, - { - "Name": "UseShaderLibraryForPipelineLibraryFastLink", - "Description": "Whether use Pal shader library for pipeline library fast-link", - "Tags": [ - "Pipeline Options" - ], - "Defaults": { - "Default": true - }, - "Type": "bool", - "Scope": "Driver" - }, { "Name": "ForceLinkTimeOptimizationInShaderObject", "Description": "Whether force enable link time optimziation in shader object layer", @@ -1452,45 +1412,6 @@ "Scope": "Driver", "Type": "bool" }, - { - "Name": "EnableEarlyCompile", - "Description": "Enable pipeline early compile.", - "Tags": [ - "SPIRV Options" - ], - "Defaults": { - "Default": false - }, - "Scope": "Driver", - "Type": "bool" - }, - { - "Name": "DeferCompileOptimizedPipeline", - "Description": "Whether enable defer compile optimized pipeline, it only affects option EnableUberFetchShader and EnableEarlyCompile", - "Tags": [ - "SPIRV Options" - ], - "Defaults": { - "Default": false - }, - "Scope": "Driver", - "Type": "bool" - }, - { - "Name": "DeferCompileThreadCount", - "Description": "Assistant thread count for deferred compile operation, if count is greater than the internal limitation, the real thread count will be clamped to the limitation.", - "Tags": [ - "SPIRV Options" - ], - "Defaults": { - "Default": "0xFFFFFFFF" - }, - "Scope": "Driver", - "Type": "uint32", - "Flags": { - "IsHex": true - } - }, { "Name": "DisablePerCompFetch", "Description": "Disable per component fetch in uber fetch shader.", @@ 
-1539,18 +1460,6 @@
             "Scope": "Driver",
             "Type": "bool"
         },
-        {
-            "Name": "SupportPatchSpecConst",
-            "Description": "Support patch specialized constant",
-            "Tags": [
-                "SPIRV Options"
-            ],
-            "Defaults": {
-                "Default": false
-            },
-            "Scope": "Driver",
-            "Type": "bool"
-        },
         {
             "Description": "Enable pipeline dump, pipeline is stored with .pipe format. You must set AMD_DEBUG_DIR and make sure $AMD_DEBUG_DIR + pipelineDumpDir is an available directory.",
             "Tags": [
@@ -2504,7 +2413,7 @@
             ],
             "Defaults": {
                 "Default": true,
-                "AndroidDefault": false
+                "Android": false
             },
             "Type": "bool",
             "Scope": "Driver"
@@ -2763,6 +2672,18 @@
             "Type": "uint32",
             "Name": "DumpPipelineCompileCacheMatrix"
         },
+        {
+            "Description": "If true, interpMode patch will not be applied to APP. Therefore, the option should only be enabled via application profile.",
+            "Tags": [
+                "SPIRV Options"
+            ],
+            "Defaults": {
+                "Default": false
+            },
+            "Scope": "Driver",
+            "Type": "bool",
+            "Name": "DisablePatchInterpMode"
+        },
         {
             "Name": "LlpcRaytracingMode",
             "BuildTypes": [
@@ -2776,9 +2697,9 @@
                 "IsEnum": true,
                 "Values": [
                     {
-                        "Name": "RaytracingNone",
+                        "Name": "RaytracingAuto",
                         "Value": 0,
-                        "Description": "No Raytracing compiling path."
+                        "Description": "Let compiler choose raytracing mode automatically."
                },
                {
                    "Name": "RaytracingLegacy",
@@ -5712,6 +5633,18 @@
             "Scope": "Driver",
             "Type": "bool"
         },
+        {
+            "Name": "InfiniteDeviceWaitIdle",
+            "Description": "Wait for vkDeviceWaitIdle() infinitely instead of returning VK_TIMEOUT directly.",
+            "Tags": [
+                "General"
+            ],
+            "Defaults": {
+                "Default": false
+            },
+            "Scope": "Driver",
+            "Type": "bool"
+        },
         {
             "Name": "UseOldDeviceUUIDCalculation",
             "Description": "Old calculation only use pci bdf (busNumber, deviceNumber, functionNumber) to compose the UUID.",
@@ -6792,7 +6725,7 @@
                 "IsEnum": true,
                 "SkipGen": true
             },
-            "Description": "Override the ImageTilingPreference setting value for 3D images used as color attachments or shader storage.",
+            "Description": "Override the ImageTilingPreference setting value for 3D images used as color attachments or shader storage where possible.",
             "Tags": [
                 "Optimization"
             ],
@@ -7493,6 +7426,18 @@
             "Type": "enum",
             "Scope": "Driver"
         },
+        {
+            "Description": "Resummarize the htile values from the depth-stencil surface contents.",
+            "Tags": [
+                "Optimization"
+            ],
+            "Defaults": {
+                "Default": false
+            },
+            "Scope": "Driver",
+            "Type": "bool",
+            "Name": "ExpandHiZRangeForResummarize"
+        },
         {
             "Description": "Force all ray tracing shaders compiled in unified mode to have swizzled thread groups.",
             "Tags": [
@@ -7629,7 +7574,7 @@
             ],
             "Defaults": {
                 "Default": false,
-                "AndroidDefault": true
+                "Android": true
             },
             "Scope": "Driver",
             "Type": "bool",
@@ -8970,6 +8915,66 @@
             "Type": "uint32",
             "Scope": "Driver"
         },
+        {
+            "Description": "Split ray tracing dispatch call",
+            "Tags": [
+                "Ray Tracing"
+            ],
+            "BuildTypes": [
+                "VKI_RAY_TRACING"
+            ],
+            "Defaults": {
+                "Default": false
+            },
+            "Type": "bool",
+            "Name": "SplitRayTracingDispatch",
+            "Scope": "Driver"
+        },
+        {
+            "Description": "Split the X dimension of a ray tracing dispatch call into RtDispatchSplitX parts",
+            "Tags": [
+                "Ray Tracing"
+            ],
+            "BuildTypes": [
+                "VKI_RAY_TRACING"
+            ],
+            "Defaults": {
+                "Default": 16
+            },
+            "Type": "uint32",
+            "Name": 
"RtDispatchSplitX",
+            "Scope": "Driver"
+        },
+        {
+            "Description": "Split the Y dimension of a ray tracing dispatch call into RtDispatchSplitY parts",
+            "Tags": [
+                "Ray Tracing"
+            ],
+            "BuildTypes": [
+                "VKI_RAY_TRACING"
+            ],
+            "Defaults": {
+                "Default": 16
+            },
+            "Type": "uint32",
+            "Name": "RtDispatchSplitY",
+            "Scope": "Driver"
+        },
+        {
+            "Description": "Split the Z dimension of a ray tracing dispatch call into RtDispatchSplitZ parts",
+            "Tags": [
+                "Ray Tracing"
+            ],
+            "BuildTypes": [
+                "VKI_RAY_TRACING"
+            ],
+            "Defaults": {
+                "Default": 1
+            },
+            "Type": "uint32",
+            "Name": "RtDispatchSplitZ",
+            "Scope": "Driver"
+        },
         {
             "Description": "Used to wait idle on vkCreateInstance() call until a debugger is attached to running application. Effective only on Windows debug builds.",
             "Tags": [
diff --git a/icd/tools/generate/genSettingsCode.py b/icd/tools/generate/genSettingsCode.py
deleted file mode 100644
index 117ef3ae..00000000
--- a/icd/tools/generate/genSettingsCode.py
+++ /dev/null
@@ -1,120 +0,0 @@
-##
- #######################################################################################################################
- #
- # Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All Rights Reserved.
- #
- # Permission is hereby granted, free of charge, to any person obtaining a copy
- # of this software and associated documentation files (the "Software"), to deal
- # in the Software without restriction, including without limitation the rights
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- # copies of the Software, and to permit persons to whom the Software is
- # furnished to do so, subject to the following conditions:
- #
- # The above copyright notice and this permission notice shall be included in all
- # copies or substantial portions of the Software.
- # - # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - # SOFTWARE. - # - ####################################################################################################################### - -"""This is the xgl settings helper script for the generation of settings code by a script in PAL. - -This script lives in the generate/ directory, while the other generation files (e.g. template files) -reside in pal/tools/generate/ directory. -""" - -import os -import sys - -GENERATE_DIR_PATH = os.path.dirname(sys.argv[0]) -if GENERATE_DIR_PATH == "": - GENERATE_DIR_PATH = "." 
- -PYTHON_EXECUTABLE = sys.executable + " " - -PalDepth = sys.argv[1] + "/" - -GenSettingsStr = (PYTHON_EXECUTABLE + PalDepth + - "tools/generate/genSettingsCode.py --settingsFile %SettingsFile% --codeTemplateFile " - "%CodeTemplateFile% --outFilename %OutFilename% --classNameOverride %ClassNameOverride% " - "%GenRegistryCode%") - -CLASS_NAME_OVERRIDE = "%ClassName%" - -SettingsFileBasePath = GENERATE_DIR_PATH + "/../../settings/" - -# If was specified -if len(sys.argv) == 4: - GENERATE_DIR_PATH = sys.argv[3] - SettingsFileBasePath = GENERATE_DIR_PATH + "/settings/" - -GENERATE_REGISTRY_CODE_ENABLED = " --genRegistryCode" -GENERATE_REGISTRY_CODE_DISABLED = "" - -DefaultSettingsTemplateFile = os.path.dirname(os.path.abspath(__file__)) + "/vulkanSettingsCodeTemplates.py" - -settingsArgData = { - "vulkan": { - "SettingsFile": SettingsFileBasePath + "settings_xgl.json", - "CodeTemplateFile": DefaultSettingsTemplateFile, - "OutFilename": "g_settings", - "ClassNameOverride": "VulkanSettingsLoader", - "GenRegistryCode": GENERATE_REGISTRY_CODE_ENABLED, - } -} - -def gen_settings(arg_data): - """Invoke generate settings script.""" - # Build the command string - command_str = GenSettingsStr - command_str = command_str.replace("%SettingsFile%", arg_data["SettingsFile"]) - command_str = command_str.replace("%CodeTemplateFile%", arg_data["CodeTemplateFile"]) - command_str = command_str.replace("%OutFilename%", arg_data["OutFilename"]) - command_str = command_str.replace("%ClassNameOverride%", arg_data["ClassNameOverride"]) - command_str = command_str.replace("%GenRegistryCode%", arg_data["GenRegistryCode"]) - print(command_str) - - return os.system(command_str) - -USAGE = "\ -*****************************************************************************************************\n\ - Helper script to generate settings files. 
User can provide components to be generated as arguments.\n\ - The current list of supported arguments/components is:\n\ - [mandatory] - path to PAL sources\n\ - Vulkan\n\ - [optional ]\n\ - User can instead pass \'-all\' to generate all components\' settings files.\n\ - Example Usage: python genSettingsCode.py vulkan [optional: - path to output]\n\ -*****************************************************************************************************" - -if len(sys.argv) not in (3, 4): - print(USAGE) - sys.exit(1) - -if sys.argv[1] == "-all": - # Generate all the settings files - for key, value in settingsArgData.items(): - print("Generating settings code for " + key) - result = gen_settings(value) - if result != 0: - print("Error generating settings for " + key) - sys.exit(1) -else: - component = sys.argv[2] - if component in settingsArgData: - print("Generating settings code for " + component) - result = gen_settings(settingsArgData[component]) - if result != 0: - print("Error generating settings for " + component + ". 
Did you forget to check out the g_ files?") - sys.exit(1) - else: - print("Unknown component argument: " + component) - sys.exit(1) - -sys.exit(0) diff --git a/icd/tools/generate/genShaderProfile.py b/icd/tools/generate/genShaderProfile.py index 8fd8990d..325ffb3a 100644 --- a/icd/tools/generate/genShaderProfile.py +++ b/icd/tools/generate/genShaderProfile.py @@ -519,10 +519,11 @@ def get_game_titles(file_path): if "#if" in title: has_build_type = True directive = title.strip("#if ") - game_title_info[directive] = { - "gameTitles": [], - "buildTypes": {"andType": [directive]} - } + if directive not in game_title_info: + game_title_info[directive] = { + "gameTitles": [], + "buildTypes": {"andType": [directive]} + } continue if ("#end" in title) or ("#else" in title): diff --git a/icd/tools/generate/vulkanSettingsCodeTemplates.py b/icd/tools/generate/vulkanSettingsCodeTemplates.py deleted file mode 100644 index 407b8910..00000000 --- a/icd/tools/generate/vulkanSettingsCodeTemplates.py +++ /dev/null @@ -1,213 +0,0 @@ -## - ####################################################################################################################### - # - # Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All Rights Reserved. - # - # Permission is hereby granted, free of charge, to any person obtaining a copy - # of this software and associated documentation files (the "Software"), to deal - # in the Software without restriction, including without limitation the rights - # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - # copies of the Software, and to permit persons to whom the Software is - # furnished to do so, subject to the following conditions: - # - # The above copyright notice and this permission notice shall be included in all - # copies or substantial portions of the Software. 
- # - # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - # SOFTWARE. - # - ####################################################################################################################### - -'''This script defines a template for Vulkan settings initialization. -''' - -import os - -CopyrightFilePath = os.path.dirname(os.path.realpath(__file__)) + "/../xgl-copyright-template.txt" - -with open(CopyrightFilePath, encoding='utf-8') as f: - FileHeaderCopyright = f.read() - -FileHeaderWarning = "\ -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////\n\ -//\n\ -// WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING!\n\ -//\n\ -// This code has been generated automatically. Do not hand-modify this code.\n\ -//\n\ -// When changes are needed, modify the tools generating this module in the tools\\generate directory OR settings_xgl.json\n\ -//\n\ -// WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! WARNING! 
WARNING!\n\ -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////\n\ -\n" - -CopyrightAndWarning = FileHeaderCopyright + FileHeaderWarning - -HeaderFileDoxComment = "\n\ -/**\n\ -***************************************************************************************************\n\ -* @file %FileName%\n\ -* @brief auto-generated file.\n\ -* Contains the definition for the Vulkan settings struct and enums for initialization.\n\ -***************************************************************************************************\n\ -*/\n\ -#pragma once\n" - -NamespaceStart = "\nnamespace vk\n{\n" -NamespaceEnd = "\n} // vk" - -HeaderIncludes = "\n\ -#include \"pal.h\"\n\ -#include \"palUtil.h\"\n\ -#include \"palSettingsLoader.h\"\n\ -\n\ -typedef Util::uint64 uint64;\n\ -typedef Util::uint32 uint32;\n\ -typedef Util::uint8 uint8;\n\ -typedef Pal::gpusize gpusize;\n" - -CppIncludes = "#include \"settings.h\"\n\ -#include \"palDevice.h\"\n" - -IncludeDir = "settings/" - -PrefixName = "" - -DevDriverIncludes = "\n\ -#include \"devDriverServer.h\"\n\ -#include \"protocols/ddSettingsService.h\"\n\ -\n\ -using namespace DevDriver::SettingsURIService;\n\ -\n" - -Enum = "\n\ -enum %EnumName% : %EnumDataType%\n\ -{\n\ -%EnumData%\n\ -};\n" - -SettingStructName = "RuntimeSettings" - -StructDef = "\n\ -/// Pal auto-generated settings struct\n\ -struct %SettingStructName% : public Pal::DriverSettings\n\ -{\n\ -%SettingDefs%\ -};\n" - -SettingDef = " %SettingType% %SettingVarName%%ArrayLength%;\n" -SettingStructDef = "\ - struct {\n\ -%StructSettingFields%\ - } %StructSettingName%;\n" - -SettingStr = "static const char* %SettingStrName% = %SettingString%;\n" - -SetupDefaultsFunc = "\n\ -// =====================================================================================================================\n\ -// Initializes the settings structure to default values.\n\ -void %ClassName%::SetupDefaults()\n\ -{\n\ - 
// set setting variables to their default values...\n\ -%SetDefaultsCode%\n\ -}\n" - -IfMinMax = "#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= %MinVersion% && PAL_CLIENT_INTERFACE_MAJOR_VERSION <= %MaxVersion%\n" -IfMin = "#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= %MinVersion%\n" -IfMax = "#if PAL_CLIENT_INTERFACE_MAJOR_VERSION <= %MaxVersion%\n" -EndIf = "#endif\n" - -SetDefault = " m_settings.%SettingVarName% = %SettingDefault%;\n" -SetStringDefault = " memset(m_settings.%SettingVarName%, 0, %SettingStringLength%);\n\ - strncpy(m_settings.%SettingVarName%, %SettingDefault%, %SettingStringLength%);\n" - -SetArrayDefault = " memset(m_settings.%SettingVarName%, 0, %SettingSize%);\n\ - memcpy(m_settings.%SettingVarName%, %SettingDefault%, %SettingSize%);\n" - -WinIfDef = "defined(_WIN32)\n" -LnxIfDef = "(__unix__)\n" -AndroidIfDef = "defined(__ANDROID__)\n" - -ReadSettingsFunc = "\n\ -// =====================================================================================================================\n\ -%ReadSettingsDesc%\n\ -void %ClassName%::%ReadSettingsName%()\n\ -{\n\ - // read from the OS adapter for each individual setting\n\ -%ReadSettingsCode%\n\ -}\n" - -PalReadSettingClass = "static_cast(m_pDevice)" -ReadSetting = " %ReadSettingClass%->ReadSetting(%SettingStrName%,\n\ - %OsiSettingType%,\n\ - %SettingRegistryType%,\n\ - &m_settings.%SettingVarName%);\n\n" -ReadSettingStr = " %ReadSettingClass%->ReadSetting(%SettingStrName%,\n\ - %OsiSettingType%,\n\ - %SettingRegistryType%,\n\ - &m_settings.%SettingVarName%,\n\ - %StringLength%);\n\n" -PalOsiSettingType = "Pal::SettingScope::%OsiSettingType%" - -SettingHashListName = "g_%LowerCamelComponentName%SettingHashList" -SettingNumSettingsName = "g_%LowerCamelComponentName%NumSettings" -SettingHashList = "\n\ -static const uint32 %SettingNumSettingsName% = %NumSettings%;\n\ -static const Pal::SettingNameHash %SettingHashListName%[] = {\n\ -%SettingHashList%\ -};\n" - -InitSettingsInfoFunc = "\n\ -// 
=====================================================================================================================\n\ -// Initializes the SettingInfo hash map and array of setting hashes.\n\ -void %ClassName%::InitSettingsInfo()\n\ -{\n\ - SettingInfo info = {};\n\ -%InitSettingInfoCode%\n\ -}\n" - -InitSettingInfo = "\n\ - info.type = %DevDriverType%;\n\ - info.pValuePtr = &m_settings.%SettingVarName%;\n\ - info.valueSize = sizeof(m_settings.%SettingVarName%);\n\ - m_settingsInfoMap.Insert(%HashName%, info);\n" - -JsonDataArray = "\n\ -static const uint8 %JsonDataArrayName%[] = {\n\ -%JsonArrayData%\n\ -}; // %JsonDataArrayName%[]\n" - -DevDriverRegisterFunc = "\n\ -// =====================================================================================================================\n\ -// Registers the core settings with the Developer Driver settings service.\n\ -void %ClassName%::DevDriverRegister()\n\ -{\n\ - auto* pDevDriverServer = static_cast(m_pPlatform)->GetDevDriverServer();\n\ - if (pDevDriverServer != nullptr)\n\ - {\n\ - auto* pSettingsService = pDevDriverServer->GetSettingsService();\n\ - if (pSettingsService != nullptr)\n\ - {\n\ - RegisteredComponent component = {};\n\ - strncpy(&component.componentName[0], m_pComponentName, kMaxComponentNameStrLen);\n\ - component.pPrivateData = static_cast(this);\n\ - component.pSettingsHashes = &%SettingHashListName%[0];\n\ - component.numSettings = %SettingNumSettingsName%;\n\ - component.pfnGetValue = ISettingsLoader::GetValue;\n\ - component.pfnSetValue = ISettingsLoader::SetValue;\n\ - component.pSettingsData = &%JsonDataArrayName%[0];\n\ - component.settingsDataSize = sizeof(%JsonDataArrayName%);\n\ - component.settingsDataHash = %SettingsDataHash%;\n\ - component.settingsDataHeader.isEncoded = %IsJsonEncoded%;\n\ - component.settingsDataHeader.magicBufferId = %MagicBufferId%;\n\ - component.settingsDataHeader.magicBufferOffset = %MagicBufferOffset%;\n\ -\n\ - 
pSettingsService->RegisterComponent(component);\n\ - }\n\ - }\n\ -}\n"