From 00af22e9d867e7153717b213af2d282e02116ebf Mon Sep 17 00:00:00 2001 From: krickw Date: Thu, 3 Dec 2020 14:50:47 -0500 Subject: [PATCH 1/5] Prototype support for GPU Normals and OSD using glsl. The code is disabled by default. --- cmake/modules/FindMaya.cmake | 3 +- lib/mayaUsd/render/CMakeLists.txt | 1 + .../render/vp2ComputeShaders/CMakeLists.txt | 13 + .../vp2ComputeShaders/computeNormals.cl | 34 + .../vp2ComputeShaders/computeNormals.glsl | 69 + .../render/vp2ComputeShaders/plugInfo.json | 11 + .../render/vp2RenderDelegate/CMakeLists.txt | 1 + lib/mayaUsd/render/vp2RenderDelegate/mesh.cpp | 410 +++-- lib/mayaUsd/render/vp2RenderDelegate/mesh.h | 35 +- .../vp2RenderDelegate/meshViewportCompute.cpp | 1335 +++++++++++++++++ .../vp2RenderDelegate/meshViewportCompute.h | 261 ++++ 11 files changed, 2075 insertions(+), 98 deletions(-) create mode 100644 lib/mayaUsd/render/vp2ComputeShaders/CMakeLists.txt create mode 100644 lib/mayaUsd/render/vp2ComputeShaders/computeNormals.cl create mode 100644 lib/mayaUsd/render/vp2ComputeShaders/computeNormals.glsl create mode 100644 lib/mayaUsd/render/vp2ComputeShaders/plugInfo.json create mode 100644 lib/mayaUsd/render/vp2RenderDelegate/meshViewportCompute.cpp create mode 100644 lib/mayaUsd/render/vp2RenderDelegate/meshViewportCompute.h diff --git a/cmake/modules/FindMaya.cmake b/cmake/modules/FindMaya.cmake index 7d8f9ad29a..ed825217a4 100644 --- a/cmake/modules/FindMaya.cmake +++ b/cmake/modules/FindMaya.cmake @@ -207,7 +207,8 @@ foreach(MAYA_LIB IMFbase tbb cg - cgGL) + cgGL + clew) find_library(MAYA_${MAYA_LIB}_LIBRARY ${MAYA_LIB} diff --git a/lib/mayaUsd/render/CMakeLists.txt b/lib/mayaUsd/render/CMakeLists.txt index 4c6902a784..08d0cf135e 100644 --- a/lib/mayaUsd/render/CMakeLists.txt +++ b/lib/mayaUsd/render/CMakeLists.txt @@ -1,4 +1,5 @@ add_subdirectory(px_vp20) add_subdirectory(pxrUsdMayaGL) +add_subDirectory(vp2ComputeShaders) add_subdirectory(vp2RenderDelegate) add_subdirectory(vp2ShaderFragments) diff --git a/lib/mayaUsd/render/vp2ComputeShaders/CMakeLists.txt b/lib/mayaUsd/render/vp2ComputeShaders/CMakeLists.txt new file mode 100644 index 0000000000..db6b641dcf --- /dev/null +++ b/lib/mayaUsd/render/vp2ComputeShaders/CMakeLists.txt @@ -0,0 +1,13 @@ +# ----------------------------------------------------------------------------- +# install +# ----------------------------------------------------------------------------- +list(APPEND MAYAUSD_COMPUTESHADERS + computeNormals.glsl + computeNormals.cl + plugInfo.json +) + +install(FILES ${MAYAUSD_COMPUTESHADERS} + DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/usd/mayaUSD_ComputeShaders/resources +) + diff --git a/lib/mayaUsd/render/vp2ComputeShaders/computeNormals.cl b/lib/mayaUsd/render/vp2ComputeShaders/computeNormals.cl new file mode 100644 index 0000000000..11b9a2e050 --- /dev/null +++ b/lib/mayaUsd/render/vp2ComputeShaders/computeNormals.cl @@ -0,0 +1,34 @@ +__kernel void computeNormals( + __global const float* positions, + const unsigned int vertexCount, // number of posisions and normals + /* Adjacency buffer is two distinct parts. + First, two ints per vertex the offset and the valence. The valence is the number of adjacent + vertices. The offset is the offset into the adjacency buffer to find the vertex ids of the + adjacent vertices. Next, a list of vertex ids of the + adjacent vertices for each vertex, found using the information from the first part of the + buffer. + */ + __global const int* adjacency, + __global float* normals) +{ + unsigned int vertexId = get_global_id(0); + if (vertexId >= vertexCount) + return; + + unsigned int offsetIdx = vertexId * 2; + int offset = adjacency[offsetIdx]; + int valence = adjacency[offsetIdx + 1]; + __global int* currAdj = &adjacency[offset]; + + const float3 currVertex = vload3(vertexId, positions); + float3 accumulatedNormal = (float3)(0.0f, 0.0f, 0.0f); + + for (int neighbour = 0; neighbour < valence; neighbour++) { + float3 prevVertex = vload3(*currAdj++, positions); + float3 nextVertex = vload3(*currAdj++, positions); + accumulatedNormal += cross(nextVertex - currVertex, prevVertex - currVertex); + } + + accumulatedNormal = normalize(accumulatedNormal); + vstore3(accumulatedNormal, vertexId, normals); +} diff --git a/lib/mayaUsd/render/vp2ComputeShaders/computeNormals.glsl b/lib/mayaUsd/render/vp2ComputeShaders/computeNormals.glsl new file mode 100644 index 0000000000..7778d04982 --- /dev/null +++ b/lib/mayaUsd/render/vp2ComputeShaders/computeNormals.glsl @@ -0,0 +1,69 @@ +#version 430 + +layout( std140, binding=0 ) uniform Values +{ + uint VertexCount; +}; + +// This is a float3 but for buffer layout to be correct use float +layout( std430, binding=1 ) buffer Pos +{ + float Positions[ ]; +}; + +layout( std430, binding=2 ) buffer Adj +{ + int Adjacency[ ]; +}; + +layout( std430, binding=3 ) buffer RtoS +{ + int RenderingToScene[ ]; +}; + +layout( std430, binding=4 ) buffer StoR +{ + int SceneToRendering[ ]; +}; + +// This is a float3 but for buffer layout to be correct use float +layout( std430, binding=5 ) buffer Norm +{ + float Normals[ ]; +}; + +layout( local_size_x = 256, local_size_y = 1, local_size_z = 1) in; + +void main() { + uint renderingVertexId = gl_GlobalInvocationID.x; + uint renderingVertexOffset = renderingVertexId *3; + + if (renderingVertexId < VertexCount) + { + uint sceneVertexId = RenderingToScene[renderingVertexId]; + + uint adjOffsetIdx = sceneVertexId*2; + int offset = Adjacency[adjOffsetIdx]; + int valence = Adjacency[adjOffsetIdx+1]; + + vec3 currVertex = vec3(Positions[renderingVertexOffset], Positions[renderingVertexOffset+1], Positions[renderingVertexOffset+2]); + vec3 accumulatedNormal = vec3(0.0, 0.0, 0.0); + + for (int neighbour=0; neighbourdrawAPIVersion() returns 4. + // Compute was added in 4.3 so I don't have enough information to make the check + if (renderer && renderer->drawAPIIsOpenGL() + && (TfGetenvInt("HDVP2_USE_GPU_NORMAL_COMPUTATION", 0) > 0)) + _gpuNormalsComputeThreshold + = TfGetenvInt("HDVP2_GPU_NORMAL_COMPUTATION_MINIMUM_THRESHOLD", 8000); + else + _gpuNormalsComputeThreshold = INT_MAX; +} + +int HdVP2Mesh::_gpuNormalsComputeThreshold = -1; //! \brief Constructor #if defined(HD_API_VERSION) && HD_API_VERSION >= 36 HdVP2Mesh::HdVP2Mesh(HdVP2RenderDelegate* delegate, const SdfPath& id) @@ -338,9 +353,16 @@ HdVP2Mesh::HdVP2Mesh(HdVP2RenderDelegate* delegate, const SdfPath& id, const Sdf , _delegate(delegate) , _rprimId(id.GetText()) { + _meshSharedData = std::make_shared(); const MHWRender::MVertexBufferDescriptor vbDesc( "", MHWRender::MGeometry::kPosition, MHWRender::MGeometry::kFloat, 3); - _meshSharedData._positionsBuffer.reset(new MHWRender::MVertexBuffer(vbDesc)); + _meshSharedData->_positionsBuffer.reset(new MHWRender::MVertexBuffer(vbDesc)); + // HdChangeTracker::IsVarying() can check dirty bits to tell us if an object is animated or not. + // Not sure if it is correct on file load +#ifdef HDVP2_ENABLE_GPU_COMPUTE + static std::once_flag initGPUComputeOnce; + std::call_once(initGPUComputeOnce, _InitGPUCompute); +#endif } //! \brief Synchronize VP2 state with scene delegate state based on dirty bits and representation @@ -398,18 +420,28 @@ void HdVP2Mesh::Sync( } if (HdChangeTracker::IsTopologyDirty(*dirtyBits, id)) { - _meshSharedData._topology = GetMeshTopology(delegate); + _meshSharedData->_topology = GetMeshTopology(delegate); - const HdMeshTopology& topology = _meshSharedData._topology; + const HdMeshTopology& topology = _meshSharedData->_topology; const VtIntArray& faceVertexIndices = topology.GetFaceVertexIndices(); const size_t numFaceVertexIndices = faceVertexIndices.size(); VtIntArray newFaceVertexIndices; newFaceVertexIndices.resize(numFaceVertexIndices); - if (_IsUnsharedVertexLayoutRequired(_meshSharedData._primvarSourceMap)) { - _meshSharedData._numVertices = numFaceVertexIndices; - _meshSharedData._renderingToSceneFaceVtxIds = faceVertexIndices; + if (_IsUnsharedVertexLayoutRequired(_meshSharedData->_primvarSourceMap)) { + _meshSharedData->_numVertices = numFaceVertexIndices; + _meshSharedData->_renderingToSceneFaceVtxIds = faceVertexIndices; + _meshSharedData->_sceneToRenderingFaceVtxIds.clear(); + _meshSharedData->_sceneToRenderingFaceVtxIds.resize(topology.GetNumPoints(), -1); + + for (size_t i = 0; i < numFaceVertexIndices; i++) { + const int sceneFaceVtxId = faceVertexIndices[i]; + _meshSharedData->_sceneToRenderingFaceVtxIds[sceneFaceVtxId] + = i; // could check if the existing value is -1, but it doesn't matter. we just + // need to map to a vertex in the position buffer that has the correct + // value. + } // Fill with sequentially increasing values, starting from 0. The new // face vertex indices will be used to populate index data for unshared @@ -419,31 +451,37 @@ void HdVP2Mesh::Sync( // should update _FillPrimvarData() code to remap indices correctly. std::iota(newFaceVertexIndices.begin(), newFaceVertexIndices.end(), 0); } else { - _meshSharedData._numVertices = topology.GetNumPoints(); - _meshSharedData._renderingToSceneFaceVtxIds.clear(); + _meshSharedData->_numVertices = topology.GetNumPoints(); + _meshSharedData->_renderingToSceneFaceVtxIds.clear(); // Allocate large enough memory with initial value of -1 to indicate // the rendering face vertex index is not determined yet. - std::vector authorToRenderFaceVtxIds(numFaceVertexIndices, -1); + _meshSharedData->_sceneToRenderingFaceVtxIds.clear(); + _meshSharedData->_sceneToRenderingFaceVtxIds.resize(numFaceVertexIndices, -1); + unsigned int sceneToRenderingFaceVtxIdsCount = 0; // Sort vertices to avoid drastically jumping indices. Cache efficiency // is important to fast rendering performance for dense mesh. for (size_t i = 0; i < numFaceVertexIndices; i++) { - const int authorFaceVtxId = faceVertexIndices[i]; + const int sceneFaceVtxId = faceVertexIndices[i]; - int renderFaceVtxId = authorToRenderFaceVtxIds[authorFaceVtxId]; + int renderFaceVtxId = _meshSharedData->_sceneToRenderingFaceVtxIds[sceneFaceVtxId]; if (renderFaceVtxId < 0) { - renderFaceVtxId = _meshSharedData._renderingToSceneFaceVtxIds.size(); - _meshSharedData._renderingToSceneFaceVtxIds.push_back(authorFaceVtxId); + renderFaceVtxId = _meshSharedData->_renderingToSceneFaceVtxIds.size(); + _meshSharedData->_renderingToSceneFaceVtxIds.push_back(sceneFaceVtxId); - authorToRenderFaceVtxIds[authorFaceVtxId] = renderFaceVtxId; + _meshSharedData->_sceneToRenderingFaceVtxIds[sceneFaceVtxId] = renderFaceVtxId; + sceneToRenderingFaceVtxIdsCount++; } newFaceVertexIndices[i] = renderFaceVtxId; } + + _meshSharedData->_sceneToRenderingFaceVtxIds.resize( + sceneToRenderingFaceVtxIdsCount); // drop any extra -1 values. } - _meshSharedData._renderingTopology = HdMeshTopology( + _meshSharedData->_renderingTopology = HdMeshTopology( topology.GetScheme(), topology.GetOrientation(), topology.GetFaceVertexCounts(), @@ -456,39 +494,46 @@ void HdVP2Mesh::Sync( // be updated only once when it gets dirty. if (HdChangeTracker::IsPrimvarDirty(*dirtyBits, id, HdTokens->points)) { const VtValue value = delegate->Get(id, HdTokens->points); - _meshSharedData._points = value.Get(); + _meshSharedData->_points = value.Get(); - const HdMeshTopology& topology = _meshSharedData._topology; - const size_t numVertices = _meshSharedData._numVertices; - - void* bufferData = _meshSharedData._positionsBuffer->acquire(numVertices, true); - if (bufferData) { - _FillPrimvarData( - static_cast(bufferData), - numVertices, - 0, - _meshSharedData._renderingToSceneFaceVtxIds, - _rprimId, - topology, - HdTokens->points, - _meshSharedData._points, - HdInterpolationVertex); - - // Capture class member for lambda - MHWRender::MVertexBuffer* const positionsBuffer - = _meshSharedData._positionsBuffer.get(); - const MString& rprimId = _rprimId; + const HdMeshTopology& topology = _meshSharedData->_topology; + const size_t numVertices = _meshSharedData->_numVertices; +#ifdef HDVP2_ENABLE_GPU_COMPUTE + _gpuNormalsEnabled = _gpuNormalsEnabled && numVertices >= _gpuNormalsComputeThreshold; +#else + _gpuNormalsEnabled = false; +#endif - _delegate->GetVP2ResourceRegistry().EnqueueCommit( - [positionsBuffer, bufferData, rprimId]() { - MProfilingScope profilingScope( - HdVP2RenderDelegate::sProfilerCategory, - MProfiler::kColorC_L2, - rprimId.asChar(), - "CommitPositions"); + if (numVertices > 0) { + void* bufferData = _meshSharedData->_positionsBuffer->acquire(numVertices, true); + if (bufferData) { + _FillPrimvarData( + static_cast(bufferData), + numVertices, + 0, + _meshSharedData->_renderingToSceneFaceVtxIds, + _rprimId, + topology, + HdTokens->points, + _meshSharedData->_points, + HdInterpolationVertex); + + // Capture class member for lambda + MHWRender::MVertexBuffer* const positionsBuffer + = _meshSharedData->_positionsBuffer.get(); + const MString& rprimId = _rprimId; + + _delegate->GetVP2ResourceRegistry().EnqueueCommit( + [positionsBuffer, bufferData, rprimId]() { + MProfilingScope profilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorC_L2, + rprimId.asChar(), + "CommitPositions"); - positionsBuffer->commit(bufferData); - }); + positionsBuffer->commit(bufferData); + }); + } } } @@ -510,7 +555,7 @@ void HdVP2Mesh::Sync( | HdChangeTracker::DirtyVisibility #endif )) { - _meshSharedData._renderTag = delegate->GetRenderTag(id); + _meshSharedData->_renderTag = delegate->GetRenderTag(id); } *dirtyBits = HdChangeTracker::Clean; @@ -848,20 +893,27 @@ void HdVP2Mesh::_UpdateDrawItem( const HdRenderIndex& renderIndex = sceneDelegate->GetRenderIndex(); - const HdMeshTopology& topology = _meshSharedData._topology; - const auto& primvarSourceMap = _meshSharedData._primvarSourceMap; - const size_t numVertices = _meshSharedData._numVertices; + const HdMeshTopology& topology = _meshSharedData->_topology; + const auto& primvarSourceMap = _meshSharedData->_primvarSourceMap; + const size_t numVertices = _meshSharedData->_numVertices; // The bounding box item uses a globally-shared geometry data therefore it // doesn't need to extract index data from topology. Points use non-indexed // draw. const bool isBBoxItem = (renderItem->drawMode() == MHWRender::MGeometry::kBoundingBox); const bool isPointSnappingItem = (renderItem->primitive() == MHWRender::MGeometry::kPoints); +#ifdef HDVP2_ENABLE_GPU_OSD + const bool isLineItem = (renderItem->primitive() == MHWRender::MGeometry::kLines); + // when we do OSD we don't bother creating indexing until after we have a smooth mesh + const bool requiresIndexUpdate + = !isBBoxItem && !isPointSnappingItem && (!_gpuOSDEnabled || isLineItem); +#else const bool requiresIndexUpdate = !isBBoxItem && !isPointSnappingItem; +#endif // Prepare index buffer. if (requiresIndexUpdate && (itemDirtyBits & HdChangeTracker::DirtyTopology)) { - const HdMeshTopology& topologyToUse = _meshSharedData._renderingTopology; + const HdMeshTopology& topologyToUse = _meshSharedData->_renderingTopology; if (desc.geomStyle == HdMeshGeomStyleHull) { HdMeshUtil meshUtil(&topologyToUse, id); @@ -871,19 +923,31 @@ void HdVP2Mesh::_UpdateDrawItem( const int numIndex = trianglesFaceVertexIndices.size() * 3; - stateToCommit._indexBufferData - = static_cast(drawItemData._indexBuffer->acquire(numIndex, true)); - - memcpy( - stateToCommit._indexBufferData, - trianglesFaceVertexIndices.data(), - numIndex * sizeof(int)); + stateToCommit._indexBufferData = numIndex > 0 + ? static_cast(drawItemData._indexBuffer->acquire(numIndex, true)) + : nullptr; + if (stateToCommit._indexBufferData) + memcpy( + stateToCommit._indexBufferData, + trianglesFaceVertexIndices.data(), + numIndex * sizeof(int)); + +#ifdef HDVP2_ENABLE_GPU_COMPUTE + if (requireSmoothNormals && (_gpuNormalsEnabled || _gpuOSDEnabled)) { + // these function only do something if HDVP2_ENABLE_GPU_COMPUTE or + // HDVP2_ENABLE_GPU_OSD is defined + _CreateViewportCompute(*drawItem); +#ifdef HDVP2_ENABLE_GPU_OSD + _CreateOSDTables(); +#endif + } +#endif } else if (desc.geomStyle == HdMeshGeomStyleHullEdgeOnly) { unsigned int numIndex = _GetNumOfEdgeIndices(topologyToUse); - stateToCommit._indexBufferData - = static_cast(drawItemData._indexBuffer->acquire(numIndex, true)); - + stateToCommit._indexBufferData = numIndex + ? static_cast(drawItemData._indexBuffer->acquire(numIndex, true)) + : nullptr; _FillEdgeIndices(stateToCommit._indexBufferData, topologyToUse); } } @@ -910,23 +974,36 @@ void HdVP2Mesh::_UpdateDrawItem( if (!normals.empty()) { prepareNormals = ((itemDirtyBits & HdChangeTracker::DirtyNormals) != 0); } else if (requireSmoothNormals && (itemDirtyBits & DirtySmoothNormals)) { - // note: normals gets dirty when points are marked as dirty, - // at change tracker. - // HdC_TODO: move the normals computation to GPU to save expensive - // computation and buffer transfer. - Hd_VertexAdjacencySharedPtr adjacency(new Hd_VertexAdjacency()); - HdBufferSourceSharedPtr adjacencyComputation - = adjacency->GetSharedAdjacencyBuilderComputation(&topology); - adjacencyComputation->Resolve(); // IS the adjacency updated now? - - // Only the points referenced by the topology are used to compute - // smooth normals. - normals = Hd_SmoothNormals::ComputeSmoothNormals( - adjacency.get(), _meshSharedData._points.size(), _meshSharedData._points.cdata()); - - interp = HdInterpolationVertex; - - prepareNormals = !normals.empty(); +#ifdef HDVP2_ENABLE_GPU_COMPUTE + if (_gpuNormalsEnabled || _gpuOSDEnabled) { + if (!_meshSharedData->_viewportCompute) { + _CreateViewportCompute(*drawItem); +#ifdef HDVP2_ENABLE_GPU_OSD + _CreateOSDTables(); +#endif + } + _meshSharedData->_viewportCompute->setNormalVertexBufferGPUDirty(); + prepareNormals = false; + } else +#endif + { + // note: normals gets dirty when points are marked as dirty, + // at change tracker. + Hd_VertexAdjacencySharedPtr adjacency(new Hd_VertexAdjacency()); + HdBufferSourceSharedPtr adjacencyComputation + = adjacency->GetSharedAdjacencyBuilderComputation(&topology); + adjacencyComputation->Resolve(); // IS the adjacency updated now? + + // Only the points referenced by the topology are used to compute + // smooth normals. + normals = Hd_SmoothNormals::ComputeSmoothNormals( + adjacency.get(), + _meshSharedData->_points.size(), + _meshSharedData->_points.cdata()); + interp = HdInterpolationVertex; + + prepareNormals = !normals.empty(); + } } if (prepareNormals) { @@ -937,13 +1014,15 @@ void HdVP2Mesh::_UpdateDrawItem( drawItemData._normalsBuffer.reset(new MHWRender::MVertexBuffer(vbDesc)); } - void* bufferData = drawItemData._normalsBuffer->acquire(numVertices, true); + void* bufferData = numVertices > 0 + ? drawItemData._normalsBuffer->acquire(numVertices, true) + : nullptr; if (bufferData) { _FillPrimvarData( static_cast(bufferData), numVertices, 0, - _meshSharedData._renderingToSceneFaceVtxIds, + _meshSharedData->_renderingToSceneFaceVtxIds, _rprimId, topology, HdTokens->normals, @@ -1040,7 +1119,9 @@ void HdVP2Mesh::_UpdateDrawItem( drawItemData._colorBuffer.reset(new MHWRender::MVertexBuffer(vbDesc)); } - void* bufferData = drawItemData._colorBuffer->acquire(numVertices, true); + void* bufferData = numVertices > 0 + ? drawItemData._colorBuffer->acquire(numVertices, true) + : nullptr; // Fill color and opacity into the float4 color stream. if (bufferData) { @@ -1048,7 +1129,7 @@ void HdVP2Mesh::_UpdateDrawItem( static_cast(bufferData), numVertices, 0, - _meshSharedData._renderingToSceneFaceVtxIds, + _meshSharedData->_renderingToSceneFaceVtxIds, _rprimId, topology, HdTokens->displayColor, @@ -1059,7 +1140,7 @@ void HdVP2Mesh::_UpdateDrawItem( static_cast(bufferData), numVertices, 3, - _meshSharedData._renderingToSceneFaceVtxIds, + _meshSharedData->_renderingToSceneFaceVtxIds, _rprimId, topology, HdTokens->displayOpacity, @@ -1118,13 +1199,13 @@ void HdVP2Mesh::_UpdateDrawItem( } if (buffer) { - bufferData = buffer->acquire(numVertices, true); + bufferData = numVertices > 0 ? buffer->acquire(numVertices, true) : nullptr; if (bufferData) { _FillPrimvarData( static_cast(bufferData), numVertices, 0, - _meshSharedData._renderingToSceneFaceVtxIds, + _meshSharedData->_renderingToSceneFaceVtxIds, _rprimId, topology, token, @@ -1142,13 +1223,13 @@ void HdVP2Mesh::_UpdateDrawItem( } if (buffer) { - bufferData = buffer->acquire(numVertices, true); + bufferData = numVertices > 0 ? buffer->acquire(numVertices, true) : nullptr; if (bufferData) { _FillPrimvarData( static_cast(bufferData), numVertices, 0, - _meshSharedData._renderingToSceneFaceVtxIds, + _meshSharedData->_renderingToSceneFaceVtxIds, _rprimId, topology, token, @@ -1166,13 +1247,13 @@ void HdVP2Mesh::_UpdateDrawItem( } if (buffer) { - bufferData = buffer->acquire(numVertices, true); + bufferData = numVertices > 0 ? buffer->acquire(numVertices, true) : nullptr; if (bufferData) { _FillPrimvarData( static_cast(bufferData), numVertices, 0, - _meshSharedData._renderingToSceneFaceVtxIds, + _meshSharedData->_renderingToSceneFaceVtxIds, _rprimId, topology, token, @@ -1190,13 +1271,13 @@ void HdVP2Mesh::_UpdateDrawItem( } if (buffer) { - bufferData = buffer->acquire(numVertices, true); + bufferData = numVertices > 0 ? buffer->acquire(numVertices, true) : nullptr; if (bufferData) { _FillPrimvarData( static_cast(bufferData), numVertices, 0, - _meshSharedData._renderingToSceneFaceVtxIds, + _meshSharedData->_renderingToSceneFaceVtxIds, _rprimId, topology, token, @@ -1403,7 +1484,7 @@ void HdVP2Mesh::_UpdateDrawItem( & (HdChangeTracker::DirtyVisibility | HdChangeTracker::DirtyRenderTag | HdChangeTracker::DirtyPoints | HdChangeTracker::DirtyExtent | DirtySelectionHighlight))) { - bool enable = drawItem->GetVisible() && !_meshSharedData._points.empty() + bool enable = drawItem->GetVisible() && !_meshSharedData->_points.empty() && !instancerWithNoInstances; if (isDedicatedSelectionHighlightItem) { @@ -1414,7 +1495,7 @@ void HdVP2Mesh::_UpdateDrawItem( enable = enable && !range.IsEmpty(); } - enable = enable && drawScene.DrawRenderTag(_meshSharedData._renderTag); + enable = enable && drawScene.DrawRenderTag(_meshSharedData->_renderTag); if (drawItemData._enabled != enable) { drawItemData._enabled = enable; @@ -1431,7 +1512,7 @@ void HdVP2Mesh::_UpdateDrawItem( drawItem->ResetDirtyBits(); // Capture the valid position buffer and index buffer - MHWRender::MVertexBuffer* positionsBuffer = _meshSharedData._positionsBuffer.get(); + MHWRender::MVertexBuffer* positionsBuffer = _meshSharedData->_positionsBuffer.get(); MHWRender::MIndexBuffer* indexBuffer = drawItemData._indexBuffer.get(); if (isBBoxItem) { @@ -1611,6 +1692,145 @@ void HdVP2Mesh::_HideAllDrawItems(const TfToken& reprToken) } } +#ifdef HDVP2_ENABLE_GPU_COMPUTE +/*! \brief Save topology information for later GPGPU evaluation + + This function pulls topology and UV data from the scene delegate and save that + information to be used as an input to the normal calculation later. +*/ +void HdVP2Mesh::_CreateViewportCompute(const HdVP2DrawItem& drawItem) +{ + if (_meshSharedData->_viewportCompute) { + // I can't handle multiple draw items that require normals + TF_VERIFY(_meshSharedData->_viewportCompute->verifyDrawItem(drawItem)); + } else { + _meshSharedData->_viewportCompute + = MSharedPtr::make<>(_meshSharedData, &drawItem); + MHWRender::MRenderItem* renderItem = drawItem.GetRenderItem(); + renderItem->addViewportComputeItem(_meshSharedData->_viewportCompute); + } +} +#endif + +#ifdef HDVP2_ENABLE_GPU_OSD +void HdVP2Mesh::_CreateOSDTables() +{ +#if defined(DO_CPU_OSD) || defined(DO_OPENGL_OSD) + if (!_gpuOSDEnabled) + return; + + assert(_meshSharedData->_viewportCompute); + MProfilingScope subProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, MProfiler::kColorD_L2, "createOSDTables"); + + // create topology refiner + PxOsdTopologyRefinerSharedPtr refiner; + + OpenSubdiv::Far::StencilTable const* vertexStencils = nullptr; + OpenSubdiv::Far::StencilTable const* varyingStencils = nullptr; + OpenSubdiv::Far::PatchTable const* patchTable = nullptr; + + HdMeshTopology* topology + = &_meshSharedData->_renderingTopology; // TODO: something with _topology? + + // for empty topology, we don't need to refine anything. + // but still need to return the typed buffer for codegen + if (topology->GetFaceVertexCounts().size() == 0) { + // leave refiner empty + } else { + refiner = PxOsdRefinerFactory::Create( + topology->GetPxOsdMeshTopology(), TfToken(_meshSharedData->_renderTag.GetText())); + } + + if (refiner) { + OpenSubdiv::Far::PatchTableFactory::Options patchOptions( + _meshSharedData->_viewportCompute->level); + if (_meshSharedData->_viewportCompute->adaptive) { + patchOptions.endCapType + = OpenSubdiv::Far::PatchTableFactory::Options::ENDCAP_BSPLINE_BASIS; +#if OPENSUBDIV_VERSION_NUMBER >= 30400 + // Improve fidelity when refining to limit surface patches + // These options supported since v3.1.0 and v3.2.0 respectively. + patchOptions.useInfSharpPatch = true; + patchOptions.generateLegacySharpCornerPatches = false; +#endif + } + + // split trace scopes. + { + MProfilingScope subProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, MProfiler::kColorD_L2, "refine"); + if (_meshSharedData->_viewportCompute->adaptive) { + OpenSubdiv::Far::TopologyRefiner::AdaptiveOptions adaptiveOptions( + _meshSharedData->_viewportCompute->level); +#if OPENSUBDIV_VERSION_NUMBER >= 30400 + adaptiveOptions = patchOptions.GetRefineAdaptiveOptions(); +#endif + refiner->RefineAdaptive(adaptiveOptions); + } else { + refiner->RefineUniform(_meshSharedData->_viewportCompute->level); + } + } +#define GENERATE_SOURCE_TABLES +#ifdef GENERATE_SOURCE_TABLES + { + MProfilingScope subProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, MProfiler::kColorD_L2, "stencilFactory"); + OpenSubdiv::Far::StencilTableFactory::Options options; + options.generateOffsets = true; + options.generateIntermediateLevels = _meshSharedData->_viewportCompute->adaptive; + options.interpolationMode = OpenSubdiv::Far::StencilTableFactory::INTERPOLATE_VERTEX; + vertexStencils = OpenSubdiv::Far::StencilTableFactory::Create(*refiner, options); + + options.interpolationMode = OpenSubdiv::Far::StencilTableFactory::INTERPOLATE_VARYING; + varyingStencils = OpenSubdiv::Far::StencilTableFactory::Create(*refiner, options); + } + { + MProfilingScope subProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, MProfiler::kColorD_L2, "patchFactory"); + patchTable = OpenSubdiv::Far::PatchTableFactory::Create(*refiner, patchOptions); + } +#else + // grab the values we need from the refiner. + const OpenSubdiv::Far::TopologyLevel& refinedLevel + = refiner->GetLevel(refiner->GetMaxLevel()); + size_t indexLength = refinedLevel.GetNumFaces() + * 4; // i know it is quads but not always? can we do this more safely? + size_t vertexLength = GetNumVerticesTotal(); + // save these values and use them to create the updated geometry index mapping. +#endif + } +#ifdef GENERATE_SOURCE_TABLES + // merge endcap + if (patchTable && patchTable->GetLocalPointStencilTable()) { + // append stencils + if (OpenSubdiv::Far::StencilTable const* vertexStencilsWithLocalPoints + = OpenSubdiv::Far::StencilTableFactory::AppendLocalPointStencilTable( + *refiner, vertexStencils, patchTable->GetLocalPointStencilTable())) { + delete vertexStencils; + vertexStencils = vertexStencilsWithLocalPoints; + } + if (OpenSubdiv::Far::StencilTable const* varyingStencilsWithLocalPoints + = OpenSubdiv::Far::StencilTableFactory::AppendLocalPointStencilTable( + *refiner, varyingStencils, patchTable->GetLocalPointStencilTable())) { + delete varyingStencils; + varyingStencils = varyingStencilsWithLocalPoints; + } + } + + // save values for the next loop + _meshSharedData->_viewportCompute->vertexStencils.reset(vertexStencils); + _meshSharedData->_viewportCompute->varyingStencils.reset(varyingStencils); + _meshSharedData->_viewportCompute->patchTable.reset(patchTable); +#endif + + // if there is a sourceMeshSharedData it should have entries for every vertex in that geometry + // source. + +#endif +} +#endif + /*! \brief Update _primvarSourceMap, our local cache of raw primvar data. This function pulls data from the scene delegate, but defers processing. @@ -1637,10 +1857,10 @@ void HdVP2Mesh::_UpdatePrimvarSources( if (std::find(begin, end, pv.name) != end) { if (HdChangeTracker::IsPrimvarDirty(dirtyBits, id, pv.name)) { const VtValue value = GetPrimvar(sceneDelegate, pv.name); - _meshSharedData._primvarSourceMap[pv.name] = { value, interp }; + _meshSharedData->_primvarSourceMap[pv.name] = { value, interp }; } } else { - _meshSharedData._primvarSourceMap.erase(pv.name); + _meshSharedData->_primvarSourceMap.erase(pv.name); } } } diff --git a/lib/mayaUsd/render/vp2RenderDelegate/mesh.h b/lib/mayaUsd/render/vp2RenderDelegate/mesh.h index e76c633e32..9249a64156 100644 --- a/lib/mayaUsd/render/vp2RenderDelegate/mesh.h +++ b/lib/mayaUsd/render/vp2RenderDelegate/mesh.h @@ -16,7 +16,7 @@ #ifndef HD_VP2_MESH #define HD_VP2_MESH -#include +#include "meshViewportCompute.h" #include #include @@ -64,6 +64,10 @@ struct HdVP2MeshSharedData //! The number of vertices in each vertex buffer. size_t _numVertices; + //! An array to store a rendering face vertex index for each original scene + //! face vertex index. + std::vector _sceneToRenderingFaceVtxIds; + //! A local cache of primvar scene data. "data" is a copy-on-write handle to //! the actual primvar buffer, and "interpolation" is the interpolation mode //! to be used. @@ -78,6 +82,9 @@ struct HdVP2MeshSharedData //! Render tag of the Rprim. TfToken _renderTag; +#ifdef HDVP2_ENABLE_GPU_COMPUTE + MSharedPtr _viewportCompute; +#endif }; /*! \brief VP2 representation of poly-mesh object. @@ -117,6 +124,13 @@ class HdVP2Mesh final : public HdMesh void _UpdateRepr(HdSceneDelegate*, const TfToken&); +#ifdef HDVP2_ENABLE_GPU_COMPUTE + void _CreateViewportCompute(const HdVP2DrawItem& drawItem); +#endif +#ifdef HDVP2_ENABLE_GPU_OSD + void _CreateOSDTables(); +#endif + void _UpdateDrawItem( HdSceneDelegate*, HdVP2DrawItem*, @@ -137,6 +151,8 @@ class HdVP2Mesh final : public HdMesh MHWRender::MRenderItem* _CreatePointsRenderItem(const MString& name) const; MHWRender::MRenderItem* _CreateBoundingBoxRenderItem(const MString& name) const; + static void _InitGPUCompute(); + //! Custom dirty bits used by this mesh enum DirtyBits : HdDirtyBits { @@ -156,10 +172,25 @@ class HdVP2Mesh final : public HdMesh 0 }; //!< Storage for custom dirty bits. See _PropagateDirtyBits for details. const MString _rprimId; //!< Rprim id cached as a maya string for easier debugging and profiling - HdVP2MeshSharedData _meshSharedData; //!< Shared data for all draw items of the Rprim + std::shared_ptr + _meshSharedData; //!< Shared data for all draw items of the Rprim //! Selection status of the Rprim HdVP2SelectionStatus _selectionStatus { kUnselected }; + + //! Control GPU compute behavior + //! Having these in place even without HDVP2_ENABLE_GPU_COMPUTE or HDVP2_ENABLE_GPU_OSD defined + //! makes the expressions using these variables much simpler + bool _gpuNormalsEnabled { true }; //!< Use GPU Compute for normal calculation, only used when + //!< HDVP2_ENABLE_GPU_COMPUTE is defined + bool _gpuOSDEnabled { +#ifdef HDVP2_ENABLE_GPU_OSD + true +#else + false +#endif + }; //!< Use GPU Compute for OSD, only used when HDVP2_ENABLE_GPU_OSD is defined + static int _gpuNormalsComputeThreshold; }; PXR_NAMESPACE_CLOSE_SCOPE diff --git a/lib/mayaUsd/render/vp2RenderDelegate/meshViewportCompute.cpp b/lib/mayaUsd/render/vp2RenderDelegate/meshViewportCompute.cpp new file mode 100644 index 0000000000..5cf0b049c5 --- /dev/null +++ b/lib/mayaUsd/render/vp2RenderDelegate/meshViewportCompute.cpp @@ -0,0 +1,1335 @@ +// +// Copyright 2020 Autodesk +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +#include "meshViewportCompute.h" + +#include "mesh.h" +#include "render_delegate.h" + +#include + +#include + +#include + +#ifdef HDVP2_ENABLE_GPU_COMPUTE + +PXR_NAMESPACE_OPEN_SCOPE + +namespace { + +std::string _GetResourcePath(const std::string& resource) +{ + static PlugPluginPtr plugin + = PlugRegistry::GetInstance().GetPluginWithName("mayaUsd_ComputeShaders"); + if (!TF_VERIFY(plugin, "Could not get plugin\n")) { + return std::string(); + } + + const std::string path = PlugFindPluginResource(plugin, resource); + TF_VERIFY(!path.empty(), "Cound not find resource: %s\n", resource.c_str()); + + return path; +} + +template class LambdaTask : public tbb::task +{ +public: + LambdaTask(const F& func) + : _func(func) + { + } + +private: + tbb::task* execute() + { + _func(); + return nullptr; + } + + F _func; +}; + +template void EnqueueLambdaTask(const F& f) +{ + tbb::task::enqueue(*new (tbb::task::allocate_root()) LambdaTask(f)); +} +} // namespace + +std::once_flag MeshViewportCompute::_compileProgramOnce; +PxrMayaGLSLProgram* MeshViewportCompute::_computeNormalsProgram; + +bool MeshViewportCompute::verifyDrawItem(const HdVP2DrawItem& drawItem) const +{ + return &drawItem == _drawItem; +} + +void MeshViewportCompute::openGLErrorCheck() +{ +//#define DO_OPENGL_ERROR_CHECK +#ifdef DO_OPENGL_ERROR_CHECK + glFinish(); + // Check for errors. + GLenum err = glGetError(); + TF_VERIFY(GL_NO_ERROR == err); +#endif +} + +void MeshViewportCompute::setTopologyDirty() +{ + _topologyDirty = true; + _executed = false; + _vertexCount = 0; + if (_consolidatedCompute) { + _consolidatedCompute->_topologyDirty = true; + _consolidatedCompute->_vertexCount = 0; + } +} + +void MeshViewportCompute::setAdjacencyBufferGPUDirty() +{ + _adjacencyBufferGPUDirty = true; + _executed = false; + if (_consolidatedCompute) + _consolidatedCompute->_adjacencyBufferGPUDirty = true; +} + +void MeshViewportCompute::setNormalVertexBufferGPUDirty() +{ + _normalVertexBufferGPUDirty = true; + _executed = false; + if (_consolidatedCompute) + _consolidatedCompute->_normalVertexBufferGPUDirty = true; +} + +void MeshViewportCompute::reset() +{ + /* don't clear _meshSharedData, it's either an input from the external HdVP2Mesh + or it has been created explicitly for this consolidated viewport compute. + */ + _drawItem = nullptr; + _executed = false; + _sourcesExecuted = false; + + _consolidatedCompute.reset(); + _geometryIndexMapping.reset(); + _vertexCount = 0; + + _adjacencyBufferSize = 0; + _adjacencyBufferCPU.reset(); + _adjacencyBufferGPU.reset(); + _renderingToSceneFaceVtxIdsGPU.reset(); + _sceneToRenderingFaceVtxIdsGPU.reset(); + + fRenderGeom = nullptr; + + _positionVertexBufferGPU = nullptr; + _normalVertexBufferGPU = nullptr; + _colorVertexBufferGPU = nullptr; + + _topologyDirty = true; + _adjacencyBufferGPUDirty = true; + _normalVertexBufferGPUDirty = true; + +#if defined(DO_CPU_OSD) || defined(DO_OPENGL_OSD) + // OSD information + _vertexStencils.reset(); + _varyingStencils.reset(); + _patchTable.reset(); +#endif +} + +bool MeshViewportCompute::hasExecuted() const { return _executed; } + +void MeshViewportCompute::findConsolidationMapping(MRenderItem& renderItem) +{ + // If the item is not consolidated clear any stale consolidatedCompute information + if (!renderItem.isConsolidated()) { + _consolidatedCompute.reset(); + _sourcesExecuted = false; + return; + } + + MProfilingScope subProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L2, + "MeshViewportCompute:MGeometryIndexMapping"); + + if (_geometryIndexMapping) { + if (renderItem.isSourceIndexMappingValid(*_geometryIndexMapping.get())) + return; + } + + reset(); + _geometryIndexMapping.reset(new MHWRender::MGeometryIndexMapping()); + renderItem.sourceIndexMapping(*_geometryIndexMapping.get()); + + for (int i = 0; i < _geometryIndexMapping->geometryCount(); i++) { + MRenderItem* sourceItem = _geometryIndexMapping->sourceRenderItem(i); + MSharedPtr sourceViewportComputeItem + = MSharedPtr::dynamic_pointer_cast<>( + sourceItem->viewportComputeItem()); + sourceViewportComputeItem->_consolidatedCompute + = MSharedPtr::dynamic_pointer_cast<>( + renderItem.viewportComputeItem()); + TF_VERIFY(i == 0 || _sourcesExecuted == sourceViewportComputeItem->hasExecuted()); + _sourcesExecuted = sourceViewportComputeItem->hasExecuted(); + } +} + +HdMeshTopology& getSceneTopology(HdVP2MeshSharedData* meshSharedData) +{ + return meshSharedData->_topology; +} + +HdMeshTopology& getRenderingTopology(HdVP2MeshSharedData* meshSharedData) +{ + return meshSharedData->_renderingTopology; +} + +template +void MeshViewportCompute::createConsolidatedTopology(TopologyAccessor getTopology) +{ + if (!_topologyDirty && getTopology(_meshSharedData.get()).GetNumPoints() > 0) + return; + _topologyDirty = false; + + MProfilingScope subProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L2, + "MeshViewportCompute:createConsolidatedTopology"); + + size_t faceVertexCountsSize = 0; + size_t faceVertexIndicesSize = 0; + size_t holeIndicesSize = 0; + size_t sceneToRenderingFaceVtxIdsCount = 0; + size_t vertexCount = 0; + + // figure out the size of the consolidated mesh topology + if (_geometryIndexMapping.get()) { + for (int i = 0; i < _geometryIndexMapping->geometryCount(); i++) { + MRenderItem* sourceItem = _geometryIndexMapping->sourceRenderItem(i); + + MSharedPtr sourceViewportComputeItem + = MSharedPtr::dynamic_pointer_cast<>( + sourceItem->viewportComputeItem()); + HdVP2MeshSharedData* sourceMeshSharedData + = sourceViewportComputeItem->_meshSharedData.get(); + TF_VERIFY(sourceMeshSharedData); + HdMeshTopology& sourceTopology = getTopology(sourceMeshSharedData); + + faceVertexCountsSize += sourceTopology.GetNumFaces(); + faceVertexIndicesSize += sourceTopology.GetFaceVertexIndices().size(); + holeIndicesSize += sourceTopology.GetHoleIndices().size(); + // if there is a sourceMeshSharedData it should have entries for every vertex in that + // geometry source. + vertexCount += sourceTopology.GetNumPoints(); + sceneToRenderingFaceVtxIdsCount += sourceTopology.GetNumPoints(); + } + } + + TF_VERIFY(faceVertexCountsSize > 0 || !_geometryIndexMapping.get()); + + // check to see if it is actually a consolidated geometry that needs a consolidated adjacency + // buffer + if (faceVertexCountsSize > 0) { + TfToken scheme = PxOsdOpenSubdivTokens->catmullClark; + TfToken orientation = PxOsdOpenSubdivTokens->rightHanded; + VtIntArray faceVertexCounts; + VtIntArray faceVertexIndices; + VtIntArray holeIndices; + int refineLevel = 0; + + faceVertexCounts.reserve(faceVertexCountsSize); + faceVertexIndices.reserve(faceVertexIndicesSize); + holeIndices.reserve(holeIndicesSize); + _meshSharedData->_renderingToSceneFaceVtxIds.clear(); + _meshSharedData->_renderingToSceneFaceVtxIds.reserve(vertexCount); + _meshSharedData->_sceneToRenderingFaceVtxIds.clear(); + _meshSharedData->_sceneToRenderingFaceVtxIds.reserve(sceneToRenderingFaceVtxIdsCount); + + for (int sourceIndex = 0; sourceIndex < _geometryIndexMapping->geometryCount(); + sourceIndex++) { + MRenderItem* sourceItem = _geometryIndexMapping->sourceRenderItem(sourceIndex); + int consolidatedBufferVertexOffset = _geometryIndexMapping->vertexStart(sourceIndex); + MSharedPtr sourceViewportComputeItem + = MSharedPtr::dynamic_pointer_cast<>( + sourceItem->viewportComputeItem()); + HdVP2MeshSharedData* sourceMeshSharedData + = sourceViewportComputeItem->_meshSharedData.get(); + TF_VERIFY(sourceMeshSharedData); + + HdMeshTopology& sourceTopology = getTopology(sourceMeshSharedData); + + TF_VERIFY(sourceTopology.GetScheme() == scheme); + TF_VERIFY(sourceTopology.GetOrientation() == orientation); + TF_VERIFY(sourceTopology.GetRefineLevel() == refineLevel); + + VtIntArray const& sourceFaceVertexCounts = sourceTopology.GetFaceVertexCounts(); + for (int faceId = 0; faceId < sourceFaceVertexCounts.size(); faceId++) { + faceVertexCounts.push_back(sourceFaceVertexCounts[faceId]); + } + + VtIntArray const& sourceFaceVertexIndices = sourceTopology.GetFaceVertexIndices(); + for (int faceVertexId = 0; faceVertexId < sourceFaceVertexIndices.size(); + faceVertexId++) { + faceVertexIndices.push_back( + sourceFaceVertexIndices[faceVertexId] + consolidatedBufferVertexOffset); + } + + VtIntArray const& sourceHoleIndices = sourceTopology.GetHoleIndices(); + for (int faceId = 0; faceId < sourceHoleIndices.size(); faceId++) { + holeIndices.push_back( + sourceHoleIndices[faceId] + consolidatedBufferVertexOffset); // untested? + } + + for (int idx = 0; idx < sourceMeshSharedData->_renderingToSceneFaceVtxIds.size(); + idx++) { + _meshSharedData->_renderingToSceneFaceVtxIds.push_back( + sourceMeshSharedData->_renderingToSceneFaceVtxIds[idx] + + consolidatedBufferVertexOffset); + } + + // add padding to _sceneToRenderingFaceVtxIds because the scene IDs start at + // consolidatedBufferVertexOffset + while (consolidatedBufferVertexOffset + > _meshSharedData->_sceneToRenderingFaceVtxIds.size()) { + _meshSharedData->_sceneToRenderingFaceVtxIds.push_back(-1); + } + + for (int idx = 0; idx < sourceMeshSharedData->_sceneToRenderingFaceVtxIds.size(); + idx++) { + _meshSharedData->_sceneToRenderingFaceVtxIds.push_back( + sourceMeshSharedData->_sceneToRenderingFaceVtxIds[idx] + + consolidatedBufferVertexOffset); + } + } + + HdMeshTopology consolidatedTopology( + scheme, orientation, faceVertexCounts, faceVertexIndices, holeIndices, refineLevel); + getTopology(_meshSharedData.get()) = consolidatedTopology; + } else { + // It is not a consolidated draw item. There is only a single topology so we can use that + // directly In fact it is already there, nothing to do! + vertexCount = _meshSharedData->_renderingTopology.GetNumPoints(); + + // the non-consolidated topology doesn't get updated with _sceneToRenderingFaceVtxIds, I + // guess we have to do it in the kernel? Or have different storage for a topology we modify. + + // Can't modify _meshSharedData if we are not consolidated! + } + + TF_VERIFY(vertexCount == 0 || vertexCount == _vertexCount); + _vertexCount = vertexCount; +} + +void MeshViewportCompute::createConsolidatedAdjacency() +{ + if (_adjacencyBufferSize > 0) + return; + + MProfilingScope subProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L2, + "MeshViewportCompute:createConsolidatedAdjacency"); + + bool isConsolidated = nullptr != _geometryIndexMapping.get(); + + Hd_VertexAdjacencySharedPtr adjacency(new Hd_VertexAdjacency()); + HdBufferSourceSharedPtr adjacencyComputation + = adjacency->GetSharedAdjacencyBuilderComputation(&_meshSharedData->_topology); + adjacencyComputation->Resolve(); + + const VtIntArray& adjacencyTable = adjacency->GetAdjacencyTable(); + size_t adjacencyBufferSize = adjacencyTable.size(); + int* adjCopy = new int[adjacencyBufferSize]; + memcpy(adjCopy, adjacencyTable.data(), adjacencyBufferSize * sizeof(int)); + _adjacencyBufferCPU.reset( + adjCopy); // make sure this is really using the const version and doesn't copy the data. + _adjacencyBufferSize = adjacencyTable.size(); +} + +void MeshViewportCompute::findRenderGeometry(MRenderItem& renderItem) +{ + MProfilingScope subProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L2, + "MeshViewportCompute:findRenderGeometry"); + + MGeometry* renderGeometry = renderItem.geometry(); + if (fRenderGeom && (fRenderGeom != renderGeometry)) { + _positionVertexBufferGPU = nullptr; + _normalVertexBufferGPU = nullptr; + _colorVertexBufferGPU = nullptr; + fRenderGeom = nullptr; + } + fRenderGeom = renderGeometry; +} + +void MeshViewportCompute::createConsolidatedOSDTables(MRenderItem& renderItem) +{ +#if defined(DO_CPU_OSD) || defined(DO_OPENGL_OSD) + + MProfilingScope subProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L2, + "MeshViewportCompute:consolidatedOSDTables"); + + // refine + // and + // create stencil/patch table + { + OpenSubdiv::Far::StencilTable const* consolidatedVertexStencils = nullptr; + OpenSubdiv::Far::StencilTable const* consolidatedVaryingStencils = nullptr; + OpenSubdiv::Far::PatchTable const* consolidatedPatchTable = nullptr; + + // if this is a consolidated item then we won't have any stencils or tables. + // If this is an unconsolidated item then we'll already have the tables we need. + if (!_vertexStencils || !_varyingStencils || !_patchTable) { + MProfilingScope subsubProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L2, + "MeshViewportCompute:createConsolidatedMeshTables"); + + // create topology refiner + PxOsdTopologyRefinerSharedPtr refiner; + // for empty topology, we don't need to refine anything. + // but still need to return the typed buffer for codegen + if (_meshSharedData->_renderingTopology.GetFaceVertexCounts().size() == 0) { + // leave refiner empty + } else { + refiner = PxOsdRefinerFactory::Create( + _meshSharedData->_renderingTopology.GetPxOsdMeshTopology(), + TfToken(_meshSharedData->_renderTag.GetText())); + } + + if (refiner) { + OpenSubdiv::Far::PatchTableFactory::Options patchOptions(_level); + if (_adaptive) { + patchOptions.endCapType + = OpenSubdiv::Far::PatchTableFactory::Options::ENDCAP_BSPLINE_BASIS; +#if OPENSUBDIV_VERSION_NUMBER >= 30400 + // Improve fidelity when refining to limit surface patches + // These options supported since v3.1.0 and v3.2.0 respectively. + patchOptions.useInfSharpPatch = true; + patchOptions.generateLegacySharpCornerPatches = false; +#endif + } + + // split trace scopes. + { + MProfilingScope subsubsubProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L2, + "MeshViewportCompute:refine"); + if (_adaptive) { + OpenSubdiv::Far::TopologyRefiner::AdaptiveOptions adaptiveOptions(_level); +#if OPENSUBDIV_VERSION_NUMBER >= 30400 + adaptiveOptions = patchOptions.GetRefineAdaptiveOptions(); +#endif + refiner->RefineAdaptive(adaptiveOptions); + } else { + refiner->RefineUniform(_level); + } + } + { + MProfilingScope subsubsubProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L2, + "MeshViewportCompute:stencilFactory"); + OpenSubdiv::Far::StencilTableFactory::Options options; + options.generateOffsets = true; + options.generateIntermediateLevels = _adaptive; + options.interpolationMode + = OpenSubdiv::Far::StencilTableFactory::INTERPOLATE_VERTEX; + consolidatedVertexStencils + = OpenSubdiv::Far::StencilTableFactory::Create(*refiner, options); + + options.interpolationMode + = OpenSubdiv::Far::StencilTableFactory::INTERPOLATE_VARYING; + consolidatedVaryingStencils + = OpenSubdiv::Far::StencilTableFactory::Create(*refiner, options); + } + { + MProfilingScope subsubsubProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L2, + "MeshViewportCompute:patchFactory"); + consolidatedPatchTable + = OpenSubdiv::Far::PatchTableFactory::Create(*refiner, patchOptions); + } + } + + // merge endcap + if (consolidatedPatchTable && consolidatedPatchTable->GetLocalPointStencilTable()) { + // append stencils + if (OpenSubdiv::Far::StencilTable const* vertexStencilsWithLocalPoints + = OpenSubdiv::Far::StencilTableFactory::AppendLocalPointStencilTable( + *refiner, + consolidatedVertexStencils, + consolidatedPatchTable->GetLocalPointStencilTable())) { + delete consolidatedVertexStencils; + consolidatedVertexStencils = vertexStencilsWithLocalPoints; + } + if (OpenSubdiv::Far::StencilTable const* varyingStencilsWithLocalPoints + = OpenSubdiv::Far::StencilTableFactory::AppendLocalPointStencilTable( + *refiner, + consolidatedVaryingStencils, + consolidatedPatchTable->GetLocalPointStencilTable())) { + delete consolidatedVaryingStencils; + consolidatedVaryingStencils = varyingStencilsWithLocalPoints; + } + } + + // save tables to topology + _vertexStencils.reset(consolidatedVertexStencils); + _varyingStencils.reset(consolidatedVaryingStencils); + _patchTable.reset(consolidatedPatchTable); + } + } + + if (_geometryIndexMapping && _geometryIndexMapping->geometryCount() > 0) { + MProfilingScope subsubProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L2, + "MeshViewportCompute:updateIndexMapping"); + + // TODO: assumes quads + int indexStart = 0; + int vertexStart = 0; + for (int i = 0; i < _geometryIndexMapping->geometryCount(); i++) { + MRenderItem* sourceItem = _geometryIndexMapping->sourceRenderItem(i); + + // we can have a nullptr sourceMeshSharedData, but if we do all the sourceItems should + // have a nullptr + MSharedPtr sourceViewportComputeItem + = MSharedPtr::dynamic_pointer_cast<>( + sourceItem->viewportComputeItem()); + size_t sourcePtableSize + = sourceViewportComputeItem->_patchTable->GetPatchControlVerticesTable().size(); + size_t sourceBaseVertexCount + = sourceViewportComputeItem->_meshSharedData->_renderingTopology + .GetNumPoints(); // _geometryIndexMapping->vertexLength(i); + size_t sourceSmoothVertexCount + = sourceViewportComputeItem->_vertexStencils->GetNumStencils(); + _geometryIndexMapping->updateSource( + i, + indexStart, + sourcePtableSize * 1.5, + vertexStart, + sourceBaseVertexCount + sourceSmoothVertexCount); + indexStart += sourcePtableSize * 1.5; + // vertexStart += sourceBaseVertexCount + sourceSmoothVertexCount; multiDrawIndirect + // adds vertexStart to each vertexId for the draw. The index buffer I have is a fully + // consolidated index buffer, so I don't need to use that. + } + renderItem.setSourceIndexMapping(*_geometryIndexMapping.get()); + } + + MProfilingScope subsubProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L2, + "MeshViewportCompute:triangulateSmoothPatchTable"); + + // Inspired by HdSt_Osd3IndexComputation::Resolve() + // PxOsdOpenSubdivTokens->loop -> _patchTable is triangles + // PxOsdOpenSubdivTokens->catmullClark + _adaptive -> BSplinePatches + // PxOsdOpenSubdivTokens->catmullClark + !_adaptive -> quads + // HdSt draws with tessellation shaders and we do have that? try calling + OpenSubdiv::Far::Index const* firstIndex = nullptr; + size_t ptableSize = 0; + if (_patchTable) { + ptableSize = _patchTable->GetPatchControlVerticesTable().size(); + if (ptableSize > 0) + firstIndex = &_patchTable->GetPatchControlVerticesTable()[0]; + } + + int indexLength = 0; + + if (!_adaptive + && _meshSharedData->_renderingTopology.GetScheme() == PxOsdOpenSubdivTokens->catmullClark) { + // _patchTable is quads. Convert to triangles and make an index buffer we can draw + int patchSize + = _patchTable ? _patchTable->GetPatchArrayDescriptor(0).GetNumControlVertices() : 0; + TF_VERIFY(patchSize == 4); + VtArray indices(ptableSize); + memcpy(indices.data(), firstIndex, ptableSize * sizeof(int)); + + { + MProfilingScope subsubProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L1, + "MeshViewportCompute:createTriangleIndexBuffer"); + + MIndexBuffer* indexBuffer = fRenderGeom->indexBuffer(0); + if (!indexBuffer) { + indexBuffer = fRenderGeom->createIndexBuffer(MGeometry::kInt32); + } + // The new size of the index buffer needs to be 50% larger than the patch table size + // when the patch table is quads. + void* indexData = indexBuffer->acquire( + ptableSize * 1.5, + true); // we are not going to use any of the old indexing so this is write only + int* indexWriteLocation = (int*)indexData; + for (size_t faceStart = 0; faceStart < ptableSize; faceStart += 4) { + *(indexWriteLocation++) = indices[faceStart]; + *(indexWriteLocation++) = indices[faceStart + 1]; + *(indexWriteLocation++) = indices[faceStart + 2]; + + *(indexWriteLocation++) = indices[faceStart]; + *(indexWriteLocation++) = indices[faceStart + 2]; + *(indexWriteLocation++) = indices[faceStart + 3]; + } + indexBuffer->commit(indexData); + indexLength = ptableSize; + } + } else if (_meshSharedData->_renderingTopology.GetScheme() == PxOsdOpenSubdivTokens->loop) { + int patchSize + = _patchTable ? _patchTable->GetPatchArrayDescriptor(0).GetNumControlVertices() : 0; + TF_VERIFY(patchSize == 3); + MIndexBuffer* indexBuffer = fRenderGeom->indexBuffer(0); + void* indexData = indexBuffer->acquire(ptableSize, true); + memcpy(indexData, firstIndex, ptableSize * sizeof(int)); + indexBuffer->commit(indexData); + } else { + // I can't handle it. Need to use patch drawing with + // MRenderItem::setPrimitive(MGeometry::kPatch, stride) + // But I don't have a shader set up with Tessellation. + } +#endif +} + +void MeshViewportCompute::findVertexBuffers(MRenderItem& renderItem) +{ + if (_positionVertexBufferGPU) { + return; + } + + MProfilingScope subProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L2, + "MeshViewportCompute:findVertexBuffers"); + + for (int bufferIndex = 0; bufferIndex < fRenderGeom->vertexBufferCount(); bufferIndex++) { + MVertexBuffer* renderBuffer = fRenderGeom->vertexBuffer(bufferIndex); + TF_VERIFY(renderBuffer->resourceHandle()); + const MVertexBufferDescriptor& descriptor = renderBuffer->descriptor(); + + if (MGeometry::kPosition == descriptor.semantic()) { + MProfilingScope subsubProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L2, + "MeshViewportCompute:positionBufferResourceHandle"); + TF_VERIFY(renderBuffer->vertexCount() == _vertexCount); + _positionVertexBufferGPU = renderBuffer; + } else if (MGeometry::kNormal == descriptor.semantic()) { + MProfilingScope subsubProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L2, + "MeshViewportCompute:normalBufferResourceHandle"); + _normalVertexBufferGPU = renderBuffer; + } else if (MGeometry::kColor == descriptor.semantic()) { + _colorVertexBufferGPU = renderBuffer; + } else { + fprintf(stderr, "Unsupported buffer type.\n"); + continue; + } + } + + if (nullptr == _normalVertexBufferGPU) { + MProfilingScope subsubProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L1, + "MeshViewportCompute:createNormalBuffer"); + + const MHWRender::MVertexBufferDescriptor vbDesc( + "", MHWRender::MGeometry::kNormal, MHWRender::MGeometry::kFloat, 3); + + _normalVertexBufferGPU = fRenderGeom->createVertexBuffer(vbDesc); + } + + GLuint* normalBufferResourceHandle = (GLuint*)_normalVertexBufferGPU->resourceHandle(); + if (!normalBufferResourceHandle) { + // tell the buffer what size it is + void* normalsBufferData = _normalVertexBufferGPU->acquire(_vertexCount, true); + memset(normalsBufferData, 0, _vertexCount * sizeof(float) * 3); + _normalVertexBufferGPU->commit(normalsBufferData); + } +} + +void MeshViewportCompute::prepareAdjacencyBuffer() +{ +#if defined(HDVP2_OPENGL_NORMALS) + if (!_adjacencyBufferGPUDirty) + return; + _adjacencyBufferGPUDirty = false; + + MProfilingScope subProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L2, + "MeshViewportCompute:prepareAdjacencyBuffer"); + + // we compute the number of normals as required by the topology. + + // https://www.khronos.org/assets/uploads/developers/library/2014-siggraph-bof/KITE-BOF_Aug14.pdf + // https://github.com/PixarAnimationStudios/USD/blob/be1a80f8cb91133ac75e1fc2a2e1832cd10d91c8/pxr/imaging/hdSt/smoothNormals.cpp + + // We need additional padding in the header of adjacencyData because glsl compute seems to still + // execute all branch not taken code, and just not write anything. If we don't have padding then + // some gpu threads will see a valance in the thousands and take forever to run and/or crash + // accessing memory out of bounds. + + const int* adjacencyData = _adjacencyBufferCPU.get(); + int numVertex = adjacencyData[0] / 2; // two entries per vertex. + size_t localWorkSize = 256; + size_t paddingSize = (localWorkSize - numVertex % localWorkSize) * 2; + + size_t adjacencyBufferSize = _adjacencyBufferSize + paddingSize; + size_t vertexDataSize = (numVertex * 2); + int* vertexDataStart = const_cast(adjacencyData); + size_t vertexIdSize = _adjacencyBufferSize - vertexDataSize; + int* vertexIdStart = vertexDataStart + vertexDataSize; + + const MHWRender::MVertexBufferDescriptor intArrayDesc( + "", MHWRender::MGeometry::kColor, MHWRender::MGeometry::kInt32, 1); + _adjacencyBufferGPU.reset(new MHWRender::MVertexBuffer(intArrayDesc)); + void* bufferData = _adjacencyBufferGPU->acquire(adjacencyBufferSize, true); + + for (int i = 0; i < vertexDataSize; i += 2) { + vertexDataStart[i] += paddingSize; + } + + // copy the vertex data information into the new padded buffer + int* destination = ((int*)bufferData); + memcpy(bufferData, vertexDataStart, vertexDataSize * sizeof(int)); + // set the padding space to be zeros + destination = destination + vertexDataSize; + memset(destination, 0, paddingSize * sizeof(int)); + // copy the adjacency information for each vertex into the upper part of the buffer. + destination = destination + paddingSize; + memcpy(destination, vertexIdStart, vertexIdSize * sizeof(int)); + + // commit the adjacency information + _adjacencyBufferGPU->commit(bufferData); + + // prepare the remapping array from the regular topology to the render topology + _renderingToSceneFaceVtxIdsGPU.reset(new MHWRender::MVertexBuffer(intArrayDesc)); + bufferData = _renderingToSceneFaceVtxIdsGPU->acquire( + _meshSharedData->_renderingToSceneFaceVtxIds.size(), true); + memcpy( + bufferData, + _meshSharedData->_renderingToSceneFaceVtxIds.data(), + _meshSharedData->_renderingToSceneFaceVtxIds.size() * sizeof(int)); + _renderingToSceneFaceVtxIdsGPU->commit(bufferData); + + _sceneToRenderingFaceVtxIdsGPU.reset(new MHWRender::MVertexBuffer(intArrayDesc)); + bufferData = _sceneToRenderingFaceVtxIdsGPU->acquire( + _meshSharedData->_sceneToRenderingFaceVtxIds.size(), true); + memcpy( + bufferData, + _meshSharedData->_sceneToRenderingFaceVtxIds.data(), + _meshSharedData->_sceneToRenderingFaceVtxIds.size() * sizeof(int)); + _sceneToRenderingFaceVtxIdsGPU->commit(bufferData); +#endif +} + +void MeshViewportCompute::prepareUniformBufferForNormals() +{ + if (0 != _uboResourceHandle) + return; + + if (hasOpenGL()) { + glGenBuffers(1, &_uboResourceHandle); + glBindBuffer(GL_UNIFORM_BUFFER, _uboResourceHandle); + glBufferData(GL_UNIFORM_BUFFER, sizeof(unsigned int), &_vertexCount, GL_STATIC_DRAW); + glBindBuffer(GL_UNIFORM_BUFFER, 0); + } +} + +bool MeshViewportCompute::hasOpenGL() +{ + // test an arbitrary OpenGL function pointer and make sure it is not nullptr + return nullptr != glBindBufferBase; +} + +void MeshViewportCompute::initializeOpenGL() { glewInit(); } + +void MeshViewportCompute::compileNormalsProgram() +{ +#if defined(HDVP2_OPENGL_NORMALS) + MProfilingScope subProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L2, + "MeshViewportCompute:compileNormalsProgram"); + + std::string computeShaderSource = _GetResourcePath("computeNormals.glsl"); + std::ifstream glslFile(computeShaderSource.c_str()); + std::string glslString; + glslFile.seekg(0, std::ios::end); + glslString.reserve(glslFile.tellg()); + glslFile.seekg(0, std::ios::beg); + + glslString.assign((std::istreambuf_iterator(glslFile)), std::istreambuf_iterator()); + + if (hasOpenGL()) { + initializeOpenGL(); + } + TF_VERIFY(hasOpenGL()); + + _computeNormalsProgram = new PxrMayaGLSLProgram; + _computeNormalsProgram->CompileShader(GL_COMPUTE_SHADER, glslString); + _computeNormalsProgram->Link(); + _computeNormalsProgram->Validate(); + openGLErrorCheck(); +#endif +} + +void MeshViewportCompute::computeNormals() +{ +#if defined(HDVP2_OPENGL_NORMALS) + + if (!_normalVertexBufferGPUDirty) + return; + _normalVertexBufferGPUDirty = false; + + MProfilingScope subProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L2, + "MeshViewportCompute:computeNormals"); + GLuint* adjacencyBufferResourceHandle = (GLuint*)_adjacencyBufferGPU->resourceHandle(); + + std::call_once(_compileProgramOnce, MeshViewportCompute::compileNormalsProgram); + + GLuint programId = _computeNormalsProgram->GetProgramId(); + + // We already did another lambda task that did the commit for _positionsBuffer, so we should be + // able to get the resource handle. + GLuint* positionBufferResourceHandle = (GLuint*)_positionVertexBufferGPU->resourceHandle(); + + // normal buffer needs to be locked because we are modifying it. We don't want the CPU version + // and GPU version of the buffer to hold different data. Locking the buffer deletes the CPU + // version of the buffer. + _normalVertexBufferGPU->lockResourceHandle(); + GLuint* normalBufferResourceHandle = (GLuint*)_normalVertexBufferGPU->resourceHandle(); + + // remapping buffers + GLuint* renderingToSceneFaceVtxIdsResourceHandle + = (GLuint*)_renderingToSceneFaceVtxIdsGPU->resourceHandle(); + GLuint* sceneToRenderingFaceVtxIdsResourceHandle + = (GLuint*)_sceneToRenderingFaceVtxIdsGPU->resourceHandle(); + + if (hasOpenGL()) { + glBindBufferBase(GL_UNIFORM_BUFFER, 0, _uboResourceHandle); + openGLErrorCheck(); + + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *positionBufferResourceHandle); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, *adjacencyBufferResourceHandle); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, *renderingToSceneFaceVtxIdsResourceHandle); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, *sceneToRenderingFaceVtxIdsResourceHandle); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, *normalBufferResourceHandle); + openGLErrorCheck(); + + size_t localWorkSize = 256; + size_t globalWorkSize = (localWorkSize - _vertexCount % localWorkSize) + _vertexCount; + size_t num_groups = globalWorkSize / localWorkSize; + + glUseProgram(programId); + glDispatchCompute(num_groups, 1, 1); + glUseProgram(0); + openGLErrorCheck(); + + glBindBufferBase(GL_UNIFORM_BUFFER, 0, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 4, 0); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 5, 0); + openGLErrorCheck(); + } + _normalVertexBufferGPU->unlockResourceHandle(); +#elif defined(HDVP2_OPENCL_NORMALS) + + // Make shared buffers + std::vector sharedBuffers; + cl_int err; + + { + MProfilingScope subProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L2, + "MeshViewportCompute:copyAdjacencyToOpenCL"); + for (unsigned int i = 0; i < consolidatedItems.size(); i++) { + _renderItemData& _renderItemData = _renderItemDatas[i]; + MRenderItem* renderItem = consolidatedItems[i].get(); + + { + _meshSharedData->_openCLMeshInfo._adjacencyBufferCL.reset(new MOpenCLBuffer( + MOpenCLInfo::getOpenCLContext(), + CL_MEM_READ_ONLY, // the READ and WRITE flags are from the point of view of an + // OpenCL Kernel. + _adjacencyBufferSize * sizeof(int), + (void*)_adjacencyBufferCPU, + &err)); + MOpenCLInfo::checkCLErrorStatus(err); + } + } + } + + { + MProfilingScope subProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L2, + "MeshViewportCompute:attachToGLBuffers"); + for (unsigned int i = 0; i < consolidatedItems.size(); i++) { + _renderItemData& _renderItemData = _renderItemDatas[i]; + MRenderItem* renderItem = consolidatedItems[i].get(); + { + + _meshSharedData->_openCLMeshInfo._positionsBufferShared.attach(clCreateFromGLBuffer( + MOpenCLInfo::getOpenCLContext(), + CL_MEM_READ_ONLY, + *(cl_GLuint*)_positionVertexBufferGPU->resourceHandle(), + &err)); + MOpenCLInfo::checkCLErrorStatus(err); + sharedBuffers.push_back( + _meshSharedData->_openCLMeshInfo._positionsBufferShared.get()); + + _meshSharedData->_openCLMeshInfo._normalsBufferShared.attach(clCreateFromGLBuffer( + MOpenCLInfo::getOpenCLContext(), + CL_MEM_WRITE_ONLY, + *(cl_GLuint*)_normalVertexBufferGPU->resourceHandle(), + &err)); // from the point of view of OpenCL!!! + MOpenCLInfo::checkCLErrorStatus(err); + sharedBuffers.push_back( + _meshSharedData->_openCLMeshInfo._normalsBufferShared.get()); + } + } + } + + // acquire the shared buffers + MAutoCLEvent acquireEvent; + { + MProfilingScope subProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L2, + "MeshViewportCompute:acquireSharedBuffers"); +#ifdef MAYA_BLOCKING_OPENCL + err = clEnqueueAcquire3DObjects_blocking_api( + MOpenCLInfo::getMayaDefaultOpenCLCommandQueue(), + sharedBuffers.size(), + sharedBuffers.data(), + 0, + nullptr, + acquireEvent.getReferenceForAssignment()); +#else + err = clEnqueueAcquireGLObjects( + MOpenCLInfo::getMayaDefaultOpenCLCommandQueue(), + sharedBuffers.size(), + sharedBuffers.data(), + 0, + nullptr, + acquireEvent.getReferenceForAssignment()); +#endif + } + MOpenCLInfo::checkCLErrorStatus(err); + + // get the kernel + MString kernelFile("C:/dev/usd/ecg-maya-usd/maya-usd/lib/mayaUsd/render/vp2RenderDelegate/" + "mesh.cl"); // needs to get installed somewhere + MString kernelName("computeNormals"); + MAutoCLKernel computeNormalsKernel(MOpenCLInfo::getOpenCLKernel(kernelFile, kernelName)); + + // compute the work group size and global work size + size_t workGroupSize; + size_t retSize; + err = clGetKernelWorkGroupInfo( + computeNormalsKernel.get(), + MOpenCLInfo::getOpenCLDeviceId(), + CL_KERNEL_WORK_GROUP_SIZE, + sizeof(size_t), + &workGroupSize, + &retSize); + MOpenCLInfo::checkCLErrorStatus(err); + size_t localWorkSize = 256; + if (retSize > 0) { + localWorkSize = workGroupSize; + } + + cl_event* events = new cl_event[consolidatedItems.size()]; + { + MProfilingScope subProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L2, + "MeshViewportCompute:enqueueKernels"); + for (unsigned int i = 0; i < consolidatedItems.size(); i++) { + _renderItemData& _renderItemData = _renderItemDatas[i]; + MRenderItem* renderItem = consolidatedItems[i].get(); + + size_t remain = _vertexCount % localWorkSize; + size_t globalWorkSize = _vertexCount; + if (remain) { + globalWorkSize = _vertexCount + localWorkSize - remain; + } + + // set up the compute kernel! This can be mostly copied from + // CommitOpenCLPositions::operator()(), just integrate the new data handles we got + // above. + + // set kernel args + err = clSetKernelArg( + computeNormalsKernel.get(), + 0, + sizeof(cl_mem), + (const void*) + _meshSharedData->_openCLMeshInfo._positionsBufferShared.getReadOnlyRef()); + MOpenCLInfo::checkCLErrorStatus(err); + err = clSetKernelArg( + computeNormalsKernel.get(), 1, sizeof(cl_uint), (const void*)&_vertexCount); + MOpenCLInfo::checkCLErrorStatus(err); + err = clSetKernelArg( + computeNormalsKernel.get(), + 2, + sizeof(cl_mem), + (const void*)_meshSharedData->_openCLMeshInfo._adjacencyBufferCL->buffer() + .getReadOnlyRef()); + MOpenCLInfo::checkCLErrorStatus(err); + err = clSetKernelArg( + computeNormalsKernel.get(), + 3, + sizeof(cl_mem), + (const void*) + _meshSharedData->_openCLMeshInfo._normalsBufferShared.getReadOnlyRef()); + MOpenCLInfo::checkCLErrorStatus(err); + + // run kernel + err = clEnqueueNDRangeKernel( + MOpenCLInfo::getMayaDefaultOpenCLCommandQueue(), + computeNormalsKernel.get(), + 1, + nullptr, + &globalWorkSize, + &localWorkSize, + 1, + acquireEvent.getReadOnlyRef(), + _meshSharedData->_openCLMeshInfo._normalsBufferReady.getReferenceForAssignment()); + MOpenCLInfo::checkCLErrorStatus(err); + + // build a list of all the kernel complete events + events[i] = _meshSharedData->_openCLMeshInfo._normalsBufferReady.get(); + } + } + + // release the shared buffers + MAutoCLEvent releaseEvent; + { + MProfilingScope subProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L2, + "MeshViewportCompute:releaseSharedBuffers"); +#ifdef MAYA_BLOCKING_OPENCL + clEnqueueRelease3DObjects_blocking_api( + MOpenCLInfo::getMayaDefaultOpenCLCommandQueue(), + sharedBuffers.size(), + sharedBuffers.data(), + 1, + _meshSharedData->_openCLMeshInfo._normalsBufferReady.getReadOnlyRef(), + releaseEvent.getReferenceForAssignment()); +#else + clEnqueueReleaseGLObjects( + MOpenCLInfo::getMayaDefaultOpenCLCommandQueue(), + sharedBuffers.size(), + sharedBuffers.data(), + consolidatedItems.size(), + events, + releaseEvent.getReferenceForAssignment()); +#endif + } + { + MProfilingScope subProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L3, + "MeshViewportCompute:syncOpenCL"); + DoGLWaitSync(releaseEvent.get()); + } + delete[] events; + + MProfilingScope subProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L2, + "MeshViewportCompute:releaseOpenCLBuffers"); + + for (unsigned int i = 0; i < consolidatedItems.size(); i++) { + _renderItemData& _renderItemData = _renderItemDatas[i]; + MRenderItem* renderItem = consolidatedItems[i].get(); + + _meshSharedData->_openCLMeshInfo._positionsBufferShared.reset(); + _meshSharedData->_openCLMeshInfo._normalsBufferShared.reset(); + _meshSharedData->_openCLMeshInfo._adjacencyBufferCL->reset(); + _meshSharedData->_openCLMeshInfo._normalsBufferReady.reset(); + } + + // clFinish(MOpenCLInfo::getMayaDefaultOpenCLCommandQueue()); + +#elif defined(DO_CPU_NORMALS) + +#endif +} + +void MeshViewportCompute::computeOSD() +{ +#if defined(DO_CPU_OSD) || defined(DO_OPENGL_OSD) + MProfilingScope subProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, MProfiler::kColorD_L2, "MeshViewportCompute:doOSD"); + // Inspired by HdSt_Osd3TopologyComputation::Resolve() + + // refine + // and + // create stencil/patch table + OpenSubdiv::Far::StencilTable const* consolidatedVertexStencils = _vertexStencils.get(); + OpenSubdiv::Far::StencilTable const* consolidatedVaryingStencils = _varyingStencils.get(); + OpenSubdiv::Far::PatchTable const* consolidatedPatchTable = _patchTable.get(); +#endif +#if defined(DO_CPU_OSD) + + class OsdCPUBuffer + { + public: + OsdCPUBuffer(float* buffer) + : _buffer(buffer) + { + } + + float* BindCpuBuffer() { return _buffer; } + + private: + float* _buffer { nullptr }; + }; + + // smooth the normals + const MVertexBufferDescriptor& normalsDescriptor = _normalVertexBufferGPU->descriptor(); + int dimension = normalsDescriptor.dimension(); + void* normalsBufferData = _normalVertexBufferGPU->acquire( + _vertexCount + consolidatedVertexStencils->GetNumStencils(), false); + OpenSubdiv::Osd::BufferDescriptor normalSrcDesc(0, dimension, dimension); + OpenSubdiv::Osd::BufferDescriptor normalDstDesc(_vertexCount * dimension, dimension, dimension); + OsdCPUBuffer osdNormalVertexBuffer((float*)normalsBufferData); + OpenSubdiv::Osd::CpuEvaluator:: + EvalStencils( + &osdNormalVertexBuffer, + normalSrcDesc, + &osdNormalVertexBuffer, + normalDstDesc, + consolidatedVertexStencils); + _normalVertexBufferGPU->commit(normalsBufferData); + + // smooth the positions + const MVertexBufferDescriptor& positionsDescriptor = _positionVertexBufferGPU->descriptor(); + dimension = positionsDescriptor.dimension(); + void* positionsBufferData = _positionVertexBufferGPU->acquire( + _vertexCount + consolidatedVertexStencils->GetNumStencils(), false); + OpenSubdiv::Osd::BufferDescriptor positionSrcDesc(0, dimension, dimension); + OpenSubdiv::Osd::BufferDescriptor positionDstDesc( + _vertexCount * dimension, dimension, dimension); + OsdCPUBuffer osdPositionVertexBuffer((float*)positionsBufferData); + OpenSubdiv::Osd::CpuEvaluator:: + EvalStencils( + &osdPositionVertexBuffer, + positionSrcDesc, + &osdPositionVertexBuffer, + positionDstDesc, + consolidatedVertexStencils); + _positionVertexBufferGPU->commit(positionsBufferData); +#elif defined(DO_OPENGL_OSD) + + class OsdGLBuffer + { + public: + OsdGLBuffer(GLuint resourceId, size_t dimension) + : _dimension(dimension) + , _resourceId(resourceId) + { + } + + size_t GetNumElements() const { return _dimension; } + GLuint BindVBO() { return _resourceId; } + + private: + size_t _dimension { 0 }; + GLuint _resourceId; + }; + + // Smooth the positions + // ensure the position buffer is large enough to hold the smoothed result + const MVertexBufferDescriptor& positionsDescriptor = _positionVertexBufferGPU->descriptor(); + int dimension = positionsDescriptor.dimension(); + void* positionsBufferData = _positionVertexBufferGPU->acquire( + _vertexCount + consolidatedVertexStencils->GetNumStencils(), false); + _positionVertexBufferGPU->commit(positionsBufferData); + + // BufferDescriptor is meant to describe an interleaved buffer, but we are not interleaved, so + // it looks dumb + OpenSubdiv::Osd::BufferDescriptor positionSrcDesc(0, dimension, dimension); + OpenSubdiv::Osd::BufferDescriptor positionDstDesc( + _vertexCount * dimension, dimension, dimension); + + static OpenSubdiv::Osd::EvaluatorCacheT evaluatorCache; + OpenSubdiv::Osd::GLComputeEvaluator const* positionInstance + = OpenSubdiv::Osd::GetEvaluator( + &evaluatorCache, positionSrcDesc, positionDstDesc, (void*)NULL); + + OsdGLBuffer osdPositionBuffer( + *((GLuint*)(_positionVertexBufferGPU->resourceHandle())), dimension); + OpenSubdiv::Osd::GLStencilTableSSBO* gpuStencilTable + = OpenSubdiv::Osd::GLStencilTableSSBO::Create(consolidatedVertexStencils, nullptr); + positionInstance->EvalStencils( + &osdPositionBuffer, positionSrcDesc, &osdPositionBuffer, positionDstDesc, gpuStencilTable); + + // ensure the normal buffer is large enough to hold the smoothed result + const MVertexBufferDescriptor& normalsDescriptor = _normalVertexBufferGPU->descriptor(); + dimension = normalsDescriptor.dimension(); + void* normalsBufferData = _normalVertexBufferGPU->acquire( + _vertexCount + consolidatedVertexStencils->GetNumStencils(), false); + _normalVertexBufferGPU->commit(normalsBufferData); + + // BufferDescriptor is meant to describe an interleaved buffer, but we are not interleaved, so + // it looks dumb + OpenSubdiv::Osd::BufferDescriptor normalSrcDesc(0, dimension, dimension); + OpenSubdiv::Osd::BufferDescriptor normalDstDesc(_vertexCount * dimension, dimension, dimension); + + OpenSubdiv::Osd::GLComputeEvaluator const* normalInstance + = OpenSubdiv::Osd::GetEvaluator( + &evaluatorCache, normalSrcDesc, normalDstDesc, (void*)NULL); + + OsdGLBuffer osdNormalBuffer(*((GLuint*)(_normalVertexBufferGPU->resourceHandle())), dimension); + normalInstance->EvalStencils( + &osdNormalBuffer, normalSrcDesc, &osdNormalBuffer, normalDstDesc, gpuStencilTable); + + if (_colorVertexBufferGPU) { + // ensure the color buffer is large enough to hold the smoothed result + const MVertexBufferDescriptor& colorsDescriptor = _colorVertexBufferGPU->descriptor(); + dimension = colorsDescriptor.dimension(); + void* ColorsBufferData = _colorVertexBufferGPU->acquire( + _vertexCount + consolidatedVertexStencils->GetNumStencils(), false); + _colorVertexBufferGPU->commit(ColorsBufferData); + + // BufferDescriptor is meant to describe an interleaved buffer, but we are not interleaved, + // so it looks dumb + OpenSubdiv::Osd::BufferDescriptor ColorSrcDesc(0, dimension, dimension); + OpenSubdiv::Osd::BufferDescriptor ColorDstDesc( + _vertexCount * dimension, dimension, dimension); + + OpenSubdiv::Osd::GLComputeEvaluator const* colorInstance + = OpenSubdiv::Osd::GetEvaluator( + &evaluatorCache, ColorSrcDesc, ColorDstDesc, (void*)NULL); + + OsdGLBuffer osdColorBuffer( + *((GLuint*)(_colorVertexBufferGPU->resourceHandle())), dimension); + colorInstance->EvalStencils( + &osdColorBuffer, ColorSrcDesc, &osdColorBuffer, ColorDstDesc, gpuStencilTable); + } +#endif +} + +void MeshViewportCompute::setClean() +{ + // when we perform consolidated compute the dirty flags for the source items remain dirty. + // Each individual source item isn't capable of drawing unconsolidated, so the flags must + // remain dirty to guard against potentially drawing unconsolidated & requiring the compute + // to execute. + + _topologyDirty = false; + _adjacencyBufferGPUDirty = false; + _normalVertexBufferGPUDirty = false; + _executed = true; +} + +bool MeshViewportCompute::execute( + const MPxViewportComputeItem::Actions& availableActions, + MRenderItem& renderItem) +{ + if (!_normalVertexBufferGPUDirty) + return true; + + if (_adjacencyTaskInProgress) + return false; + + MProfilingScope mainProfilingScope( + HdVP2RenderDelegate::sProfilerCategory, + MProfiler::kColorD_L1, + "MeshViewportCompute::execute"); + + findConsolidationMapping(renderItem); + + if (_topologyDirty || _adjacencyBufferSize == 0) { + TF_VERIFY(!_adjacencyTaskInProgress); + _adjacencyTaskInProgress = true; + EnqueueLambdaTask([this] { + this->createConsolidatedTopology(getSceneTopology); + this->createConsolidatedTopology(getRenderingTopology); + this->createConsolidatedAdjacency(); + this->_adjacencyTaskInProgress = false; + }); + return false; + } + + findRenderGeometry(renderItem); + + createConsolidatedOSDTables(renderItem); // disabled by preprocessor macros + + findVertexBuffers(renderItem); + + prepareAdjacencyBuffer(); + + prepareUniformBufferForNormals(); + + computeNormals(); + + computeOSD(); // disabled by preprocessor macros + + setClean(); + + return true; +} + +bool MeshViewportCompute::canConsolidate(const MPxViewportComputeItem& other) const +{ + const MeshViewportCompute* otherMeshViewportCompute + = dynamic_cast(&other); + if (nullptr == otherMeshViewportCompute) + return false; + + // If the compute has executed then the data to be consolidated will already + // be smoothed. Smoothed items can only consolidate with other smoothed items. + return hasExecuted() == otherMeshViewportCompute->hasExecuted() +#if defined(DO_CPU_OSD) || defined(DO_OPENGL_OSD) + && _adaptive == otherMeshViewportCompute->_adaptive + && _level == otherMeshViewportCompute->_level + && _meshSharedData->_renderingTopology.GetScheme() + == otherMeshViewportCompute->_meshSharedData->_renderingTopology.GetScheme() +#endif + ; +} + +MSharedPtr MeshViewportCompute::cloneForConsolidation() const +{ + MSharedPtr clone + = MSharedPtr::make<>(std::make_shared(), nullptr); + return clone; +} + +PXR_NAMESPACE_CLOSE_SCOPE + +#endif diff --git a/lib/mayaUsd/render/vp2RenderDelegate/meshViewportCompute.h b/lib/mayaUsd/render/vp2RenderDelegate/meshViewportCompute.h new file mode 100644 index 0000000000..47119ca98b --- /dev/null +++ b/lib/mayaUsd/render/vp2RenderDelegate/meshViewportCompute.h @@ -0,0 +1,261 @@ +// +// Copyright 2020 Autodesk +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#ifndef HD_VP2_MESHVIEWPORTCOMPUTE +#define HD_VP2_MESHVIEWPORTCOMPUTE + +#include // for MAYA_API_VERSION + +/* + GPU Compute Prototype + + The GPU evaluation prototype is two separate parts, the normal calculation code and the OSD + code. + + The normal calculation code is enabled by setting HDVP2_USE_GPU_NORMAL_COMPUTATION=1 at runtime. + The normal calculation code is close to being stable enough for general use, but hasn't had + enough polish to enable by default. + + The OSD code requires the normal calculation code to be enabled to use. OSD is enabled + by compiling with HDVP2_ENABLE_GPU_OSD. The OSD code is much less stable then the normals + calculation code and comes with a number of huge + + OSD Limitations: + * No OSD adaptive support + * scenes with animation behave poorly + * selection in the viewport is very slow + * toggling VP2 consolidation world off and on will cause crashes + * some objects draw with incorrect indexing +*/ + +#if MAYA_API_VERSION >= 20210000 \ + && !defined(OSMac_) // OSX doesn't have OpenGL 4.3 support necessary for compute +#define HDVP2_ENABLE_GPU_COMPUTE +#endif + +#ifdef HDVP2_ENABLE_GPU_COMPUTE +/* + GPU OSD computation implementation is experimental. +*/ +//#define HDVP2_ENABLE_GPU_OSD +#ifdef HDVP2_ENABLE_GPU_OSD +#define DO_OPENGL_OSD +//#define DO_CPU_OSD +#endif + +#include +#include + +#include +#include + +#ifdef HDVP2_ENABLE_GPU_OSD +#include +#include +#endif + +#include + +#include + +#define HDVP2_OPENGL_NORMALS +/* + OpenCL Normals calculation is experimental +*/ +//#define HDVP2_OPENCL_NORMALS +#ifdef HDVP2_OPENCL_NORMALS +#include +#endif +#ifdef HDVP2_OPENGL_NORMALS +//#define DO_OPENGL_ERROR_CHECK +// clang-format wants to re-order these two includes but they must be done in this order or +// the code will not compile. +// clang-format off +#include "pxr/imaging/glf/glew.h" // needs to be included before anything else includes gl.h +#include "../px_vp20/glslProgram.h" // this includes gl.h and not glew. +//clang-format on +#endif + +#include + +#ifdef HDVP2_OPENCL_NORMALS +#include +#include +#endif + +#include +#include +#include +#include + +#ifdef HDVP2_ENABLE_GPU_OSD +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#endif + +#include + +#include +#include +#include + +PXR_NAMESPACE_OPEN_SCOPE + +struct HdVP2MeshSharedData; +class HdVP2DrawItem; +class PxrMayaGLSLProgram; + +/*! \brief HdVP2Mesh-specific compute class for evaluating geometry streams and OSD + \class MeshViewportCompute + + A mesh can have shader stream requires (such as normals, tangents etc) and OSD + requirements. This class executes GPU compute kernels to fulfill the geometry + requirements of an HdVP2Mesh. + + A key performance feature of MeshViewportCompute is MRenderItems from different + HdVP2Mesh objects which have the same compute requirements can be consolidated + together in consolidated world and have their compute executed in a single + consolidated compute kernel, rather than issuing a compute kernel per unconsolidated + render item. +*/ +class MeshViewportCompute : public MPxViewportComputeItem +{ +private: + std::shared_ptr _meshSharedData; + const void* _drawItem { nullptr }; // only set for a consolidation source, never dereferenced + bool _executed { false }; // Has this compute been executed + bool _sourcesExecuted { + false + }; // Have the source compute's executed. only valid for a consolidated compute + + MSharedPtr _consolidatedCompute; + + std::unique_ptr _geometryIndexMapping; + unsigned int _vertexCount { 0 }; + GLuint _uboResourceHandle { 0 }; + + // adjacency information for normals + size_t _adjacencyBufferSize { 0 }; + std::unique_ptr _adjacencyBufferCPU; + std::unique_ptr _adjacencyBufferGPU; + std::unique_ptr _renderingToSceneFaceVtxIdsGPU; + std::unique_ptr _sceneToRenderingFaceVtxIdsGPU; + + // Geometry information + MGeometry* fRenderGeom { nullptr }; + // Buffers in MGeometry. + MVertexBuffer* _positionVertexBufferGPU { nullptr }; // not owned by *this, owned by fRenderGeom + MVertexBuffer* _normalVertexBufferGPU { nullptr }; // not owned by *this, owned by fRenderGeom + MVertexBuffer* _colorVertexBufferGPU { nullptr }; // not owned by *this, owned by fRenderGeom + + bool _adjacencyTaskInProgress { false };z + bool _topologyDirty { true }; // sourceMeshSharedData->_renderingTopology has changed + bool _adjacencyBufferGPUDirty { true }; //_adjacencyBufferGPU is dirty + bool _normalVertexBufferGPUDirty { true }; //_normalVertexBufferGPU is dirty + +#if defined(DO_CPU_OSD) || defined(DO_OPENGL_OSD) + // OSD information + std::unique_ptr _vertexStencils; + std::unique_ptr _varyingStencils; + std::unique_ptr _patchTable; + bool _adaptive { false }; + int _level { 1 }; +#endif + +#if defined(HDVP2_OPENGL_NORMALS) + static std::once_flag _compileProgramOnce; + static PxrMayaGLSLProgram* _computeNormalsProgram; +#endif + +#if defined(HDVP2_OPENCL_NORMALS) + //!< pure OpenCL version of the positions buffer + std::unique_ptr _positionsBufferCL; + + //!< A shared CL-GL version of _positionsBuffer + MAutoCLMem _positionsBufferShared; + + // data to compute smooth normals + std::unique_ptr _adjacencyBufferCL; + MAutoCLMem _normalsBufferShared; + MAutoCLEvent _normalsBufferReady; +#endif + + static void openGLErrorCheck(); + static bool hasOpenGL(); + static void initializeOpenGL(); + bool hasExecuted() const; + void reset(); + void findConsolidationMapping(MRenderItem& renderItem); + template + void createConsolidatedTopology(TopologyAccessor getTopology); + void createConsolidatedAdjacency(); + void findRenderGeometry(MRenderItem& renderItem); + void createConsolidatedOSDTables(MRenderItem& renderItem); + void findVertexBuffers(MRenderItem& renderItem); + void prepareAdjacencyBuffer(); + void prepareUniformBufferForNormals(); + static void compileNormalsProgram(); + void computeNormals(); + void computeOSD(); + void setClean(); + +public: + MeshViewportCompute(std::shared_ptr meshSharedData, const void* drawItem) + : MPxViewportComputeItem(false) + , _meshSharedData(meshSharedData) + , _drawItem(drawItem) + { + setRequiredAction(MPxViewportComputeItem::kAccessVirtualDevice, true); + setRequiredAction(MPxViewportComputeItem::kAccessConsolidation, true); + setRequiredAction(MPxViewportComputeItem::kModifyVertexBufferData, true); +#if defined(DO_CPU_OSD) || defined(DO_OPENGL_OSD) + setRequiredAction(MPxViewportComputeItem::kModifyVertexBufferSize, true); + setRequiredAction(MPxViewportComputeItem::kModifyConsolidation, true); +#endif + } + + virtual ~MeshViewportCompute() + { + if (0 != _uboResourceHandle) + glDeleteBuffers(1, &_uboResourceHandle); + } + + bool execute(const MPxViewportComputeItem::Actions& availableActions, MRenderItem& renderItem) + override; + bool canConsolidate(const MPxViewportComputeItem& other) const override; + MSharedPtr cloneForConsolidation() const override; + bool verifyDrawItem(const HdVP2DrawItem& drawItem) const; + + void setTopologyDirty(); + void setAdjacencyBufferGPUDirty(); + void setNormalVertexBufferGPUDirty(); +}; + +PXR_NAMESPACE_CLOSE_SCOPE + +#endif + +#endif From 65b16345b23f05161d69af8b601d447d2d80cbd8 Mon Sep 17 00:00:00 2001 From: krickw Date: Fri, 4 Dec 2020 12:48:31 -0500 Subject: [PATCH 2/5] Build fixes for versions of Maya where the MAYA_API_VERSION is 2020 or less. --- lib/mayaUsd/render/vp2RenderDelegate/mesh.cpp | 1 + lib/mayaUsd/render/vp2RenderDelegate/mesh.h | 2 ++ lib/mayaUsd/render/vp2RenderDelegate/meshViewportCompute.h | 3 +-- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/mayaUsd/render/vp2RenderDelegate/mesh.cpp b/lib/mayaUsd/render/vp2RenderDelegate/mesh.cpp index 32a4db7cec..ee38f51c8f 100644 --- a/lib/mayaUsd/render/vp2RenderDelegate/mesh.cpp +++ b/lib/mayaUsd/render/vp2RenderDelegate/mesh.cpp @@ -27,6 +27,7 @@ #include #include +#include #include #include #include diff --git a/lib/mayaUsd/render/vp2RenderDelegate/mesh.h b/lib/mayaUsd/render/vp2RenderDelegate/mesh.h index 9249a64156..4003a35a20 100644 --- a/lib/mayaUsd/render/vp2RenderDelegate/mesh.h +++ b/lib/mayaUsd/render/vp2RenderDelegate/mesh.h @@ -18,6 +18,8 @@ #include "meshViewportCompute.h" +#include + #include #include diff --git a/lib/mayaUsd/render/vp2RenderDelegate/meshViewportCompute.h b/lib/mayaUsd/render/vp2RenderDelegate/meshViewportCompute.h index 47119ca98b..8b8575b7d8 100644 --- a/lib/mayaUsd/render/vp2RenderDelegate/meshViewportCompute.h +++ b/lib/mayaUsd/render/vp2RenderDelegate/meshViewportCompute.h @@ -98,7 +98,6 @@ #include #include #include -#include #ifdef HDVP2_ENABLE_GPU_OSD #include @@ -171,7 +170,7 @@ class MeshViewportCompute : public MPxViewportComputeItem MVertexBuffer* _normalVertexBufferGPU { nullptr }; // not owned by *this, owned by fRenderGeom MVertexBuffer* _colorVertexBufferGPU { nullptr }; // not owned by *this, owned by fRenderGeom - bool _adjacencyTaskInProgress { false };z + bool _adjacencyTaskInProgress { false }; bool _topologyDirty { true }; // sourceMeshSharedData->_renderingTopology has changed bool _adjacencyBufferGPUDirty { true }; //_adjacencyBufferGPU is dirty bool _normalVertexBufferGPUDirty { true }; //_normalVertexBufferGPU is dirty From c970e7b811f1010216f7124800880d0baed8b3d4 Mon Sep 17 00:00:00 2001 From: krickw Date: Fri, 4 Dec 2020 14:14:05 -0500 Subject: [PATCH 3/5] Fix warnings which are errors on Linux and OSX. --- lib/mayaUsd/render/vp2RenderDelegate/mesh.cpp | 9 +++------ lib/mayaUsd/render/vp2RenderDelegate/mesh.h | 7 ------- .../vp2RenderDelegate/meshViewportCompute.cpp | 18 +++++++++--------- 3 files changed, 12 insertions(+), 22 deletions(-) diff --git a/lib/mayaUsd/render/vp2RenderDelegate/mesh.cpp b/lib/mayaUsd/render/vp2RenderDelegate/mesh.cpp index ee38f51c8f..35e9c590d2 100644 --- a/lib/mayaUsd/render/vp2RenderDelegate/mesh.cpp +++ b/lib/mayaUsd/render/vp2RenderDelegate/mesh.cpp @@ -906,8 +906,7 @@ void HdVP2Mesh::_UpdateDrawItem( #ifdef HDVP2_ENABLE_GPU_OSD const bool isLineItem = (renderItem->primitive() == MHWRender::MGeometry::kLines); // when we do OSD we don't bother creating indexing until after we have a smooth mesh - const bool requiresIndexUpdate - = !isBBoxItem && !isPointSnappingItem && (!_gpuOSDEnabled || isLineItem); + const bool requiresIndexUpdate = !isBBoxItem && !isPointSnappingItem && isLineItem; #else const bool requiresIndexUpdate = !isBBoxItem && !isPointSnappingItem; #endif @@ -934,7 +933,7 @@ void HdVP2Mesh::_UpdateDrawItem( numIndex * sizeof(int)); #ifdef HDVP2_ENABLE_GPU_COMPUTE - if (requireSmoothNormals && (_gpuNormalsEnabled || _gpuOSDEnabled)) { + if (requireSmoothNormals && _gpuNormalsEnabled) { // these function only do something if HDVP2_ENABLE_GPU_COMPUTE or // HDVP2_ENABLE_GPU_OSD is defined _CreateViewportCompute(*drawItem); @@ -976,7 +975,7 @@ void HdVP2Mesh::_UpdateDrawItem( prepareNormals = ((itemDirtyBits & HdChangeTracker::DirtyNormals) != 0); } else if (requireSmoothNormals && (itemDirtyBits & DirtySmoothNormals)) { #ifdef HDVP2_ENABLE_GPU_COMPUTE - if (_gpuNormalsEnabled || _gpuOSDEnabled) { + if (_gpuNormalsEnabled) { if (!_meshSharedData->_viewportCompute) { _CreateViewportCompute(*drawItem); #ifdef HDVP2_ENABLE_GPU_OSD @@ -1717,8 +1716,6 @@ void HdVP2Mesh::_CreateViewportCompute(const HdVP2DrawItem& drawItem) void HdVP2Mesh::_CreateOSDTables() { #if defined(DO_CPU_OSD) || defined(DO_OPENGL_OSD) - if (!_gpuOSDEnabled) - return; assert(_meshSharedData->_viewportCompute); MProfilingScope subProfilingScope( diff --git a/lib/mayaUsd/render/vp2RenderDelegate/mesh.h b/lib/mayaUsd/render/vp2RenderDelegate/mesh.h index 4003a35a20..d7196bcbcf 100644 --- a/lib/mayaUsd/render/vp2RenderDelegate/mesh.h +++ b/lib/mayaUsd/render/vp2RenderDelegate/mesh.h @@ -185,13 +185,6 @@ class HdVP2Mesh final : public HdMesh //! makes the expressions using these variables much simpler bool _gpuNormalsEnabled { true }; //!< Use GPU Compute for normal calculation, only used when //!< HDVP2_ENABLE_GPU_COMPUTE is defined - bool _gpuOSDEnabled { -#ifdef HDVP2_ENABLE_GPU_OSD - true -#else - false -#endif - }; //!< Use GPU Compute for OSD, only used when HDVP2_ENABLE_GPU_OSD is defined static int _gpuNormalsComputeThreshold; }; diff --git a/lib/mayaUsd/render/vp2RenderDelegate/meshViewportCompute.cpp b/lib/mayaUsd/render/vp2RenderDelegate/meshViewportCompute.cpp index 5cf0b049c5..cce9ee5eaf 100644 --- a/lib/mayaUsd/render/vp2RenderDelegate/meshViewportCompute.cpp +++ b/lib/mayaUsd/render/vp2RenderDelegate/meshViewportCompute.cpp @@ -264,7 +264,9 @@ void MeshViewportCompute::createConsolidatedTopology(TopologyAccessor getTopolog for (int sourceIndex = 0; sourceIndex < _geometryIndexMapping->geometryCount(); sourceIndex++) { MRenderItem* sourceItem = _geometryIndexMapping->sourceRenderItem(sourceIndex); - int consolidatedBufferVertexOffset = _geometryIndexMapping->vertexStart(sourceIndex); + int vertexStart = _geometryIndexMapping->vertexStart(sourceIndex); + TF_VERIFY(vertexStart >= 0); + size_t consolidatedBufferVertexOffset = (size_t)vertexStart; MSharedPtr sourceViewportComputeItem = MSharedPtr::dynamic_pointer_cast<>( sourceItem->viewportComputeItem()); @@ -279,24 +281,24 @@ void MeshViewportCompute::createConsolidatedTopology(TopologyAccessor getTopolog TF_VERIFY(sourceTopology.GetRefineLevel() == refineLevel); VtIntArray const& sourceFaceVertexCounts = sourceTopology.GetFaceVertexCounts(); - for (int faceId = 0; faceId < sourceFaceVertexCounts.size(); faceId++) { + for (size_t faceId = 0; faceId < sourceFaceVertexCounts.size(); faceId++) { faceVertexCounts.push_back(sourceFaceVertexCounts[faceId]); } VtIntArray const& sourceFaceVertexIndices = sourceTopology.GetFaceVertexIndices(); - for (int faceVertexId = 0; faceVertexId < sourceFaceVertexIndices.size(); + for (size_t faceVertexId = 0; faceVertexId < sourceFaceVertexIndices.size(); faceVertexId++) { faceVertexIndices.push_back( sourceFaceVertexIndices[faceVertexId] + consolidatedBufferVertexOffset); } VtIntArray const& sourceHoleIndices = sourceTopology.GetHoleIndices(); - for (int faceId = 0; faceId < sourceHoleIndices.size(); faceId++) { + for (size_t faceId = 0; faceId < sourceHoleIndices.size(); faceId++) { holeIndices.push_back( sourceHoleIndices[faceId] + consolidatedBufferVertexOffset); // untested? } - for (int idx = 0; idx < sourceMeshSharedData->_renderingToSceneFaceVtxIds.size(); + for (size_t idx = 0; idx < sourceMeshSharedData->_renderingToSceneFaceVtxIds.size(); idx++) { _meshSharedData->_renderingToSceneFaceVtxIds.push_back( sourceMeshSharedData->_renderingToSceneFaceVtxIds[idx] @@ -310,7 +312,7 @@ void MeshViewportCompute::createConsolidatedTopology(TopologyAccessor getTopolog _meshSharedData->_sceneToRenderingFaceVtxIds.push_back(-1); } - for (int idx = 0; idx < sourceMeshSharedData->_sceneToRenderingFaceVtxIds.size(); + for (size_t idx = 0; idx < sourceMeshSharedData->_sceneToRenderingFaceVtxIds.size(); idx++) { _meshSharedData->_sceneToRenderingFaceVtxIds.push_back( sourceMeshSharedData->_sceneToRenderingFaceVtxIds[idx] @@ -346,8 +348,6 @@ void MeshViewportCompute::createConsolidatedAdjacency() MProfiler::kColorD_L2, "MeshViewportCompute:createConsolidatedAdjacency"); - bool isConsolidated = nullptr != _geometryIndexMapping.get(); - Hd_VertexAdjacencySharedPtr adjacency(new Hd_VertexAdjacency()); HdBufferSourceSharedPtr adjacencyComputation = adjacency->GetSharedAdjacencyBuilderComputation(&_meshSharedData->_topology); @@ -708,7 +708,7 @@ void MeshViewportCompute::prepareAdjacencyBuffer() _adjacencyBufferGPU.reset(new MHWRender::MVertexBuffer(intArrayDesc)); void* bufferData = _adjacencyBufferGPU->acquire(adjacencyBufferSize, true); - for (int i = 0; i < vertexDataSize; i += 2) { + for (size_t i = 0; i < vertexDataSize; i += 2) { vertexDataStart[i] += paddingSize; } From 72df53843052fedb36bf6a505ee8ca4d47ef5ef0 Mon Sep 17 00:00:00 2001 From: krickw Date: Mon, 7 Dec 2020 10:12:42 -0500 Subject: [PATCH 4/5] Fix more unsigned-signed comparisons. --- lib/mayaUsd/render/vp2RenderDelegate/mesh.cpp | 12 ++++++------ lib/mayaUsd/render/vp2RenderDelegate/mesh.h | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/mayaUsd/render/vp2RenderDelegate/mesh.cpp b/lib/mayaUsd/render/vp2RenderDelegate/mesh.cpp index 35e9c590d2..3379042085 100644 --- a/lib/mayaUsd/render/vp2RenderDelegate/mesh.cpp +++ b/lib/mayaUsd/render/vp2RenderDelegate/mesh.cpp @@ -335,14 +335,14 @@ void HdVP2Mesh::_InitGPUCompute() // would also be nice to check the openGL version but renderer->drawAPIVersion() returns 4. // Compute was added in 4.3 so I don't have enough information to make the check if (renderer && renderer->drawAPIIsOpenGL() - && (TfGetenvInt("HDVP2_USE_GPU_NORMAL_COMPUTATION", 0) > 0)) - _gpuNormalsComputeThreshold - = TfGetenvInt("HDVP2_GPU_NORMAL_COMPUTATION_MINIMUM_THRESHOLD", 8000); - else - _gpuNormalsComputeThreshold = INT_MAX; + && (TfGetenvInt("HDVP2_USE_GPU_NORMAL_COMPUTATION", 0) > 0)) { + int threshold = TfGetenvInt("HDVP2_GPU_NORMAL_COMPUTATION_MINIMUM_THRESHOLD", 8000); + _gpuNormalsComputeThreshold = threshold >= 0 ? (size_t)threshold : SIZE_MAX; + } else + _gpuNormalsComputeThreshold = SIZE_MAX; } -int HdVP2Mesh::_gpuNormalsComputeThreshold = -1; +size_t HdVP2Mesh::_gpuNormalsComputeThreshold = SIZE_MAX; //! \brief Constructor #if defined(HD_API_VERSION) && HD_API_VERSION >= 36 HdVP2Mesh::HdVP2Mesh(HdVP2RenderDelegate* delegate, const SdfPath& id) diff --git a/lib/mayaUsd/render/vp2RenderDelegate/mesh.h b/lib/mayaUsd/render/vp2RenderDelegate/mesh.h index d7196bcbcf..70bde70e68 100644 --- a/lib/mayaUsd/render/vp2RenderDelegate/mesh.h +++ b/lib/mayaUsd/render/vp2RenderDelegate/mesh.h @@ -185,7 +185,7 @@ class HdVP2Mesh final : public HdMesh //! makes the expressions using these variables much simpler bool _gpuNormalsEnabled { true }; //!< Use GPU Compute for normal calculation, only used when //!< HDVP2_ENABLE_GPU_COMPUTE is defined - static int _gpuNormalsComputeThreshold; + static size_t _gpuNormalsComputeThreshold; }; PXR_NAMESPACE_CLOSE_SCOPE From ad202091942e54b71eacf3dd40fe65716164cdb0 Mon Sep 17 00:00:00 2001 From: krickw Date: Thu, 17 Dec 2020 11:33:15 -0500 Subject: [PATCH 5/5] Add USD version check. --- lib/mayaUsd/render/vp2RenderDelegate/meshViewportCompute.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/mayaUsd/render/vp2RenderDelegate/meshViewportCompute.h b/lib/mayaUsd/render/vp2RenderDelegate/meshViewportCompute.h index 8b8575b7d8..5b1d0cfbc0 100644 --- a/lib/mayaUsd/render/vp2RenderDelegate/meshViewportCompute.h +++ b/lib/mayaUsd/render/vp2RenderDelegate/meshViewportCompute.h @@ -40,8 +40,10 @@ * some objects draw with incorrect indexing */ -#if MAYA_API_VERSION >= 20210000 \ - && !defined(OSMac_) // OSX doesn't have OpenGL 4.3 support necessary for compute +// Maya 2020 is missing API necessary for compute support +// OSX doesn't have OpenGL 4.3 support necessary for compute +// USD before 20.08 doesn't include some OSD commits we rely on +#if MAYA_API_VERSION >= 20210000 && !defined(OSMac_) && USD_VERSION_NUM > 2002 #define HDVP2_ENABLE_GPU_COMPUTE #endif