Skip to content

Commit

Permalink
TensorRT 8.4 GA ONNX Parser Release (#851)
Browse files Browse the repository at this point in the history
* TensorRT 8.4 EA ONNX Parser Release

* support protobuf >= 3.11 (#812)

Signed-off-by: George Wu <[email protected]>

* missed other sections for supporting >= protobuf 3.11 (#817)

* missed one other section for supporting >= protobuf 3.11

Signed-off-by: George Wu <[email protected]>

* one more section

Signed-off-by: George Wu <[email protected]>

* fix indent

Signed-off-by: George Wu <[email protected]>

* TensorRT 8.4-GA ONNX Parser Release

* TensorRT 8.4.1.5 updates (#849)

Signed-off-by: Rajeev Rao <[email protected]>

Co-authored-by: George Wu <[email protected]>
Co-authored-by: Rajeev Rao <[email protected]>
  • Loading branch information
3 people committed Jun 14, 2022
1 parent 4ebfd96 commit 1da7332
Show file tree
Hide file tree
Showing 26 changed files with 3,948 additions and 615 deletions.
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ add_definitions("-DSOURCE_LENGTH=${SOURCE_LENGTH}")
# Version information
#--------------------------------------------------
set(ONNX2TRT_MAJOR 8)
set(ONNX2TRT_MINOR 2)
set(ONNX2TRT_MINOR 4)
set(ONNX2TRT_PATCH 1)
set(ONNX2TRT_VERSION "${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}.${ONNX2TRT_PATCH}" CACHE STRING "ONNX2TRT version")

Expand All @@ -42,6 +42,7 @@ set(IMPORTER_SOURCES
builtin_op_importers.cpp
onnx2trt_utils.cpp
onnxErrorRecorder.cpp
ImporterContext.cpp
ShapedWeights.cpp
ShapeTensor.cpp
LoopHelpers.cpp
Expand Down
29 changes: 21 additions & 8 deletions ConditionalHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,23 @@ Status addConditionalInputLayer(IImporterContext* ctx, nvinfer1::IIfConditional*

// Take a snapshot of the network before and after parsing the subgraph and return a list
// of newly added network layers.
Status importSubgraph(
IImporterContext* ctx, const ::ONNX_NAMESPACE::GraphProto& subgraph, std::vector<nvinfer1::ILayer*>& newLayers)
Status importSubgraph(IImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& subgraph,
std::vector<nvinfer1::ILayer*>& newLayers, StringMap<TensorOrWeights>& subgraphTensors)
{
auto net = ctx->network();
int32_t beforeSubgraph = net->getNbLayers();

// Establish scope for names local to the subgraph.
NameScope nameScope(*ctx);

CHECK(onnx2trt::parseGraph(ctx, subgraph));

for (int32_t i = 0; i < subgraph.output_size(); ++i)
{
std::string name = subgraph.output(i).name();
subgraphTensors.emplace(std::make_pair(name, ctx->tensors().at(name)));
}

for (int32_t i = beforeSubgraph; i < net->getNbLayers(); i++)
{
newLayers.push_back(net->getLayer(i));
Expand Down Expand Up @@ -135,8 +145,9 @@ Status addIfInputLayers(IImporterContext* ctx, nvinfer1::IIfConditional* conditi

// Add an IConditionalOutputLayer to `layer`'s outputs.
Status addIfOutputLayers(IImporterContext* ctx, nvinfer1::IIfConditional* conditional,
const ::ONNX_NAMESPACE::GraphProto& thenGraph, const std::vector<nvinfer1::ILayer*>& thenLayers,
const ::ONNX_NAMESPACE::GraphProto& elseGraph, const std::vector<nvinfer1::ILayer*>& elseLayers,
::ONNX_NAMESPACE::GraphProto const& thenGraph, std::vector<nvinfer1::ILayer*> const& thenLayers,
StringMap<TensorOrWeights> const& thenSubgraphTensors, ::ONNX_NAMESPACE::GraphProto const& elseGraph,
std::vector<nvinfer1::ILayer*> const& elseLayers, StringMap<TensorOrWeights> const& elseSubgraphTensors,
std::vector<TensorOrWeights>& graphOutputs)
{
// Reported outputs are outputs that the ONNX model reports as subgraph outputs. This list is
Expand Down Expand Up @@ -166,7 +177,8 @@ Status addIfOutputLayers(IImporterContext* ctx, nvinfer1::IIfConditional* condit
// Retrieve the output tensors of a subgraph (tensors exiting the subgraph).
auto getSubgraphOutputTensors
= [](IImporterContext* ctx, std::vector<nvinfer1::ITensor*>& sgOutputs, SubgraphPortsMap& subgraphOutputs,
const ::ONNX_NAMESPACE::GraphProto& subgraph, std::vector<nvinfer1::ILayer*> subgraphLayers) {
::ONNX_NAMESPACE::GraphProto const& subgraph, std::vector<nvinfer1::ILayer*> subgraphLayers,
StringMap<TensorOrWeights> const& subgraphTensors) {
for (const auto& layer : subgraphLayers)
{
const auto layerName = layer->getName();
Expand All @@ -184,17 +196,18 @@ Status addIfOutputLayers(IImporterContext* ctx, nvinfer1::IIfConditional* condit
for (int32_t outIdx = 0; outIdx < nbOutputs; outIdx++)
{
const auto thenName = subgraph.output(outIdx).name();
auto* thenTensor = &convertToTensor(ctx->tensors().at(thenName), ctx);
TensorOrWeights tw = subgraphTensors.at(thenName);
auto* thenTensor = &convertToTensor(tw, ctx);
sgOutputs.push_back(thenTensor);
}
}
};

std::vector<nvinfer1::ITensor*> thenOutputTensors;
getSubgraphOutputTensors(ctx, thenOutputTensors, thenOutputs, thenGraph, thenLayers);
getSubgraphOutputTensors(ctx, thenOutputTensors, thenOutputs, thenGraph, thenLayers, thenSubgraphTensors);

std::vector<nvinfer1::ITensor*> elseSGOutputTensors;
getSubgraphOutputTensors(ctx, elseSGOutputTensors, elseOutputs, elseGraph, elseLayers);
getSubgraphOutputTensors(ctx, elseSGOutputTensors, elseOutputs, elseGraph, elseLayers, elseSubgraphTensors);

ASSERT(thenOutputTensors.size() == elseSGOutputTensors.size()
&& "The then/else branches of an If operator must have the same number of outputs.",
Expand Down
9 changes: 5 additions & 4 deletions ConditionalHelpers.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ Status getSubgraphOutputs(const std::vector<nvinfer1::ILayer*>& newLayers,

// Take a snapshot of the network before and after parsing the subgraph and return a list
// of newly added network layers.
Status importSubgraph(
IImporterContext* ctx, const ::ONNX_NAMESPACE::GraphProto& subgraph, std::vector<nvinfer1::ILayer*>& newLayers);
Status importSubgraph(IImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& subgraph,
std::vector<nvinfer1::ILayer*>& newLayers, StringMap<TensorOrWeights>& subgraphTensors);

using InputsMap = std::unordered_map<std::string, nvinfer1::IIfConditionalInputLayer*>;

Expand All @@ -45,8 +45,9 @@ onnx2trt::Status addIfInputLayers(IImporterContext* ctx, nvinfer1::IIfConditiona

// Add IIfConditionalOutputLayers to the outputs of the subgraph indicated by `subgraph`.
onnx2trt::Status addIfOutputLayers(IImporterContext* ctx, nvinfer1::IIfConditional* conditional,
const ::ONNX_NAMESPACE::GraphProto& thenGraph, const std::vector<nvinfer1::ILayer*>& thenLayers,
const ::ONNX_NAMESPACE::GraphProto& elseGraph, const std::vector<nvinfer1::ILayer*>& elseLayers,
::ONNX_NAMESPACE::GraphProto const& thenGraph, std::vector<nvinfer1::ILayer*> const& thenLayers,
StringMap<TensorOrWeights> const& thenSubgraphTensors, ::ONNX_NAMESPACE::GraphProto const& elseGraph,
std::vector<nvinfer1::ILayer*> const& elseLayers, StringMap<TensorOrWeights> const& elseSubgraphTensors,
std::vector<TensorOrWeights>& graphOutputs);

} // namespace onnx2trt
110 changes: 110 additions & 0 deletions ImporterContext.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#include "ImporterContext.hpp"

namespace onnx2trt
{

//! Enter a new ONNX graph scope: subsequent tensor registrations are recorded
//! so they can be unwound by popBaseNameScope().
void ImporterContext::pushBaseNameScope()
{
    // Append a fresh, empty binding map for the innermost scope.
    mBaseNameScopeStack.emplace_back();
}

//! Leave the innermost ONNX graph scope, undoing every name binding that the
//! scope introduced or shadowed.
void ImporterContext::popBaseNameScope()
{
    auto& tensorMap = tensors();
    auto& innermostScope = mBaseNameScopeStack.back();
    for (auto& entry : innermostScope)
    {
        // entry: {name, {introducedHere, savedValue}}.
        bool const introducedHere = entry.second.first;
        if (introducedHere)
        {
            // The name was new in this scope: remove it entirely.
            tensorMap.erase(entry.first);
        }
        else
        {
            // The name shadowed an outer binding: restore the saved value.
            tensorMap.at(entry.first) = std::move(entry.second.second);
        }
    }
    mBaseNameScopeStack.pop_back();
}

//! Register `tensor` under the ONNX name `basename`.
//!
//! Handles weights as well as tensors (named for consistency with tensors()).
//! Side effects visible in this function:
//!  * Generates a unique TRT-side name derived from `basename` and applies it
//!    to the tensor (always) or to the weights (only when inside a nested scope).
//!  * INT64 weights are converted to INT32 via convertINT64 before registration.
//!  * When a name scope is active (mBaseNameScopeStack non-empty), the previous
//!    binding for `basename` is saved so popBaseNameScope() can restore it.
//!
//! Throws std::runtime_error if `basename` is already bound in the current
//! scope (except for the null-tensor reservation case noted below).
void ImporterContext::registerTensor(TensorOrWeights tensor, std::string const& basename)
{
    // TRT requires unique tensor names.
    std::string const& uniqueName = generateUniqueName(mTensorNames, basename);

    // `tensor` may be a null placeholder (see the duplicate check below); only
    // name/convert it when it holds a real tensor or weights.
    if (tensor)
    {
        if (tensor.is_tensor())
        {
            tensor.tensor().setName(uniqueName.c_str());
            // Logging macro refers to ctx.
            auto* ctx = this;
            LOG_VERBOSE("Registering tensor: " << uniqueName << " for ONNX tensor: " << basename);
        }
        else if (tensor.is_weights())
        {
            auto const& weights = tensor.weights();
            // TRT has no INT64 weights; narrow to INT32 up front.
            if (tensor.weights().type == ::ONNX_NAMESPACE::TensorProto::INT64)
            {
                tensor = ShapedWeights{::ONNX_NAMESPACE::TensorProto::INT32,
                    convertINT64(reinterpret_cast<int64_t*>(weights.values), weights.shape, this), weights.shape};
            }
            // It may be possible for nested subgraphs to have different values for the same initializer.
            // For multiple name scopes - use unique name to keep track of weights.
            if (!mBaseNameScopeStack.empty())
            {
                tensor.weights().setName(uniqueName.c_str());
            }
            else
            {
                tensor.weights().setName(basename.c_str());
            }
        }
    }

    // Insert (or find) the map slot first; p.second tells us whether the name
    // was already bound. The order below matters: the old value is moved into
    // the scope stack BEFORE being overwritten at the end of this function.
    auto const p = this->tensors().emplace(basename, TensorOrWeights{});
    bool nameIsDuplicate = false;
    if (!mBaseNameScopeStack.empty())
    {
        // Remember original binding so it can be restored when scope is popped.
        auto const q
            = mBaseNameScopeStack.back().emplace(basename, std::make_pair(p.second, std::move(p.first->second)));
        // Check that scope did not already have a binding for basename.
        nameIsDuplicate = !q.second;
    }
    else
    {
        // The condition here accounts for ModelImporter::importModel reserving
        // output names by registering null tensors.
        nameIsDuplicate = !p.second && !p.first->second.isNullTensor();
    }
    if (nameIsDuplicate)
    {
        throw std::runtime_error("ONNX graph has duplicate tensor name: " + basename);
    }
    // Finally commit the new binding (moved-from state of `tensor` is fine here).
    p.first->second = std::move(tensor);
}

//! Give `layer` a unique TRT name derived from the ONNX node name `basename`
//! (or from the layer's current name when `basename` is empty), and index
//! constant layers by name for later lookup via getConstantLayer().
void ImporterContext::registerLayer(nvinfer1::ILayer* layer, std::string const& basename)
{
    // No layer will be added for Constant nodes in ONNX.
    if (layer == nullptr)
    {
        return;
    }

    std::string const desiredName = basename.empty() ? layer->getName() : basename;
    std::string const& uniqueName = generateUniqueName(mLayerNames, desiredName);

    auto* ctx = this; // The logging macros expect a local named ctx.
    LOG_VERBOSE("Registering layer: " << uniqueName << " for ONNX node: " << basename);

    layer->setName(uniqueName.c_str());

    if (layer->getType() == nvinfer1::LayerType::kCONSTANT)
    {
        // A renamed constant means two constant layers were created for the
        // same weights — an internal invariant violation worth flagging.
        if (basename != uniqueName)
        {
            LOG_ERROR("Constant layer: " << uniqueName << " can be a duplicate of: " << basename);
            assert(!"Internal error: duplicate constant layers for the same weights");
        }
        mConstantLayers.insert({uniqueName, static_cast<nvinfer1::IConstantLayer*>(layer)});
    }
}

} // namespace onnx2trt
104 changes: 49 additions & 55 deletions ImporterContext.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
#include "onnxErrorRecorder.hpp"
#include "onnx/common/stl_backports.h"
#include <list>
#include <string>
#include <unordered_map>
#include <utility>

namespace onnx2trt
{
Expand Down Expand Up @@ -84,8 +86,24 @@ class ImporterContext final : public IImporterContext
int64_t mSuffixCounter{0}; // increasing suffix counter used to uniquify layer names.
std::unordered_set<std::string> mUnsupportedShapeTensors; // Container to hold output tensor names of layers that produce shape tensor outputs but do not natively support them.
StringMap<std::string> mLoopTensors; // Container to map subgraph tensors to their original outer graph names.
std::string mOnnxFileLocation; // Keep track of the directory of the parsed ONNX file
std::string mOnnxFileLocation; // Keep track of the directory of the parsed ONNX file
std::unique_ptr<ErrorRecorderWrapper> mErrorWrapper; // error recorder to control TRT errors
StringMap<nvinfer1::IConstantLayer*> mConstantLayers;

//! Stack of names defined by nested ONNX graphs, with information about how to
//! restore their associated values when popping back to the surrounding scope.
//!
//! The stack is empty when processing the top-level ONNX graph.
//! back() corresponds to the innermost ONNX graph being processed.
//!
//! For each entry {name, {bool, TensorOrWeights}}:
//!
//! * If the bool is true, the name was newly introduced by the scope.
//!
//! * If the bool is false, the name shadows a name in a surrounding scope,
//! and TensorOrWeights was the name's value before being shadowed.
//!
std::vector<StringMap<std::pair<bool, TensorOrWeights>>> mBaseNameScopeStack;

public:
ImporterContext(nvinfer1::INetworkDefinition* network, nvinfer1::ILogger* logger)
Expand Down Expand Up @@ -134,52 +152,15 @@ class ImporterContext final : public IImporterContext
{
return mOnnxFileLocation;
}
// This actually handles weights as well, but is named this way to be consistent with the tensors()
void registerTensor(TensorOrWeights tensor, const std::string& basename) override
{
// TRT requires unique tensor names.
const std::string uniqueName = generateUniqueName(mTensorNames, basename);

if (tensor)
{
auto* ctx = this; // To enable logging.
if (tensor.is_tensor())
{
tensor.tensor().setName(uniqueName.c_str());
void pushBaseNameScope() override;

LOG_VERBOSE("Registering tensor: " << uniqueName << " for ONNX tensor: " << basename);
}
else if (tensor.is_weights())
{
const auto& weights = tensor.weights();
if (tensor.weights().type == ::ONNX_NAMESPACE::TensorProto::INT64)
{
tensor = ShapedWeights{::ONNX_NAMESPACE::TensorProto::INT32,
convertINT64(reinterpret_cast<int64_t*>(weights.values), weights.shape, ctx), weights.shape};
}
tensor.weights().setName(basename.c_str());
}
void popBaseNameScope() override;

}
// Overwrite previous tensors registered with the same name (this only happens when there are subgraphs,
// and in that case, overwriting is the desired behavior).
this->tensors()[basename] = std::move(tensor);
}

void registerLayer(nvinfer1::ILayer* layer, const std::string& basename) override
{
// No layer will be added for Constant nodes in ONNX.
if (layer)
{
const std::string name = basename.empty() ? layer->getName() : basename;
const std::string uniqueName = generateUniqueName(mLayerNames, name);

auto* ctx = this; // To enable logging.
LOG_VERBOSE("Registering layer: " << uniqueName << " for ONNX node: " << basename);
// This actually handles weights as well, but is named this way to be consistent with the tensors()
void registerTensor(TensorOrWeights tensor, std::string const& basename) override;

layer->setName(uniqueName.c_str());
}
}
void registerLayer(nvinfer1::ILayer* layer, std::string const& basename) override;

nvinfer1::ILogger& logger() override
{
Expand All @@ -188,16 +169,10 @@ class ImporterContext final : public IImporterContext

ShapedWeights createTempWeights(ShapedWeights::DataType type, nvinfer1::Dims shape, uint8_t value = 0) override
{
std::string const& name = generateUniqueName(mTensorNames, "tmp_weight");
ShapedWeights weights(type, nullptr, shape);
// Need special logic for handling scalars.
if (shape.nbDims == 0)
{
mTempBufs.push_back(std::vector<uint8_t>(getDtypeSize(type), value));
}
else
{
mTempBufs.push_back(std::vector<uint8_t>(weights.size_bytes(), value));
}
weights.setName(name.c_str());
mTempBufs.push_back(std::vector<uint8_t>(weights.size_bytes(), value));
weights.values = mTempBufs.back().data();
return weights;
}
Expand Down Expand Up @@ -256,8 +231,13 @@ class ImporterContext final : public IImporterContext
{
return mOpsets.begin()->second;
}
else if (mOpsets.count(domain))
{
return mOpsets.at(domain);
}
else
{
domain = "ai.onnx";
assert(mOpsets.count(domain));
return mOpsets.at(domain);
}
Expand All @@ -271,8 +251,22 @@ class ImporterContext final : public IImporterContext
{
return mErrorWrapper ? mErrorWrapper->getErrorRecorder() : nullptr;
}
nvinfer1::IConstantLayer* getConstantLayer(const char* name) const final
{
if (name == nullptr)
{
return nullptr;
}
auto const iter = mConstantLayers.find(name);
if (iter == mConstantLayers.end())
{
return nullptr;
}
return iter->second;
}

private:
std::string generateUniqueName(std::set<std::string>& namesSet, const std::string& basename)
std::string const& generateUniqueName(std::set<std::string>& namesSet, const std::string& basename)
{
std::string candidate = basename;

Expand All @@ -283,8 +277,8 @@ class ImporterContext final : public IImporterContext
}

namesSet.insert(candidate);

return candidate;
// Return reference to newly inserted string to avoid any c_str()'s going out of scope
return *namesSet.find(candidate);
}
};

Expand Down
Loading

0 comments on commit 1da7332

Please sign in to comment.