ONNX-TensorRT 10.6 GA Release (#1000)
Signed-off-by: Kevin Chen <[email protected]>
kevinch-nv authored Nov 5, 2024
1 parent 886aff9 commit 4442153
Showing 20 changed files with 355 additions and 223 deletions.
.gitmodules (2 changes: 1 addition & 1 deletion)
@@ -1,4 +1,4 @@
 [submodule "third_party/onnx"]
     path = third_party/onnx
     url = https://github.com/onnx/onnx.git
-    branch = v1.16.0
+    branch = v1.17.0
CMakeLists.txt (14 changes: 10 additions & 4 deletions)
@@ -28,7 +28,7 @@ add_definitions("-DSOURCE_LENGTH=${SOURCE_LENGTH}")
 # Version information
 #--------------------------------------------------
 set(ONNX2TRT_MAJOR 10)
-set(ONNX2TRT_MINOR 5)
+set(ONNX2TRT_MINOR 6)
 set(ONNX2TRT_PATCH 0)
 set(ONNX2TRT_VERSION "${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}.${ONNX2TRT_PATCH}" CACHE STRING "ONNX2TRT version")
 
@@ -106,6 +106,12 @@ find_path(TENSORRT_INCLUDE_DIR NvInfer.h
   PATH_SUFFIXES include)
 MESSAGE(STATUS "Found TensorRT headers at ${TENSORRT_INCLUDE_DIR}")
 
+# TensorRT Python Headers
+find_path(TENSORRT_PYTHON_INCLUDE_DIR plugin.h
+  HINTS ${TENSORRT_ROOT}
+  PATH_SUFFIXES python/include/impl)
+message(NOTICE "Found TensorRT Python headers at ${TENSORRT_PYTHON_INCLUDE_DIR}")
+
 # Output dynamic library names depends on platform:
 if (MSVC)
   set(nvonnxparser_lib_name "nvonnxparser_${ONNX2TRT_MAJOR}")
@@ -119,7 +125,7 @@ set(nvonnxparser_lib_name_static "nvonnxparser_static")
 # Importer library
 # --------------------------------
 add_library(${nvonnxparser_lib_name} SHARED ${IMPORTER_SOURCES})
-target_include_directories(${nvonnxparser_lib_name} PUBLIC ${ONNX_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR} ${CUDA_INCLUDE_DIR})
+target_include_directories(${nvonnxparser_lib_name} PUBLIC ${ONNX_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR} ${TENSORRT_PYTHON_INCLUDE_DIR} ${CUDA_INCLUDE_DIR})
 target_link_libraries(${nvonnxparser_lib_name} PUBLIC onnx_proto ${PROTOBUF_LIBRARY})
 set_target_properties(${nvonnxparser_lib_name} PROPERTIES
     VERSION ${ONNX2TRT_VERSION}
@@ -131,7 +137,7 @@ set_target_properties(${nvonnxparser_lib_name} PROPERTIES
     RUNTIME_OUTPUT_DIRECTORY "${TRT_OUT_DIR}"
 )
 add_library(${nvonnxparser_lib_name_static} STATIC ${IMPORTER_SOURCES})
-target_include_directories(${nvonnxparser_lib_name_static} PUBLIC ${ONNX_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR} ${CUDA_INCLUDE_DIR})
+target_include_directories(${nvonnxparser_lib_name_static} PUBLIC ${ONNX_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR} ${TENSORRT_PYTHON_INCLUDE_DIR} ${CUDA_INCLUDE_DIR})
 target_link_libraries(${nvonnxparser_lib_name_static} PUBLIC onnx_proto ${PROTOBUF_LIBRARY})
 set_target_properties(${nvonnxparser_lib_name_static} PROPERTIES
     ARCHIVE_OUTPUT_DIRECTORY "${TRT_OUT_DIR}"
@@ -143,7 +149,7 @@ set_target_properties(${nvonnxparser_lib_name_static} PROPERTIES
 # --------------------------------
 if(BUILD_ONNXIFI)
   add_library(trt_onnxify SHARED ${ONNXIFI_SOURCES})
-  target_include_directories(trt_onnxify PUBLIC ${CUDA_INCLUDE_DIR} ${ONNX_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR})
+  target_include_directories(trt_onnxify PUBLIC ${CUDA_INCLUDE_DIR} ${ONNX_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR} ${TENSORRT_PYTHON_INCLUDE_DIR})
   target_link_libraries(trt_onnxify PUBLIC ${nvonnxparser_lib_name_static} ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS})
 endif()

ConditionalHelpers.cpp (110 changes: 15 additions & 95 deletions)
@@ -10,32 +10,17 @@
 namespace onnx2trt
 {
 
-using NodeName = std::string;
-using LayerName = std::string;
-using InputIndex = int32_t;
-
-// A SubgraphPortsMap maps either the inputs or outputs ports of each node in an ONNX graph.
-using SubgraphPortsMap = std::unordered_map<nvinfer1::ITensor*, std::set<InputIndex>>;
-
-// An InputsMap tracks which IIfConditionalInputLayer we've added to a layer's inputs,
-// so that we can reuse them if needed.
-using InputsMap = std::unordered_map<LayerName, nvinfer1::IIfConditionalInputLayer*>;
-
-// Search for a network Layer name in a SubgraphPortsMap using partial (prefix) name matching.
-// ONNX nodes are matched to network layers using prefix-matching because an ONNX node may have
-// several network layers associated with it.
+// Search for a network Layer name in a SubgraphPortsMap.
 SubgraphPortsMap::const_iterator findLayer(const SubgraphPortsMap& inputs, const std::string layerName)
 {
-    return std::find_if(inputs.begin(), inputs.end(), [&](const auto& item) {
-        std::string const key = item.first->getName();
-        return layerName.compare(0, key.size(), key) == 0;
-    });
+    return std::find_if(
+        inputs.begin(), inputs.end(), [&](const auto& item) { return layerName == item.first->getName(); });
 }
 
 // Add a ConditionalInputLayer between `layer` and its inputs.
 // I.e. input[inIdx] -> layer ==> input[inIdx] -> ConditionalInputLayer -> layer.
 void addConditionalInputLayer(ImporterContext* ctx, nvinfer1::IIfConditional* conditional, InputsMap& inputsMap,
-    nvinfer1::ILayer& layer, int32_t inIdx)
+    nvinfer1::ILayer& layer, int32_t inIdx, ::ONNX_NAMESPACE::NodeProto const* node)
 {
     auto input = layer.getInput(inIdx);
     if (input == nullptr)
@@ -57,7 +42,7 @@ void addConditionalInputLayer(ImporterContext* ctx, nvinfer1::IIfConditional* co
         inputLayer = N_CHECK(conditional->addInput(*input));
         inputsMap[name] = inputLayer;
         const std::string inputLayerName(name);
-        ctx->registerLayer(inputLayer, inputLayerName + "_InputLayer", nullptr);
+        ctx->registerLayer(inputLayer, inputLayerName + "_InputLayer", node);
         // Note: Since multiple conditionals may use the same external tensor, check unique names for output tensors of
         // IfConditionalInputLayers to avoid tensor name duplication.
         ctx->registerTensor(
@@ -100,7 +85,7 @@ void importSubgraph(ImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& su

 // Add an IConditionalInputLayer to `layer`'s inputs, if they don't already exist.
 void addConditionalInputIfNeeded(ImporterContext* ctx, nvinfer1::IIfConditional* conditional, InputsMap& inputsMap,
-    nvinfer1::ILayer& layer, SubgraphPortsMap subgraphInputsMap)
+    nvinfer1::ILayer& layer, SubgraphPortsMap subgraphInputsMap, ::ONNX_NAMESPACE::NodeProto const* node)
 {
     // Return all of the layer's inputs that are external to the subgraph
     // that the layer belongs to.
@@ -120,36 +105,29 @@ void addConditionalInputIfNeeded(ImporterContext* ctx, nvinfer1::IIfConditional*
     for (auto inIdx : inIndices)
     {
         LOG_VERBOSE("Adding Input layer for " << layer.getName());
-        addConditionalInputLayer(ctx, conditional, inputsMap, layer, inIdx);
+        addConditionalInputLayer(ctx, conditional, inputsMap, layer, inIdx, node);
     }
 }
 
 // Add IConditionalInputLayers to `layer`'s inputs.
 void addIfInputLayers(ImporterContext* ctx, nvinfer1::IIfConditional* conditional, InputsMap& inputsMap,
-    const std::vector<nvinfer1::ILayer*>& newLayers)
+    const std::vector<nvinfer1::ILayer*>& newLayers, ::ONNX_NAMESPACE::NodeProto const* node)
 {
     // Find all of the tensors entering the subgraph.
-    // The node-names are from the ONNX context.
-    using InputIndex = int32_t;
-    std::unordered_map<nvinfer1::ITensor*, std::set<InputIndex>> subgraphInputsMap;
-    getSubgraphInputs(newLayers, subgraphInputsMap);
+    SubgraphPortsMap externalInputs;
+    getSubgraphInputs(newLayers, externalInputs);
 
     // Add a ConditionalInputLayer in front of each input that is external to the subgraph.
     for (const auto& layer : newLayers)
     {
-        addConditionalInputIfNeeded(ctx, conditional, inputsMap, *layer, subgraphInputsMap);
+        addConditionalInputIfNeeded(ctx, conditional, inputsMap, *layer, externalInputs, node);
     }
 }
 
-// Given a subgraph, find all of its external inputs/outputs (tensors entering/exiting the subgraph).
-void getSubgraphTensors(const std::vector<nvinfer1::ILayer*>& newLayers,
-    std::unordered_map<nvinfer1::ITensor*, std::set<int32_t>>& externalOutputs, bool extractOutputs,
-    const std::vector<std::string>* reportedOutputs = nullptr)
+// Given a subgraph, find all of its external inputs (tensors entering the subgraph).
+void getSubgraphInputs(const std::vector<nvinfer1::ILayer*>& newLayers, SubgraphPortsMap& externalInputs)
 {
-    using NodeName = std::string;
-    using TensorName = std::string;
     using PortIndex = int32_t;
-    using Port = std::pair<NodeName, PortIndex>;
     using TensorsSet = std::unordered_set<nvinfer1::ITensor*>;
     TensorsSet outputTensors;
     TensorsSet inputTensors;
@@ -176,20 +154,14 @@
     }
 
     using TensorsVec = std::vector<nvinfer1::ITensor*>;
-    auto getOutputs = [&](nvinfer1::ILayer const* l, TensorsVec& res) {
-        getTensors(l, false, [&](nvinfer1::ITensor* t) { res.emplace_back(t); });
-    };
 
     auto getInputs = [&](nvinfer1::ILayer const* l, TensorsVec& res) {
         getTensors(l, true, [&](nvinfer1::ITensor* t) { res.emplace_back(t); });
     };
 
-    // Retrieve the list of tensors either exiting or entering the subgraph.
-    std::unordered_map<nvinfer1::ITensor*, std::vector<Port>> externalPortsMap;
     auto filterTensors = [&](TensorsSet const& tensors, auto getNodeAccessor) {
         for (nvinfer1::ILayer const* l : newLayers)
         {
-            const auto& nodeName = l->getName();
             PortIndex i = 0;
 
             TensorsVec nodeAccessor;
@@ -202,66 +174,14 @@
                 }
                 if (tensors.count(tensor) == 0)
                 {
-                    TensorName tensorName = tensor->getName();
-                    auto prefixFound = false;
-                    if (reportedOutputs)
-                    {
-                        // reportedOutputs are the names of the outputs as reported by the ONNX parser and help
-                        // us further filter the output tensors.
-                        // Exiting tensors := {outputs} - {inputs} - {unreported tensors}
-                        // An example: a Split node is internal to a subgraph and has 4 outputs, but only two are
-                        // connected to the rest of the graph. To prevent mistaking the 2 unused outputs as subgraph
-                        // outputs, we look for them in reportedOutputs which leads us to ignore the 2 tensors.
-                        const auto iter = std::find_if(
-                            reportedOutputs->begin(), reportedOutputs->end(), [&](const auto& outputName) {
-                                // Prefix name matching.
-                                return tensorName.compare(0, outputName.size(), outputName) == 0;
-                            });
-                        prefixFound = iter != reportedOutputs->end();
-                    }
-                    if (!reportedOutputs || prefixFound)
-                    {
-                        externalPortsMap[tensor].push_back(std::make_pair(nodeName, i));
-                    }
+                    externalInputs[l].insert(i);
                 }
                 i++;
             }
         }
     };
 
-    if (extractOutputs)
-    {
-        filterTensors(inputTensors, getOutputs);
-    }
-    else
-    {
-        filterTensors(outputTensors, getInputs);
-    }
-
-    // Create the user's view of the external inputs, which uses the node-name as the key for
-    // looking up input/output port index.
-    for (auto const& input : externalPortsMap)
-    {
-        for (const Port& inPort : input.second)
-        {
-            auto* tensor = input.first;
-            auto const portIndex = inPort.second;
-            externalOutputs[tensor].insert(portIndex);
-        }
-    }
-}
-
-void getSubgraphOutputs(const std::vector<nvinfer1::ILayer*>& newLayers,
-    std::unordered_map<nvinfer1::ITensor*, std::set<int32_t>>& externalOutputs,
-    const std::vector<std::string>& reportedOutputs)
-{
-    getSubgraphTensors(newLayers, externalOutputs, true, &reportedOutputs);
-}
-
-void getSubgraphInputs(const std::vector<nvinfer1::ILayer*>& newLayers,
-    std::unordered_map<nvinfer1::ITensor*, std::set<int32_t>>& externalInputs)
-{
-    getSubgraphTensors(newLayers, externalInputs, false);
+    filterTensors(outputTensors, getInputs);
 }
 
 } // namespace onnx2trt
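
The behavioral core of this file's change is the findLayer lookup: the old code treated a map key as a match whenever it was a prefix of the queried layer name (one ONNX node could expand into several TensorRT layers sharing a name prefix), while the new code requires the names to be equal. A minimal standalone sketch of the two behaviors, using a string-keyed map in place of TensorRT's ILayer pointers (names here are hypothetical, for illustration only):

#include <algorithm>
#include <iostream>
#include <map>
#include <string>

// Stand-in for SubgraphPortsMap, keyed by layer name instead of ILayer*.
using PortsByName = std::map<std::string, int>;

// Old behavior: a key matches if it is a prefix of the queried name,
// so "MatMul_1_copy" would match the key "MatMul_1".
PortsByName::const_iterator findByPrefix(PortsByName const& m, std::string const& name)
{
    return std::find_if(m.begin(), m.end(), [&](auto const& item) {
        auto const& key = item.first;
        return name.compare(0, key.size(), key) == 0;
    });
}

// New behavior: exact name matching only.
PortsByName::const_iterator findExact(PortsByName const& m, std::string const& name)
{
    return std::find_if(m.begin(), m.end(), [&](auto const& item) { return name == item.first; });
}

int main()
{
    PortsByName ports{{"MatMul_1", 0}};
    std::cout << (findByPrefix(ports, "MatMul_1_copy") != ports.end()) << "\n"; // 1: prefix hit
    std::cout << (findExact(ports, "MatMul_1_copy") != ports.end()) << "\n";    // 0: no exact match
}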
ConditionalHelpers.hpp (25 changes: 12 additions & 13 deletions)
@@ -18,28 +18,27 @@
 namespace onnx2trt
 {
 
+using NodeName = std::string;
+using LayerName = std::string;
+using InputIndex = int32_t;
+
+// A SubgraphPortsMap maps inputs' ports of each layer in an ONNX graph.
+using SubgraphPortsMap = std::unordered_map<const nvinfer1::ILayer*, std::unordered_set<InputIndex>>;
+
 // Given a subgraph, find all of its external inputs (tensors entering the subgraph).
-// The result is returned in `subgraphInputs`, which is a map indexed by ITensor (a tensor entering the subgraph) and
-// with values indicating a set of external input indices.
-void getSubgraphInputs(std::vector<nvinfer1::ILayer*> const& newLayers,
-    std::unordered_map<nvinfer1::ITensor*, std::set<int32_t>>& subgraphInputs);
-
-// Given a subgraph, find all of its external outputs (tensors exiting the subgraph).
-// The result is returned in `subgraphInputs`, which is a map indexed by ITensor (a tensor exiting the subgraph) and
-// with values indicating a set of external outputs indices.
-void getSubgraphOutputs(const std::vector<nvinfer1::ILayer*>& newLayers,
-    std::unordered_map<nvinfer1::ITensor*, std::set<int32_t>>& subgraphOutputs,
-    const std::vector<std::string>& reportedOutputs);
+void getSubgraphInputs(const std::vector<nvinfer1::ILayer*>& newLayers, SubgraphPortsMap& externalInputs);
 
 // Take a snapshot of the network before and after parsing the subgraph and return a list
 // of newly added network layers.
 void importSubgraph(ImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& subgraph,
     std::vector<nvinfer1::ILayer*>& newLayers, std::vector<TensorOrWeights>& subgraphTensors);
 
-using InputsMap = std::unordered_map<std::string, nvinfer1::IIfConditionalInputLayer*>;
+// An InputsMap tracks which IIfConditionalInputLayer we've added to a layer's inputs,
+// so that we can reuse them if needed.
+using InputsMap = std::unordered_map<LayerName, nvinfer1::IIfConditionalInputLayer*>;
 
 // Add IIfConditionalInputLayers to the inputs of the subgraph indicated by `subgraph`.
 void addIfInputLayers(ImporterContext* ctx, nvinfer1::IIfConditional* conditional, InputsMap& inputsMap,
-    const std::vector<nvinfer1::ILayer*>& newLayers);
+    const std::vector<nvinfer1::ILayer*>& newLayers, ::ONNX_NAMESPACE::NodeProto const* node);
 
 } // namespace onnx2trt
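
The consolidated getSubgraphInputs keeps the set logic that the deleted getSubgraphTensors expressed through its extractOutputs flag: a tensor enters the subgraph if some layer inside it consumes the tensor but no layer inside it produces it, i.e. entering := {inputs} - {outputs}. A rough standalone sketch of that classification, with a plain struct standing in for ILayer/ITensor (illustrative only, not the parser's API):

#include <iostream>
#include <set>
#include <string>
#include <vector>

// Minimal stand-in for a layer: the tensor names it reads and writes.
struct FakeLayer
{
    std::vector<std::string> inputs;
    std::vector<std::string> outputs;
};

// A tensor entering the subgraph is consumed by some layer but produced by none.
std::set<std::string> externalInputs(std::vector<FakeLayer> const& layers)
{
    std::set<std::string> produced;
    for (auto const& l : layers)
    {
        produced.insert(l.outputs.begin(), l.outputs.end());
    }
    std::set<std::string> entering;
    for (auto const& l : layers)
    {
        for (auto const& t : l.inputs)
        {
            if (produced.count(t) == 0)
            {
                entering.insert(t);
            }
        }
    }
    return entering;
}

int main()
{
    // Two-layer subgraph: A reads x,w and writes h; B reads h and writes y.
    std::vector<FakeLayer> sub{{{"x", "w"}, {"h"}}, {{"h"}, {"y"}}};
    for (std::string const& t : externalInputs(sub))
    {
        std::cout << t << "\n"; // prints w and x; h is produced inside the subgraph
    }
}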
ImporterContext.hpp (13 changes: 11 additions & 2 deletions)
@@ -138,8 +138,17 @@ class ImporterContext
     //! Map holding FunctionProtos
     StringMap<::ONNX_NAMESPACE::FunctionProto> mLocalFunctions;
 
+    //! Data type to keep track of a local function in mLocalFunctionStack.
+    //! It is a tuple of three elements: (1) function name, (2) node name, and (3) function attributes.
+    struct LocalFunctionMetadata
+    {
+        std::string functionName;
+        std::string nodeName;
+        StringMap<::ONNX_NAMESPACE::AttributeProto const*> attrs;
+    };
+
     //! Vector to hold current local function names and attributes
-    std::vector<std::pair<std::string, StringMap<::ONNX_NAMESPACE::AttributeProto const*>>> mLocalFunctionStack;
+    std::vector<LocalFunctionMetadata> mLocalFunctionStack;
 
     //! Vector to hold the local function names at each error
     std::vector<std::vector<std::string>> mLocalFunctionErrors;
@@ -325,7 +334,7 @@
     {
         return mLocalFunctions;
     }
-    std::vector<std::pair<std::string, StringMap<::ONNX_NAMESPACE::AttributeProto const*>>>& localFunctionStack()
+    std::vector<LocalFunctionMetadata>& localFunctionStack()
     {
         return mLocalFunctionStack;
     }
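
LocalFunctionMetadata replaces the previous std::pair of function name and attribute map; the added nodeName records which node instantiated the local function, which helps error reports from nested function bodies name their call site. A hedged sketch of how such a metadata stack might be pushed and popped (simplified types; the function, node, and attribute names are hypothetical, and the real ImporterContext bookkeeping differs):

#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

// Simplified stand-ins for the parser's types.
template <typename T>
using StringMap = std::unordered_map<std::string, T>;

struct LocalFunctionMetadata
{
    std::string functionName;
    std::string nodeName;
    StringMap<int> attrs; // stand-in for AttributeProto const* values
};

int main()
{
    std::vector<LocalFunctionMetadata> stack;

    // Entering a node that instantiates a local function "MyGelu":
    stack.push_back({"MyGelu", "node_17", {{"approximate", 1}}});

    // While importing the function body, the stack describes the call chain,
    // so an error can report both the function and the node that invoked it.
    for (auto const& meta : stack)
    {
        std::cout << meta.functionName << " (from " << meta.nodeName << ")\n";
    }

    stack.pop_back(); // done importing the function body
}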
ModelImporter.cpp (19 changes: 14 additions & 5 deletions)
@@ -221,19 +221,27 @@ void parseNode(
         }
     }
 
-    ONNXTRT_CHECK_NODE((node.output().size() <= static_cast<int32_t>(outputs.size())),
+    int32_t nonEmptyOutputs
+        = std::count_if(node.output().begin(), node.output().end(), [](std::string const& str) { return !str.empty(); });
+    ONNXTRT_CHECK_NODE(nonEmptyOutputs == static_cast<int32_t>(outputs.size()),
         "Node has more output tensors than TRT expected, expected output size is "
-            << outputs.size() << ", actual output size is " << node.output().size() << ".",
+            << outputs.size() << ", actual output size is " << nonEmptyOutputs << ".",
         node, nodeIdx, ErrorCode::kINVALID_GRAPH);
 
     // Set output names and register outputs with the context.
     std::ostringstream ssOutputs{};
     ssOutputs << nodeName << " [" << node.op_type() << "] outputs: ";
-    for (int32_t i = 0; i < node.output().size(); ++i)
+    for (int32_t i = 0, trtCnt = 0; i < node.output().size(); ++i)
     {
         auto const& outputName = node.output(i);
-        auto& output = outputs.at(i);
-        ssOutputs << "[" << outputName << " -> " << output.shape() << "[" << output.getType() << "]" << "], ";
+        // Empty strings denote null-tensor outputs. Ignore these.
+        if (outputName.empty())
+        {
+            continue;
+        }
+        auto& output = outputs.at(trtCnt);
+        ssOutputs << "[" << outputName << " -> " << output.shape() << "[" << output.getType() << "]"
+                  << "], ";
         // Note: This condition is to allow ONNX outputs to be ignored
         // Always register output weights (even empty ones) as it may be mapped to an unused input
         if ((output || output.is_weights()) && !outputName.empty())
@@ -255,6 +263,7 @@
             ONNXTRT_CHECK_NODE(legalUINT8, "TensorRT does not support UINT8 types for intermediate tensors!", node,
                 nodeIdx, ErrorCode::kUNSUPPORTED_NODE);
         }
+        trtCnt++;
     }
     LOG_VERBOSE(ssOutputs.str());
 }
(The remaining 14 changed files are not shown.)
