parser 10.2 updates (#977)
Signed-off-by: Yuan Yao (yuanyao) <[email protected]>
yuanyao-nv authored Jul 10, 2024
1 parent 96e7811 commit 706f02e
Showing 12 changed files with 141 additions and 45 deletions.
3 changes: 2 additions & 1 deletion CMakeLists.txt
@@ -28,7 +28,7 @@ add_definitions("-DSOURCE_LENGTH=${SOURCE_LENGTH}")
# Version information
#--------------------------------------------------
set(ONNX2TRT_MAJOR 10)
set(ONNX2TRT_MINOR 1)
set(ONNX2TRT_MINOR 2)
set(ONNX2TRT_PATCH 0)
set(ONNX2TRT_VERSION "${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}.${ONNX2TRT_PATCH}" CACHE STRING "ONNX2TRT version")

@@ -56,6 +56,7 @@ set(IMPORTER_SOURCES
weightUtils.cpp
WeightsContext.cpp
TensorOrWeights.cpp
errorHelpers.cpp
)

if (BUILD_ONNXIFI)
4 changes: 4 additions & 0 deletions NvOnnxParser.h
@@ -175,6 +175,8 @@ class IParserError
//!
//! \brief an object for parsing ONNX models into a TensorRT network definition
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IParser
{
public:
@@ -406,6 +408,8 @@ class IParser
//!
//! \brief An interface designed to refit weights from an ONNX model.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IParserRefitter
{
public:
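Since neither interface may be subclassed, both are obtained through factory functions. A minimal sketch of the intended flow (the file name, severity, and error handling here are illustrative, not from this commit):

```cpp
#include "NvOnnxParser.h"
#include <iostream>

// Parse an ONNX file into an existing TensorRT network and report failures.
void parseModel(nvinfer1::INetworkDefinition& network, nvinfer1::ILogger& logger)
{
    nvonnxparser::IParser* parser = nvonnxparser::createParser(network, logger);
    if (!parser->parseFromFile("model.onnx", static_cast<int32_t>(nvinfer1::ILogger::Severity::kWARNING)))
    {
        for (int32_t i = 0; i < parser->getNbErrors(); ++i)
        {
            std::cerr << parser->getError(i)->desc() << std::endl;
        }
    }
    delete parser;
}
```

An `IParserRefitter` is created the same way, via `nvonnxparser::createParserRefitter(refitter, logger)`.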
6 changes: 3 additions & 3 deletions README.md
@@ -16,7 +16,7 @@ For press and other inquiries, please contact Hector Marinez at hmarinez@nvidia.

## Supported TensorRT Versions

Development on the `main` branch is for the latest version of [TensorRT 10.0](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.
Development on this branch is for the latest version of [TensorRT 10.2](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.

For previous versions of TensorRT, refer to their respective branches.

@@ -29,8 +29,8 @@ Current supported ONNX operators are found in the [operator support matrix](docs
### Dependencies

- [Protobuf >= 3.0.x](https://github.com/google/protobuf/releases)
- [TensorRT 10.1](https://developer.nvidia.com/tensorrt)
- [TensorRT 10.1 open source libraries](https://github.com/NVIDIA/TensorRT/)
- [TensorRT 10.2](https://developer.nvidia.com/tensorrt)
- [TensorRT 10.2 open source libraries](https://github.com/NVIDIA/TensorRT/)

### Building

15 changes: 15 additions & 0 deletions ShapeTensor.cpp
@@ -443,6 +443,21 @@ ShapeTensor convertTo1D(ImporterContext* ctx, const ShapeTensor& tensor)
return ShapeTensor(*N_CHECK(addShuffle(ctx, tensor.tensor(ctx), shapeVector(1))->getOutput(0)));
}

ShapeTensor convertTo0D(ImporterContext* ctx, const ShapeTensor& tensor)
{
if (tensor.size() != 1)
{
throw std::runtime_error("Cannot convert a tensor with size > 1 to a scalar!");
}
if (tensor.valueKnown(0))
{
return shapeScalar(tensor[0]);
}
auto* layer = N_CHECK(ctx->network()->addShuffle(tensor.tensor(ctx)));
layer->setReshapeDimensions(nvinfer1::Dims{0});
return ShapeTensor(*N_CHECK(layer->getOutput(0)));
}

//! If all values of x are known, return Dims with those values,
//! but throw exception if any value is outside specified bounds.
//! Otherwise return Dims with zeros.
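A hedged sketch of where the new helper fits (only names declared in this file are used; the wrapper itself is hypothetical). When the single value is known at parse time the result is a build-time scalar; otherwise an `IShuffleLayer` reshapes the tensor to `Dims{0}`, i.e. rank 0:

```cpp
// Hypothetical convenience wrapper: accept a scalar or a one-element 1D
// shape tensor, and always hand back a scalar (rank-0) ShapeTensor.
ShapeTensor ensureScalar(ImporterContext* ctx, ShapeTensor const& t)
{
    return t.rank() == 1 ? convertTo0D(ctx, t) : t;
}
```

The `Range` importer later in this diff applies exactly this pattern to its `start`, `limit`, and `delta` inputs.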
3 changes: 3 additions & 0 deletions ShapeTensor.hpp
@@ -231,6 +231,9 @@ ShapeTensor shapeOf(TensorOrWeights& t);
//! Reshape 0D tensor to 1D tensor.
ShapeTensor convertTo1D(ImporterContext* ctx, const ShapeTensor& tensor);

//! Reshape single value 1D tensor to a 0D tensor.
ShapeTensor convertTo0D(ImporterContext* ctx, const ShapeTensor& tensor);

//! Add an ISliceLayer.
nvinfer1::ISliceLayer* addSlice(ImporterContext* ctx, nvinfer1::ITensor& data, const ShapeTensor& starts,
const ShapeTensor& sizes, const ShapeTensor& strides);
1 change: 0 additions & 1 deletion Status.hpp
@@ -203,7 +203,6 @@ static std::ostream& operator<<(std::ostream& stream, nvinfer1::DataType const&
case nvinfer1::DataType::kBOOL: return stream << "bool";
case nvinfer1::DataType::kFP8: return stream << "float8";
case nvinfer1::DataType::kINT4: return stream << "int4";

default: throw std::runtime_error("Unknown dtype");
}
}
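The overload above is what lets dtype names appear in parser error text. A small illustrative use (not from this commit):

```cpp
#include <sstream>
#include <stdexcept>

// Illustrative only: format an unsupported dtype into an exception message.
void rejectDtype(nvinfer1::DataType dtype)
{
    std::ostringstream msg;
    msg << "Unsupported tensor dtype: " << dtype; // e.g. "int4" for kINT4
    throw std::runtime_error(msg.str());
}
```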
8 changes: 7 additions & 1 deletion docs/Changelog.md
@@ -2,8 +2,14 @@

# ONNX-TensorRT Changelog

# TensorRT 10.2 GA Release - 2024-7-10
For more details, see the 10.2 GA release notes.

- Improved error handling with new macros and classes
- Minor changes to op importers for `GRU` and `Squeeze`

# TensorRT 10.1 GA Release - 2024-6-17
For more details, see the 10.0 GA release notes.
For more details, see the 10.1 GA release notes.

- Added `supportsModelV2` API
- Added support for `DeformConv` operation
86 changes: 79 additions & 7 deletions errorHelpers.hpp
@@ -5,35 +5,107 @@

#include "Status.hpp"
#include <NvInferRuntime.h>
#include <exception>
#include <sstream>
#include <stdexcept>

#define ONNXTRT_TRY try

#define ONNXTRT_CATCH_RECORD \
catch (OnnxTrtException & e) \
{ \
Status status = e.getStatus(); \
mImporterCtx.getErrorRecorder()->reportError(errorCodeToTrtCode(status.code()), e.what()); \
mErrors.push_back(status); \
} \
catch (std::exception & e) \
{ \
mImporterCtx.getErrorRecorder()->reportError(nvinfer1::ErrorCode::kINTERNAL_ERROR, e.what()); \
mImporterCtx.getErrorRecorder()->reportError(nvinfer1::ErrorCode::kUNSPECIFIED_ERROR, e.what()); \
mErrors.push_back(Status{ErrorCode::kINTERNAL_ERROR, e.what()}); \
}

#define ONNXTRT_CATCH_LOG(logger) \
catch (OnnxTrtException & e) \
{ \
Status status = e.getStatus(); \
(logger)->log(nvinfer1::ILogger::Severity::kINTERNAL_ERROR, e.what()); \
mErrors.push_back(status); \
} \
catch (std::exception & e) \
{ \
(logger)->log(nvinfer1::ILogger::Severity::kINTERNAL_ERROR, e.what()); \
mErrors.push_back(Status{ErrorCode::kINTERNAL_ERROR, e.what()}); \
}

#define ONNXTRT_THROW(status) throw OnnxTrtException(status)

#define ONNXTRT_CHECK(cond, status) \
if (!(cond)) \
{ \
std::ostringstream ss; \
ss << "Assertion failed: " << #cond; \
ONNXTRT_THROW(status); \
}

#define ONNXTRT_CHECK_NODE(cond, desc, node, nodeIdx, code) \
if (!(cond)) \
{ \
std::ostringstream ss; \
ss << "Assertion failed: " << #cond << ". " << (desc); \
ONNXTRT_THROW(MAKE_NODE_ERROR((ss.str()), (code), (node), (nodeIdx))); \
}

namespace onnx2trt
{
-inline void ONNXTRT_CHECK(bool cond, Status status)
-{
-    if (!cond)
-    {
-        std::ostringstream os;
-        os << "[" << status.file() << ":" << status.func() << ":" << status.line() << "] ";
-        os << "Error Code " << static_cast<int32_t>(status.code()) << ": " << status.desc();
-        throw std::runtime_error(os.str());
-    }
-}
+inline char const* errorCodeStr(ErrorCode code)
+{
+    switch (code)
+    {
+    case ErrorCode::kSUCCESS: return "SUCCESS";
+    case ErrorCode::kINTERNAL_ERROR: return "INTERNAL_ERROR";
+    case ErrorCode::kMEM_ALLOC_FAILED: return "MEM_ALLOC_FAILED";
+    case ErrorCode::kMODEL_DESERIALIZE_FAILED: return "MODEL_DESERIALIZE_FAILED";
+    case ErrorCode::kINVALID_VALUE: return "INVALID_VALUE";
+    case ErrorCode::kINVALID_GRAPH: return "INVALID_GRAPH";
+    case ErrorCode::kINVALID_NODE: return "INVALID_NODE";
+    case ErrorCode::kUNSUPPORTED_GRAPH: return "UNSUPPORTED_GRAPH";
+    case ErrorCode::kUNSUPPORTED_NODE: return "UNSUPPORTED_NODE";
+    case ErrorCode::kUNSUPPORTED_NODE_ATTR: return "UNSUPPORTED_NODE_ATTR";
+    case ErrorCode::kUNSUPPORTED_NODE_INPUT: return "UNSUPPORTED_NODE_INPUT";
+    case ErrorCode::kUNSUPPORTED_NODE_DATATYPE: return "UNSUPPORTED_NODE_DATATYPE";
+    case ErrorCode::kUNSUPPORTED_NODE_DYNAMIC: return "UNSUPPORTED_NODE_DYNAMIC";
+    case ErrorCode::kUNSUPPORTED_NODE_SHAPE: return "UNSUPPORTED_NODE_SHAPE";
+    case ErrorCode::kREFIT_FAILED: return "REFIT_FAILED";
+    }
+    return "UNKNOWN";
+};
+
+inline std::string const parserErrorStr(nvonnxparser::IParserError const* error)
+{
+    std::string const nodeInfo = "In node " + std::to_string(error->node()) + " with name: " + error->nodeName()
+        + " and operator: " + error->nodeOperator() + " ";
+    std::string const errorInfo
+        = std::string("(") + error->func() + "): " + errorCodeStr(error->code()) + ": " + error->desc();
+    if (error->code() == ErrorCode::kMODEL_DESERIALIZE_FAILED || error->code() == ErrorCode::kREFIT_FAILED)
+    {
+        return errorInfo.c_str();
+    }
+    return (nodeInfo + errorInfo).c_str();
+}

nvinfer1::ErrorCode errorCodeToTrtCode(ErrorCode const code);

class OnnxTrtException : public std::exception
{
Status mStatus;
mutable std::string mMessage;

public:
OnnxTrtException(Status status);

Status getStatus() const noexcept;

virtual char const* what() const noexcept override;

virtual ~OnnxTrtException() {}
};

} // namespace onnx2trt
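Taken together, the intended pattern is that op importers validate with the check macros and throw `OnnxTrtException`, while the outermost API entry points translate exceptions into recorded errors. A hedged sketch (the class, the member names `mImporterCtx`/`mErrors` the macros expect, and `importEverything` are assumptions, not code from this commit):

```cpp
// Inside an op importer: validate and throw on failure.
ONNXTRT_CHECK_NODE(axis >= 0 && axis < nbDims, "Axis attribute is out of range.",
    node, nodeIdx, ErrorCode::kINVALID_NODE);

// At an API boundary: convert any exception into an IErrorRecorder entry.
bool ModelImporter::parseSomething()
{
    ONNXTRT_TRY
    {
        importEverything(); // may throw OnnxTrtException or std::exception
        return true;
    }
    ONNXTRT_CATCH_RECORD
    return false;
}
```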
32 changes: 12 additions & 20 deletions importerUtils.cpp
@@ -133,10 +133,10 @@ NodeImportResult argMinMaxHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::N
auto const dimOnAxis = gather(ctx, dims, axisTensor);

// Create constant of shape indicesDims with values tensor.shape[axis]
auto const tensorDimOnAxis = constantOfShape(ctx, node, &dimOnAxis.tensor(ctx), &indicesDims.tensor(ctx));
auto const tensorDimOnAxis = constantOfShape(ctx, &dimOnAxis.tensor(ctx), &indicesDims.tensor(ctx));

// Create constant of shape indicesDims with values of 1
auto const ones = constantOfShape(ctx, node, &shapeVector(1).tensor(ctx), &indicesDims.tensor(ctx));
auto const ones = constantOfShape(ctx, &shapeVector(1).tensor(ctx), &indicesDims.tensor(ctx));

std::vector<TensorOrWeights> newInputs{tensorDimOnAxis, indices, ones};
std::vector<TensorOrWeights> indicesUpdate;
@@ -285,8 +285,7 @@ nvinfer1::ITensor* castHelper(ImporterContext* ctx, nvinfer1::ITensor* input, nv
return N_CHECK(cast->getOutput(0));
}

nvinfer1::ITensor* constantOfShape(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node,
nvinfer1::ITensor* constant, nvinfer1::ITensor* shape)
nvinfer1::ITensor* constantOfShape(ImporterContext* ctx, nvinfer1::ITensor* constant, nvinfer1::ITensor* shape)
{
ShapeTensor shapeT{*shape};
ShapeTensor zeros = similar(ctx, shapeT, 0);
@@ -398,17 +397,10 @@ onnx2trt::ShapedWeights createZeroShifts(onnx2trt::ShapedWeights const& shiftInt

nvinfer1::ITensor* createZeroTensor(ImporterContext* ctx, nvinfer1::ITensor* data)
{
auto* zeroLayer = N_CHECK(addConstant(ctx, std::vector<float>{0.f}, ::ONNX_NAMESPACE::TensorProto::FLOAT, {0, {1}}));
auto* zeroTensor = N_CHECK(zeroLayer->getOutput(0));
zeroTensor = castHelper(ctx, zeroTensor, data->getType());
auto result = broadcastTensors(ctx, zeroTensor, data);
if (result.is_error())
{
return nullptr;
}
auto* zeroBroadcastLayer = N_CHECK(ctx->network()->addElementWise(*data, *zeroTensor, nvinfer1::ElementWiseOperation::kPROD));
ctx->registerLayer(zeroBroadcastLayer, "ONNXTRT_createZeroTensor", nullptr);
return N_CHECK(zeroBroadcastLayer->getOutput(0));
auto shape = shapeOf(*data);
auto* zeros = N_CHECK(addConstantScalar(ctx, 0.0F, ::ONNX_NAMESPACE::TensorProto::FLOAT)->getOutput(0));
zeros = castHelper(ctx, zeros, data->getType());
return constantOfShape(ctx, zeros, &shape.tensor(ctx));
}
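The rewrite above drops the old broadcast-and-multiply construction (multiplying the input by a zero constant), so the zeros no longer carry a spurious dependency on the input's values. A hedged sketch of a call site (illustrative only):

```cpp
// zeroBias gets data's dtype and (possibly dynamic) shape; data is never read.
nvinfer1::ITensor* zeroBias = createZeroTensor(ctx, data);
```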

nvinfer1::ITensor* convertToScalar(ImporterContext* ctx, nvinfer1::ITensor* inpTensor)
@@ -1157,13 +1149,13 @@ NodeImportResult modulatedDeformableConvPluginHelper(ImporterContext* ctx, ::ONN
{
static_cast<half_float::half*>(defaultMaskWeights.values)[0] = 1.0;
auto maskTensor = TensorOrWeights{defaultMaskWeights};
maskPtr = constantOfShape(ctx, node, &convertToTensor(maskTensor, ctx), &maskShape);
maskPtr = constantOfShape(ctx, &convertToTensor(maskTensor, ctx), &maskShape);
}
else
{
static_cast<float*>(defaultMaskWeights.values)[0] = 1.F;
auto maskTensor = TensorOrWeights{defaultMaskWeights};
maskPtr = constantOfShape(ctx, node, &convertToTensor(maskTensor, ctx), &maskShape);
maskPtr = constantOfShape(ctx, &convertToTensor(maskTensor, ctx), &maskShape);
}
}

@@ -1224,7 +1216,7 @@ NodeImportResult instanceNormPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE

// Populate instanceNormalization plugin properties.
std::string const pluginName = "InstanceNormalization_TRT";
std::string const pluginVersion = "1";
std::string const pluginVersion = "3";
std::vector<nvinfer1::PluginField> f;

// get the values of constant inputs and cast them to float32
Expand All @@ -1239,12 +1231,12 @@ NodeImportResult instanceNormPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE

// Create plugin from registry
auto const plugin = createPlugin(getNodeName(node),
static_cast<nvinfer1::IPluginCreator*>(importPluginCreator(ctx, pluginName, pluginVersion)), f);
static_cast<nvinfer1::IPluginCreatorV3One*>(importPluginCreator(ctx, pluginName, pluginVersion)), f);

ASSERT_NODE(plugin != nullptr, "InstanceNormalization plugin was not found in the plugin registry!", node, nodeIdx,
ErrorCode::kUNSUPPORTED_NODE);

auto* layer = N_CHECK(ctx->network()->addPluginV2(&tensorPtr, 1, *plugin));
auto* layer = N_CHECK(ctx->network()->addPluginV3(&tensorPtr, 1, nullptr, 0, *plugin));
ctx->registerLayer(layer, node);
tensorPtr = N_CHECK(layer->getOutput(0));

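The InstanceNormalization change above moves the importer from the V2 plugin path (`IPluginCreator`, `addPluginV2`, plugin version "1") to the V3 path (`IPluginCreatorV3One`, `addPluginV3`, plugin version "3"). A generic sketch of that flow against the public TensorRT API, with the structure assumed rather than taken from this repo's helpers:

```cpp
#include <NvInferRuntime.h>

// Hypothetical sketch: look up a V3 plugin creator and add the plugin layer.
nvinfer1::IPluginV3Layer* addV3Plugin(nvinfer1::INetworkDefinition& network, nvinfer1::ITensor* input)
{
    auto* creator = static_cast<nvinfer1::IPluginCreatorV3One*>(
        getPluginRegistry()->getCreator("InstanceNormalization_TRT", "3", ""));
    nvinfer1::PluginFieldCollection fc{0, nullptr}; // plugin fields omitted for brevity
    nvinfer1::IPluginV3* plugin = creator->createPlugin("instNorm", &fc, nvinfer1::TensorRTPhase::kBUILD);
    // Unlike addPluginV2, addPluginV3 takes separate regular- and shape-input lists.
    return network.addPluginV3(&input, 1, nullptr, 0, *plugin);
}
```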
3 changes: 1 addition & 2 deletions importerUtils.hpp
@@ -127,8 +127,7 @@ std::vector<int32_t> calculatePitches(nvinfer1::Dims const& inputDims);
nvinfer1::ITensor* castHelper(ImporterContext* ctx, nvinfer1::ITensor* input, nvinfer1::DataType dtype);

// Helper function for constantOfShape operator. Input shape must be a shape tensor
nvinfer1::ITensor* constantOfShape(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node,
nvinfer1::ITensor* constant, nvinfer1::ITensor* shape);
nvinfer1::ITensor* constantOfShape(ImporterContext* ctx, nvinfer1::ITensor* constant, nvinfer1::ITensor* shape);

// Helper function to convert an ONNX axis into a TRT axis
Status convertAxis(int32_t& axis, int32_t const nbDims, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx);
23 changes: 14 additions & 9 deletions onnxOpImporters.cpp
@@ -731,7 +731,7 @@ DEFINE_BUILTIN_OP_IMPORTER(ConstantOfShape)
static_cast<float*>(zeroWeights.values)[0] = 0.f;
auto valueWeights = TensorOrWeights{attrs.get("value", zeroWeights)};
nvinfer1::ITensor* value = &convertToTensor(valueWeights, ctx);
return {{constantOfShape(ctx, node, value, shape)}};
return {{constantOfShape(ctx, value, shape)}};
}

DEFINE_BUILTIN_OP_IMPORTER(Conv)
@@ -2374,7 +2374,7 @@ DEFINE_BUILTIN_OP_IMPORTER(GRU)
{
return &convertToTensor(inputs.at(inputIdx), ctx);
}
return constantOfShape(ctx, node,
return constantOfShape(ctx,
addConstantScalar(ctx, 0.f, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, Dims{1, {1}})->getOutput(0),
gateOutputShape);
};
@@ -2491,12 +2491,12 @@ DEFINE_BUILTIN_OP_IMPORTER(GRU)
LOG_VERBOSE("h(t) -> " << ht->getDimensions());

// H(t) = (1 - z(t)) . h(t) + (z(t) . H(t-1))
// Constant `1` needs to be the same type as the inputs, either FP16 or FP32.
auto onnxType = zt->getType() == nvinfer1::DataType::kHALF ? ::ONNX_NAMESPACE::TensorProto::FLOAT16
: ::ONNX_NAMESPACE::TensorProto::FLOAT;
auto* constOne = N_CHECK(addConstantScalar(ctx, 1.f, onnxType, Dims3{1, 1, 1})->getOutput(0));
nvinfer1::ITensor* Ht = getElementWiseResult(ctx,
*getElementWiseResult(ctx,
*getElementWiseResult(ctx,
*addConstantScalar(ctx, 1.f, ::ONNX_NAMESPACE::TensorProto::FLOAT, Dims3{1, 1, 1})->getOutput(0), *zt,
eOp::kSUB),
*ht, eOp::kPROD),
*getElementWiseResult(ctx, *getElementWiseResult(ctx, *constOne, *zt, eOp::kSUB), *ht, eOp::kPROD),
*getElementWiseResult(ctx, *zt, *Ht1Output, eOp::kPROD), eOp::kSUM);

// singlePassShape = (1, batchSize, hiddenSize)
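The only functional change in this hunk is that the constant `1` now matches the gate dtype (FP16 or FP32) instead of always being FP32; the blend itself is unchanged. A scalar sanity check of H(t) = (1 - z(t)) · h(t) + z(t) · H(t-1), illustrative only:

```cpp
float const z = 0.25F;    // update gate z(t)
float const h = 2.0F;     // candidate hidden state h(t)
float const Hprev = 4.0F; // previous hidden state H(t-1)
float const H = (1.0F - z) * h + z * Hprev; // 0.75*2 + 0.25*4 = 2.5
```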
@@ -3051,7 +3051,7 @@ DEFINE_BUILTIN_OP_IMPORTER(LSTM)
{
return &convertToTensor(inputs.at(inputIdx), ctx);
}
return constantOfShape(ctx, node,
return constantOfShape(ctx,
addConstantScalar(ctx, 0.f, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, nvinfer1::Dims{1, {1}})
->getOutput(0),
gateOutputShape);
@@ -4217,6 +4217,11 @@ DEFINE_BUILTIN_OP_IMPORTER(Range)
delta = ShapeTensor{*input2};
}

// Although the ONNX spec requires scalars, in practice the inputs may be vectors of rank 1. Squeeze here if necessary.
start = start.rank() == 1 ? convertTo0D(ctx, start) : start;
limit = limit.rank() == 1 ? convertTo0D(ctx, limit) : limit;
delta = delta.rank() == 1 ? convertTo0D(ctx, delta) : delta;

// "number_of_elements = max( ceil( (limit - start) / delta ) , 0 )"
//
// To implement this in TensorRT using only operations allowed on
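A worked check of the quoted element-count formula (illustrative helper, not parser code):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>

// number_of_elements = max(ceil((limit - start) / delta), 0)
int64_t rangeCount(double start, double limit, double delta)
{
    return std::max<int64_t>(static_cast<int64_t>(std::ceil((limit - start) / delta)), 0);
}
// rangeCount(3, 10, 2) == 4 -> Range yields {3, 5, 7, 9};
// rangeCount(0, -5, 1) == 0 -> empty output.
```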
@@ -4804,7 +4809,7 @@ DEFINE_BUILTIN_OP_IMPORTER(RNN)
{
return &convertToTensor(inputs.at(inputIdx), ctx);
}
return constantOfShape(ctx, node,
return constantOfShape(ctx,
N_CHECK(addConstantScalar(ctx, 0.f, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, nvinfer1::Dims{1, {1}})
->getOutput(0)),
initialStateShape());
2 changes: 1 addition & 1 deletion onnx_tensorrt/__init__.py
@@ -4,4 +4,4 @@

from . import backend

__version__ = "10.1.0"
__version__ = "10.2.0"
