From 706f02e74366b2cbaacf87be61de95df051a2788 Mon Sep 17 00:00:00 2001
From: Yuan Yao <99693700+yuanyao-nv@users.noreply.github.com>
Date: Wed, 10 Jul 2024 11:35:09 -0700
Subject: [PATCH] parser 10.2 updates (#977)

Signed-off-by: Yuan Yao (yuanyao)
---
 CMakeLists.txt            |  3 +-
 NvOnnxParser.h            |  4 ++
 README.md                 |  6 +--
 ShapeTensor.cpp           | 15 +++++++
 ShapeTensor.hpp           |  3 ++
 Status.hpp                |  1 -
 docs/Changelog.md         |  8 +++-
 errorHelpers.hpp          | 86 +++++++++++++++++++++++++++++++++++----
 importerUtils.cpp         | 32 ++++++---------
 importerUtils.hpp         |  3 +-
 onnxOpImporters.cpp       | 23 +++++++----
 onnx_tensorrt/__init__.py |  2 +-
 12 files changed, 141 insertions(+), 45 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index b761b4b..d64380d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -28,7 +28,7 @@ add_definitions("-DSOURCE_LENGTH=${SOURCE_LENGTH}")
 # Version information
 #--------------------------------------------------
 set(ONNX2TRT_MAJOR 10)
-set(ONNX2TRT_MINOR 1)
+set(ONNX2TRT_MINOR 2)
 set(ONNX2TRT_PATCH 0)
 set(ONNX2TRT_VERSION "${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}.${ONNX2TRT_PATCH}" CACHE STRING "ONNX2TRT version")
@@ -56,6 +56,7 @@ set(IMPORTER_SOURCES
     weightUtils.cpp
     WeightsContext.cpp
     TensorOrWeights.cpp
+    errorHelpers.cpp
 )

 if (BUILD_ONNXIFI)
diff --git a/NvOnnxParser.h b/NvOnnxParser.h
index dcae29f..eb6b2f9 100644
--- a/NvOnnxParser.h
+++ b/NvOnnxParser.h
@@ -175,6 +175,8 @@ class IParserError
 //!
 //! \brief an object for parsing ONNX models into a TensorRT network definition
 //!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
 class IParser
 {
 public:
@@ -406,6 +408,8 @@ class IParser
 //!
 //! \brief An interface designed to refit weights from an ONNX model.
 //!
+//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
+//!
 class IParserRefitter
 {
 public:
diff --git a/README.md b/README.md
index c989cb6..6962df2 100644
--- a/README.md
+++ b/README.md
@@ -16,7 +16,7 @@ For press and other inquiries, please contact Hector Marinez at hmarinez@nvidia.

 ## Supported TensorRT Versions

-Development on the `main` branch is for the latest version of [TensorRT 10.0](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.
+Development on this branch is for the latest version of [TensorRT 10.2](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.

 For previous versions of TensorRT, refer to their respective branches.
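The two `\warning` lines added to `NvOnnxParser.h` above make the usage contract explicit: `IParser` and `IParserRefitter` are obtained from factory functions and must never be subclassed. A minimal sketch of the intended call pattern against the public TensorRT 10.x API (the `StderrLogger` class and the `model.onnx` path are illustrative assumptions, not part of this patch):

```cpp
#include "NvInfer.h"
#include "NvOnnxParser.h"
#include <iostream>

// Illustrative stand-in: any nvinfer1::ILogger implementation works here.
class StderrLogger : public nvinfer1::ILogger
{
    void log(Severity severity, char const* msg) noexcept override
    {
        if (severity <= Severity::kWARNING)
        {
            std::cerr << msg << "\n";
        }
    }
};

int main()
{
    StderrLogger logger;
    auto* builder = nvinfer1::createInferBuilder(logger);
    auto* network = builder->createNetworkV2(0);

    // Obtain the parser from the factory function; do not derive from IParser.
    auto* parser = nvonnxparser::createParser(*network, logger);
    if (!parser->parseFromFile("model.onnx", static_cast<int>(nvinfer1::ILogger::Severity::kWARNING)))
    {
        for (int32_t i = 0; i < parser->getNbErrors(); ++i)
        {
            std::cerr << parser->getError(i)->desc() << "\n";
        }
    }
    delete parser;
    delete network;
    delete builder;
    return 0;
}
```

The errors surfaced through `getError(i)` here are the same `IParserError` objects that the new `parserErrorStr` helper further below formats.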
@@ -29,8 +29,8 @@ Current supported ONNX operators are found in the [operator support matrix](docs
 ### Dependencies

  - [Protobuf >= 3.0.x](https://github.com/google/protobuf/releases)
- - [TensorRT 10.1](https://developer.nvidia.com/tensorrt)
- - [TensorRT 10.1 open source libaries] (https://github.com/NVIDIA/TensorRT/)
+ - [TensorRT 10.2](https://developer.nvidia.com/tensorrt)
+ - [TensorRT 10.2 open source libraries](https://github.com/NVIDIA/TensorRT/)

 ### Building
diff --git a/ShapeTensor.cpp b/ShapeTensor.cpp
index 8cb6b95..a9c8af8 100644
--- a/ShapeTensor.cpp
+++ b/ShapeTensor.cpp
@@ -443,6 +443,21 @@ ShapeTensor convertTo1D(ImporterContext* ctx, const ShapeTensor& tensor)
     return ShapeTensor(*N_CHECK(addShuffle(ctx, tensor.tensor(ctx), shapeVector(1))->getOutput(0)));
 }

+ShapeTensor convertTo0D(ImporterContext* ctx, const ShapeTensor& tensor)
+{
+    if (tensor.size() != 1)
+    {
+        throw std::runtime_error("Cannot convert a tensor with size != 1 to a scalar!");
+    }
+    if (tensor.valueKnown(0))
+    {
+        return shapeScalar(tensor[0]);
+    }
+    auto* layer = N_CHECK(ctx->network()->addShuffle(tensor.tensor(ctx)));
+    layer->setReshapeDimensions(nvinfer1::Dims{0});
+    return ShapeTensor(*N_CHECK(layer->getOutput(0)));
+}
+
 //! If all values of x are known, return Dims with those values,
 //! but throw exception if any value is outside specified bounds.
 //! Otherwise return Dims with zeros.
diff --git a/ShapeTensor.hpp b/ShapeTensor.hpp
index ce2048a..df5b0b9 100644
--- a/ShapeTensor.hpp
+++ b/ShapeTensor.hpp
@@ -231,6 +231,9 @@ ShapeTensor shapeOf(TensorOrWeights& t);
 //! Reshape 0D tensor to 1D tensor.
 ShapeTensor convertTo1D(ImporterContext* ctx, const ShapeTensor& tensor);

+//! Reshape a single-value 1D tensor to a 0D tensor.
+ShapeTensor convertTo0D(ImporterContext* ctx, const ShapeTensor& tensor);
+
 //! Add an ISliceLayer.
 nvinfer1::ISliceLayer* addSlice(ImporterContext* ctx, nvinfer1::ITensor& data, const ShapeTensor& starts,
     const ShapeTensor& sizes, const ShapeTensor& strides);
diff --git a/Status.hpp b/Status.hpp
index 2af35a0..98c0909 100644
--- a/Status.hpp
+++ b/Status.hpp
@@ -203,7 +203,6 @@ static std::ostream& operator<<(std::ostream& stream, nvinfer1::DataType const&
     case nvinfer1::DataType::kBOOL: return stream << "bool";
     case nvinfer1::DataType::kFP8: return stream << "float8";
     case nvinfer1::DataType::kINT4: return stream << "int4";
-    default: throw std::runtime_error("Unknown dtype");
     }
 }
diff --git a/docs/Changelog.md b/docs/Changelog.md
index 6dec4cc..dc7765c 100644
--- a/docs/Changelog.md
+++ b/docs/Changelog.md
@@ -2,8 +2,14 @@

 # ONNX-TensorRT Changelog

+# TensorRT 10.2 GA Release - 2024-7-10
+For more details, see the 10.2 GA release notes.
+
+- Improved error handling with new macros and classes
+- Minor changes to op importers for `GRU` and `Squeeze`
+
 # TensorRT 10.1 GA Release - 2024-6-17
-For more details, see the 10.0 GA release notes.
+For more details, see the 10.1 GA release notes.
 - Added `supportsModelV2` API
 - Added support for `DeformConv` operation
diff --git a/errorHelpers.hpp b/errorHelpers.hpp
index 3ef2cf4..031a9f2 100644
--- a/errorHelpers.hpp
+++ b/errorHelpers.hpp
@@ -5,35 +5,107 @@
 #include "Status.hpp"
 #include <sstream>
+#include <exception>
 #include <stdexcept>
 #include <string>

 #define ONNXTRT_TRY try

 #define ONNXTRT_CATCH_RECORD \
+    catch (OnnxTrtException & e) \
+    { \
+        Status status = e.getStatus(); \
+        mImporterCtx.getErrorRecorder()->reportError(errorCodeToTrtCode(status.code()), e.what()); \
+        mErrors.push_back(status); \
+    } \
     catch (std::exception & e) \
     { \
-        mImporterCtx.getErrorRecorder()->reportError(nvinfer1::ErrorCode::kINTERNAL_ERROR, e.what()); \
+        mImporterCtx.getErrorRecorder()->reportError(nvinfer1::ErrorCode::kUNSPECIFIED_ERROR, e.what()); \
         mErrors.push_back(Status{ErrorCode::kINTERNAL_ERROR, e.what()}); \
     }

 #define ONNXTRT_CATCH_LOG(logger) \
+    catch (OnnxTrtException & e) \
+    { \
+        Status status = e.getStatus(); \
+        (logger)->log(nvinfer1::ILogger::Severity::kINTERNAL_ERROR, e.what()); \
+        mErrors.push_back(status); \
+    } \
     catch (std::exception & e) \
     { \
         (logger)->log(nvinfer1::ILogger::Severity::kINTERNAL_ERROR, e.what()); \
+        mErrors.push_back(Status{ErrorCode::kINTERNAL_ERROR, e.what()}); \
+    }
+
+#define ONNXTRT_THROW(status) throw OnnxTrtException(status)
+
+#define ONNXTRT_CHECK(cond, status) \
+    if (!(cond)) \
+    { \
+        ONNXTRT_THROW(status); \
+    }
+
+#define ONNXTRT_CHECK_NODE(cond, desc, node, nodeIdx, code) \
+    if (!(cond)) \
+    { \
+        std::ostringstream ss; \
+        ss << "Assertion failed: " << #cond << ": " << (desc); \
+        ONNXTRT_THROW(MAKE_NODE_ERROR((ss.str()), (code), (node), (nodeIdx))); \
+    }

 namespace onnx2trt
 {

-inline void ONNXTRT_CHECK(bool cond, Status status)
+inline char const* errorCodeStr(ErrorCode code)
 {
-    if (!cond)
+    switch (code)
     {
-        std::ostringstream os;
-        os << "[" << status.file() << ":" << status.func() << ":" << status.line() << "] ";
-        os << "Error Code " << static_cast<int32_t>(status.code()) << ": " << status.desc();
+    case ErrorCode::kSUCCESS: return "SUCCESS";
+    case ErrorCode::kINTERNAL_ERROR: return "INTERNAL_ERROR";
+    case ErrorCode::kMEM_ALLOC_FAILED: return "MEM_ALLOC_FAILED";
+    case ErrorCode::kMODEL_DESERIALIZE_FAILED: return "MODEL_DESERIALIZE_FAILED";
+    case ErrorCode::kINVALID_VALUE: return "INVALID_VALUE";
+    case ErrorCode::kINVALID_GRAPH: return "INVALID_GRAPH";
+    case ErrorCode::kINVALID_NODE: return "INVALID_NODE";
+    case ErrorCode::kUNSUPPORTED_GRAPH: return "UNSUPPORTED_GRAPH";
+    case ErrorCode::kUNSUPPORTED_NODE: return "UNSUPPORTED_NODE";
+    case ErrorCode::kUNSUPPORTED_NODE_ATTR: return "UNSUPPORTED_NODE_ATTR";
+    case ErrorCode::kUNSUPPORTED_NODE_INPUT: return "UNSUPPORTED_NODE_INPUT";
+    case ErrorCode::kUNSUPPORTED_NODE_DATATYPE: return "UNSUPPORTED_NODE_DATATYPE";
+    case ErrorCode::kUNSUPPORTED_NODE_DYNAMIC: return "UNSUPPORTED_NODE_DYNAMIC";
+    case ErrorCode::kUNSUPPORTED_NODE_SHAPE: return "UNSUPPORTED_NODE_SHAPE";
+    case ErrorCode::kREFIT_FAILED: return "REFIT_FAILED";
+    }
+    return "UNKNOWN";
+}

-        throw std::runtime_error(os.str());
-    }
-}
+inline std::string const parserErrorStr(nvonnxparser::IParserError const* error)
+{
+    std::string const nodeInfo = "In node " + std::to_string(error->node()) + " with name: " + error->nodeName()
+        + " and operator: " + error->nodeOperator() + " ";
+    std::string const errorInfo
+        = std::string("(") + error->func() + "): " + errorCodeStr(error->code()) + ": " + error->desc();
+    if (error->code() == ErrorCode::kMODEL_DESERIALIZE_FAILED || error->code() == ErrorCode::kREFIT_FAILED)
+    {
+        return errorInfo;
+    }
+    return nodeInfo + errorInfo;
+}
+
+nvinfer1::ErrorCode errorCodeToTrtCode(ErrorCode const code);
+
+class OnnxTrtException : public std::exception
+{
+    Status mStatus;
+    mutable std::string mMessage;
+
+public:
+    OnnxTrtException(Status status);
+
+    Status getStatus() const noexcept;
+
+    virtual char const* what() const noexcept override;
+
+    virtual ~OnnxTrtException() {}
+};
+
 } // namespace onnx2trt
diff --git a/importerUtils.cpp b/importerUtils.cpp
index 322db31..c130889 100644
--- a/importerUtils.cpp
+++ b/importerUtils.cpp
@@ -133,10 +133,10 @@ NodeImportResult argMinMaxHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::N
     auto const dimOnAxis = gather(ctx, dims, axisTensor);

     // Create constant of shape indicesDims with values tensor.shape[axis]
-    auto const tensorDimOnAxis = constantOfShape(ctx, node, &dimOnAxis.tensor(ctx), &indicesDims.tensor(ctx));
+    auto const tensorDimOnAxis = constantOfShape(ctx, &dimOnAxis.tensor(ctx), &indicesDims.tensor(ctx));

     // Create constant of shape indicesDims with values of 1
-    auto const ones = constantOfShape(ctx, node, &shapeVector(1).tensor(ctx), &indicesDims.tensor(ctx));
+    auto const ones = constantOfShape(ctx, &shapeVector(1).tensor(ctx), &indicesDims.tensor(ctx));

     std::vector<nvinfer1::ITensor*> newInputs{tensorDimOnAxis, indices, ones};
     std::vector<nvinfer1::ITensor*> indicesUpdate;
@@ -285,8 +285,7 @@ nvinfer1::ITensor* castHelper(ImporterContext* ctx, nvinfer1::ITensor* input, nv
     return N_CHECK(cast->getOutput(0));
 }

-nvinfer1::ITensor* constantOfShape(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node,
-    nvinfer1::ITensor* constant, nvinfer1::ITensor* shape)
+nvinfer1::ITensor* constantOfShape(ImporterContext* ctx, nvinfer1::ITensor* constant, nvinfer1::ITensor* shape)
 {
     ShapeTensor shapeT{*shape};
     ShapeTensor zeros = similar(ctx, shapeT, 0);
@@ -398,17 +397,10 @@ onnx2trt::ShapedWeights createZeroShifts(onnx2trt::ShapedWeights const& shiftInt

 nvinfer1::ITensor* createZeroTensor(ImporterContext* ctx, nvinfer1::ITensor* data)
 {
-    auto* zeroLayer = N_CHECK(addConstant(ctx, std::vector<float>{0.f}, ::ONNX_NAMESPACE::TensorProto::FLOAT, {0, {1}}));
-    auto* zeroTensor = N_CHECK(zeroLayer->getOutput(0));
-    zeroTensor = castHelper(ctx, zeroTensor, data->getType());
-    auto result = broadcastTensors(ctx, zeroTensor, data);
-    if (result.is_error())
-    {
-        return nullptr;
-    }
-    auto* zeroBroadcastLayer = N_CHECK(ctx->network()->addElementWise(*data, *zeroTensor, nvinfer1::ElementWiseOperation::kPROD));
-    ctx->registerLayer(zeroBroadcastLayer, "ONNXTRT_createZeroTensor", nullptr);
-    return N_CHECK(zeroBroadcastLayer->getOutput(0));
+    auto shape = shapeOf(*data);
+    auto* zeros = N_CHECK(addConstantScalar(ctx, 0.0F, ::ONNX_NAMESPACE::TensorProto::FLOAT)->getOutput(0));
+    zeros = castHelper(ctx, zeros, data->getType());
+    return constantOfShape(ctx, zeros, &shape.tensor(ctx));
 }

 nvinfer1::ITensor* convertToScalar(ImporterContext* ctx, nvinfer1::ITensor* inpTensor)
@@ -1157,13 +1149,13 @@ NodeImportResult modulatedDeformableConvPluginHelper(ImporterContext* ctx, ::ONN
     {
         static_cast<half_float::half*>(defaultMaskWeights.values)[0] = 1.0;
         auto maskTensor = TensorOrWeights{defaultMaskWeights};
-        maskPtr = constantOfShape(ctx, node, &convertToTensor(maskTensor, ctx), &maskShape);
+        maskPtr = constantOfShape(ctx, &convertToTensor(maskTensor, ctx), &maskShape);
     }
     else
     {
         static_cast<float*>(defaultMaskWeights.values)[0] = 1.F;
         auto maskTensor = TensorOrWeights{defaultMaskWeights};
-        maskPtr = constantOfShape(ctx, node, &convertToTensor(maskTensor, ctx), &maskShape);
+        maskPtr = constantOfShape(ctx, &convertToTensor(maskTensor, ctx), &maskShape);
     }
 }

@@ -1224,7 +1216,7 @@ NodeImportResult instanceNormPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE

     // Populate instanceNormalization plugin properties.
     std::string const pluginName = "InstanceNormalization_TRT";
-    std::string const pluginVersion = "1";
+    std::string const pluginVersion = "3";
     std::vector<nvinfer1::PluginField> f;

     // get the values of constant inputs and cast them to float32
@@ -1239,12 +1231,12 @@ NodeImportResult instanceNormPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE

     // Create plugin from registry
     auto const plugin = createPlugin(getNodeName(node),
-        static_cast<nvinfer1::IPluginCreator*>(importPluginCreator(ctx, pluginName, pluginVersion)), f);
+        static_cast<nvinfer1::IPluginCreatorV3One*>(importPluginCreator(ctx, pluginName, pluginVersion)), f);

     ASSERT_NODE(plugin != nullptr, "InstanceNormalization plugin was not found in the plugin registry!", node,
         nodeIdx, ErrorCode::kUNSUPPORTED_NODE);

-    auto* layer = N_CHECK(ctx->network()->addPluginV2(&tensorPtr, 1, *plugin));
+    auto* layer = N_CHECK(ctx->network()->addPluginV3(&tensorPtr, 1, nullptr, 0, *plugin));
     ctx->registerLayer(layer, node);
     tensorPtr = N_CHECK(layer->getOutput(0));

diff --git a/importerUtils.hpp b/importerUtils.hpp
index 2b20f85..94699ac 100644
--- a/importerUtils.hpp
+++ b/importerUtils.hpp
@@ -127,8 +127,7 @@ std::vector<int32_t> calculatePitches(nvinfer1::Dims const& inputDims);
 nvinfer1::ITensor* castHelper(ImporterContext* ctx, nvinfer1::ITensor* input, nvinfer1::DataType dtype);

 // Helper function for constantOfShape operator. Input shape must be a shape tensor
-nvinfer1::ITensor* constantOfShape(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node,
-    nvinfer1::ITensor* constant, nvinfer1::ITensor* shape);
+nvinfer1::ITensor* constantOfShape(ImporterContext* ctx, nvinfer1::ITensor* constant, nvinfer1::ITensor* shape);

 // Helper function to convert an ONNX axis into a TRT axis
 Status convertAxis(int32_t& axis, int32_t const nbDims, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx);
diff --git a/onnxOpImporters.cpp b/onnxOpImporters.cpp
index 6b8a1dd..547f348 100644
--- a/onnxOpImporters.cpp
+++ b/onnxOpImporters.cpp
@@ -731,7 +731,7 @@ DEFINE_BUILTIN_OP_IMPORTER(ConstantOfShape)
     static_cast<float*>(zeroWeights.values)[0] = 0.f;
     auto valueWeights = TensorOrWeights{attrs.get("value", zeroWeights)};
     nvinfer1::ITensor* value = &convertToTensor(valueWeights, ctx);
-    return {{constantOfShape(ctx, node, value, shape)}};
+    return {{constantOfShape(ctx, value, shape)}};
 }

 DEFINE_BUILTIN_OP_IMPORTER(Conv)
@@ -2374,7 +2374,7 @@ DEFINE_BUILTIN_OP_IMPORTER(GRU)
         {
             return &convertToTensor(inputs.at(inputIdx), ctx);
         }
-        return constantOfShape(ctx, node,
+        return constantOfShape(ctx,
             addConstantScalar(ctx, 0.f, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, Dims{1, {1}})->getOutput(0),
             gateOutputShape);
     };
@@ -2491,12 +2491,12 @@ DEFINE_BUILTIN_OP_IMPORTER(GRU)
     LOG_VERBOSE("h(t) -> " << ht->getDimensions());

     // H(t) = (1 - z(t)) . h(t) + (z(t) . H(t-1))
+    // Constant `1` needs to be the same type as the inputs, either FP16 or FP32.
+    auto onnxType = zt->getType() == nvinfer1::DataType::kHALF ? ::ONNX_NAMESPACE::TensorProto::FLOAT16
+        : ::ONNX_NAMESPACE::TensorProto::FLOAT;
+    auto* constOne = N_CHECK(addConstantScalar(ctx, 1.f, onnxType, Dims3{1, 1, 1})->getOutput(0));
     nvinfer1::ITensor* Ht = getElementWiseResult(ctx,
-        *getElementWiseResult(ctx,
-            *getElementWiseResult(ctx,
-                *addConstantScalar(ctx, 1.f, ::ONNX_NAMESPACE::TensorProto::FLOAT, Dims3{1, 1, 1})->getOutput(0), *zt,
-                eOp::kSUB),
-            *ht, eOp::kPROD),
+        *getElementWiseResult(ctx, *getElementWiseResult(ctx, *constOne, *zt, eOp::kSUB), *ht, eOp::kPROD),
         *getElementWiseResult(ctx, *zt, *Ht1Output, eOp::kPROD), eOp::kSUM);

     // singlePassShape = (1, batchSize, hiddenSize)
@@ -3051,7 +3051,7 @@ DEFINE_BUILTIN_OP_IMPORTER(LSTM)
         {
             return &convertToTensor(inputs.at(inputIdx), ctx);
         }
-        return constantOfShape(ctx, node,
+        return constantOfShape(ctx,
             addConstantScalar(ctx, 0.f, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, nvinfer1::Dims{1, {1}})
                 ->getOutput(0),
             gateOutputShape);
@@ -4217,6 +4217,11 @@ DEFINE_BUILTIN_OP_IMPORTER(Range)
         delta = ShapeTensor{*input2};
     }

+    // Although the ONNX spec requires scalar inputs, in practice they may be rank-1 tensors of size 1. Squeeze to 0D here if necessary.
+    start = start.rank() == 1 ? convertTo0D(ctx, start) : start;
+    limit = limit.rank() == 1 ? convertTo0D(ctx, limit) : limit;
+    delta = delta.rank() == 1 ? convertTo0D(ctx, delta) : delta;
+
     // "number_of_elements = max( ceil( (limit - start) / delta ) , 0 )"
     //
     // To implement this in TensorRT using only operations allowed on
@@ -4804,7 +4809,7 @@ DEFINE_BUILTIN_OP_IMPORTER(RNN)
         {
             return &convertToTensor(inputs.at(inputIdx), ctx);
         }
-        return constantOfShape(ctx, node,
+        return constantOfShape(ctx,
             N_CHECK(addConstantScalar(ctx, 0.f, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, nvinfer1::Dims{1, {1}})
                 ->getOutput(0)),
             initialStateShape());
diff --git a/onnx_tensorrt/__init__.py b/onnx_tensorrt/__init__.py
index f1c90a1..0203c93 100644
--- a/onnx_tensorrt/__init__.py
+++ b/onnx_tensorrt/__init__.py
@@ -4,4 +4,4 @@

 from . import backend

-__version__ = "10.1.0"
+__version__ = "10.2.0"
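The changelog entry "Improved error handling with new macros and classes" refers to the `OnnxTrtException` / `ONNXTRT_TRY` / `ONNXTRT_CATCH_RECORD` machinery in `errorHelpers.hpp` above: op importers throw an exception carrying a `Status`, and a single catch site both reports it to the error recorder and appends it to `mErrors`. A self-contained sketch of that control flow, with `Status`, `ErrorCode`, and the error list modeled by simplified stand-ins rather than the library's real types:

```cpp
#include <exception>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Simplified stand-ins for onnx2trt::ErrorCode / Status; illustrative only.
enum class ErrorCode { kSUCCESS, kINVALID_NODE, kINTERNAL_ERROR };

struct Status
{
    ErrorCode code;
    std::string desc;
};

// Mirrors the shape of OnnxTrtException: an exception that carries a Status.
class OnnxTrtException : public std::exception
{
    Status mStatus;
    mutable std::string mMessage;

public:
    explicit OnnxTrtException(Status status) : mStatus(std::move(status)) {}
    Status getStatus() const noexcept { return mStatus; }
    char const* what() const noexcept override
    {
        mMessage = "ONNX parser error: " + mStatus.desc;
        return mMessage.c_str();
    }
};

// Same structure as the ONNXTRT_TRY / ONNXTRT_CATCH_RECORD pair: the typed
// exception is recorded with its own error code, anything else as internal.
std::vector<Status> gErrors;

void parseNode(bool valid)
{
    try
    {
        if (!valid)
        {
            throw OnnxTrtException(Status{ErrorCode::kINVALID_NODE, "node validation failed"});
        }
    }
    catch (OnnxTrtException& e)
    {
        gErrors.push_back(e.getStatus());
        std::cerr << e.what() << "\n";
    }
    catch (std::exception& e)
    {
        gErrors.push_back(Status{ErrorCode::kINTERNAL_ERROR, e.what()});
    }
}

int main()
{
    parseNode(false);
    std::cout << "recorded errors: " << gErrors.size() << "\n";
    return 0;
}
```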
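For reference, the `Range` importer above quotes the ONNX definition `number_of_elements = max( ceil( (limit - start) / delta ) , 0 )`, which it then implements with shape-tensor operations. A host-side check of just that arithmetic (`rangeNumElements` is a hypothetical helper for illustration, not part of the parser):

```cpp
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>

// Host-side reference for the element-count formula used by the Range importer.
int64_t rangeNumElements(double start, double limit, double delta)
{
    return std::max<int64_t>(static_cast<int64_t>(std::ceil((limit - start) / delta)), 0);
}

int main()
{
    assert(rangeNumElements(0.0, 5.0, 1.0) == 5); // 0,1,2,3,4
    assert(rangeNumElements(0.0, 5.0, 2.0) == 3); // 0,2,4
    assert(rangeNumElements(5.0, 0.0, 1.0) == 0); // empty when limit < start with positive delta
    return 0;
}
```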