parser 10.2 updates (#977)
Signed-off-by: Yuan Yao (yuanyao) <[email protected]>
yuanyao-nv authored Jul 10, 2024
1 parent 96e7811 commit 706f02e
Showing 12 changed files with 141 additions and 45 deletions.
3 changes: 2 additions & 1 deletion CMakeLists.txt
@@ -28,7 +28,7 @@ add_definitions("-DSOURCE_LENGTH=${SOURCE_LENGTH}")
# Version information
#--------------------------------------------------
set(ONNX2TRT_MAJOR 10)
set(ONNX2TRT_MINOR 1)
set(ONNX2TRT_MINOR 2)
set(ONNX2TRT_PATCH 0)
set(ONNX2TRT_VERSION "${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}.${ONNX2TRT_PATCH}" CACHE STRING "ONNX2TRT version")

@@ -56,6 +56,7 @@ set(IMPORTER_SOURCES
weightUtils.cpp
WeightsContext.cpp
TensorOrWeights.cpp
errorHelpers.cpp
)

if (BUILD_ONNXIFI)
4 changes: 4 additions & 0 deletions NvOnnxParser.h
@@ -175,6 +175,8 @@ class IParserError
//!
//! \brief an object for parsing ONNX models into a TensorRT network definition
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IParser
{
public:
@@ -406,6 +408,8 @@ class IParser
//!
//! \brief An interface designed to refit weights from an ONNX model.
//!
//! \warning Do not inherit from this class, as doing so will break forward-compatibility of the API and ABI.
//!
class IParserRefitter
{
public:
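Since neither interface may be subclassed, both are obtained through factory functions. A minimal sketch of the intended flow (the file name, severity, and error handling here are illustrative, not from this commit):

```cpp
#include "NvOnnxParser.h"
#include <iostream>

// Parse an ONNX file into an existing TensorRT network and report failures.
void parseModel(nvinfer1::INetworkDefinition& network, nvinfer1::ILogger& logger)
{
    nvonnxparser::IParser* parser = nvonnxparser::createParser(network, logger);
    if (!parser->parseFromFile("model.onnx", static_cast<int32_t>(nvinfer1::ILogger::Severity::kWARNING)))
    {
        for (int32_t i = 0; i < parser->getNbErrors(); ++i)
        {
            std::cerr << parser->getError(i)->desc() << std::endl;
        }
    }
    delete parser;
}
```

An `IParserRefitter` is created the same way, via `nvonnxparser::createParserRefitter(refitter, logger)`.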
6 changes: 3 additions & 3 deletions README.md
@@ -16,7 +16,7 @@ For press and other inquiries, please contact Hector Marinez at hmarinez@nvidia.

## Supported TensorRT Versions

Development on the `main` branch is for the latest version of [TensorRT 10.0](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.
Development on this branch is for the latest version of [TensorRT 10.2](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.

For previous versions of TensorRT, refer to their respective branches.

@@ -29,8 +29,8 @@ Current supported ONNX operators are found in the [operator support matrix](docs
### Dependencies

- [Protobuf >= 3.0.x](https://github.com/google/protobuf/releases)
- [TensorRT 10.1](https://developer.nvidia.com/tensorrt)
- [TensorRT 10.1 open source libraries](https://github.com/NVIDIA/TensorRT/)
- [TensorRT 10.2](https://developer.nvidia.com/tensorrt)
- [TensorRT 10.2 open source libraries](https://github.com/NVIDIA/TensorRT/)

### Building

15 changes: 15 additions & 0 deletions ShapeTensor.cpp
@@ -443,6 +443,21 @@ ShapeTensor convertTo1D(ImporterContext* ctx, const ShapeTensor& tensor)
return ShapeTensor(*N_CHECK(addShuffle(ctx, tensor.tensor(ctx), shapeVector(1))->getOutput(0)));
}

ShapeTensor convertTo0D(ImporterContext* ctx, const ShapeTensor& tensor)
{
if (tensor.size() != 1)
{
throw std::runtime_error("Cannot convert a tensor with size > 1 to a scalar!");
}
if (tensor.valueKnown(0))
{
return shapeScalar(tensor[0]);
}
auto* layer = N_CHECK(ctx->network()->addShuffle(tensor.tensor(ctx)));
layer->setReshapeDimensions(nvinfer1::Dims{0});
return ShapeTensor(*N_CHECK(layer->getOutput(0)));
}

//! If all values of x are known, return Dims with those values,
//! but throw exception if any value is outside specified bounds.
//! Otherwise return Dims with zeros.
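A hedged sketch of where the new helper fits (only names declared in this file are used; the wrapper itself is hypothetical). When the single value is known at parse time the result is a build-time scalar; otherwise an `IShuffleLayer` reshapes the tensor to `Dims{0}`, i.e. rank 0:

```cpp
// Hypothetical convenience wrapper: accept a scalar or a one-element 1D
// shape tensor, and always hand back a scalar (rank-0) ShapeTensor.
ShapeTensor ensureScalar(ImporterContext* ctx, ShapeTensor const& t)
{
    return t.rank() == 1 ? convertTo0D(ctx, t) : t;
}
```

The `Range` importer later in this diff applies exactly this pattern to its `start`, `limit`, and `delta` inputs.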
3 changes: 3 additions & 0 deletions ShapeTensor.hpp
@@ -231,6 +231,9 @@ ShapeTensor shapeOf(TensorOrWeights& t);
//! Reshape 0D tensor to 1D tensor.
ShapeTensor convertTo1D(ImporterContext* ctx, const ShapeTensor& tensor);

//! Reshape single value 1D tensor to a 0D tensor.
ShapeTensor convertTo0D(ImporterContext* ctx, const ShapeTensor& tensor);

//! Add an ISliceLayer.
nvinfer1::ISliceLayer* addSlice(ImporterContext* ctx, nvinfer1::ITensor& data, const ShapeTensor& starts,
const ShapeTensor& sizes, const ShapeTensor& strides);
1 change: 0 additions & 1 deletion Status.hpp
@@ -203,7 +203,6 @@ static std::ostream& operator<<(std::ostream& stream, nvinfer1::DataType const&
case nvinfer1::DataType::kBOOL: return stream << "bool";
case nvinfer1::DataType::kFP8: return stream << "float8";
case nvinfer1::DataType::kINT4: return stream << "int4";

default: throw std::runtime_error("Unknown dtype");
}
}
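The overload above is what lets dtype names appear in parser error text. A small illustrative use (not from this commit):

```cpp
#include <sstream>
#include <stdexcept>

// Illustrative only: format an unsupported dtype into an exception message.
void rejectDtype(nvinfer1::DataType dtype)
{
    std::ostringstream msg;
    msg << "Unsupported tensor dtype: " << dtype; // e.g. "int4" for kINT4
    throw std::runtime_error(msg.str());
}
```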
8 changes: 7 additions & 1 deletion docs/Changelog.md
@@ -2,8 +2,14 @@

# ONNX-TensorRT Changelog

# TensorRT 10.2 GA Release - 2024-7-10
For more details, see the 10.2 GA release notes.

- Improved error handling with new macros and classes
- Minor changes to op importers for `GRU` and `Squeeze`

# TensorRT 10.1 GA Release - 2024-6-17
For more details, see the 10.0 GA release notes.
For more details, see the 10.1 GA release notes.

- Added `supportsModelV2` API
- Added support for `DeformConv` operation
86 changes: 79 additions & 7 deletions errorHelpers.hpp
@@ -5,35 +5,107 @@

#include "Status.hpp"
#include <NvInferRuntime.h>
#include <exception>
#include <sstream>
#include <stdexcept>

#define ONNXTRT_TRY try

#define ONNXTRT_CATCH_RECORD \
catch (OnnxTrtException & e) \
{ \
Status status = e.getStatus(); \
mImporterCtx.getErrorRecorder()->reportError(errorCodeToTrtCode(status.code()), e.what()); \
mErrors.push_back(status); \
} \
catch (std::exception & e) \
{ \
mImporterCtx.getErrorRecorder()->reportError(nvinfer1::ErrorCode::kINTERNAL_ERROR, e.what()); \
mImporterCtx.getErrorRecorder()->reportError(nvinfer1::ErrorCode::kUNSPECIFIED_ERROR, e.what()); \
mErrors.push_back(Status{ErrorCode::kINTERNAL_ERROR, e.what()}); \
}

#define ONNXTRT_CATCH_LOG(logger) \
catch (OnnxTrtException & e) \
{ \
Status status = e.getStatus(); \
(logger)->log(nvinfer1::ILogger::Severity::kINTERNAL_ERROR, e.what()); \
mErrors.push_back(status); \
} \
catch (std::exception & e) \
{ \
(logger)->log(nvinfer1::ILogger::Severity::kINTERNAL_ERROR, e.what()); \
mErrors.push_back(Status{ErrorCode::kINTERNAL_ERROR, e.what()}); \
}

#define ONNXTRT_THROW(status) throw OnnxTrtException(status)

#define ONNXTRT_CHECK(cond, status) \
if (!(cond)) \
{ \
std::ostringstream ss; \
ss << "Assertion failed: " << #cond; \
ONNXTRT_THROW(status); \
}

#define ONNXTRT_CHECK_NODE(cond, desc, node, nodeIdx, code) \
if (!(cond)) \
{ \
std::ostringstream ss; \
ss << "Assertion failed: " << #cond << ". " << (desc); \
ONNXTRT_THROW(MAKE_NODE_ERROR((ss.str()), (code), (node), (nodeIdx))); \
}

namespace onnx2trt
{
-inline void ONNXTRT_CHECK(bool cond, Status status)
-{
-    if (!cond)
-    {
-        std::ostringstream os;
-        os << "[" << status.file() << ":" << status.func() << ":" << status.line() << "] ";
-        os << "Error Code " << static_cast<int32_t>(status.code()) << ": " << status.desc();
-        throw std::runtime_error(os.str());
-    }
-}
+inline char const* errorCodeStr(ErrorCode code)
+{
+    switch (code)
+    {
+    case ErrorCode::kSUCCESS: return "SUCCESS";
+    case ErrorCode::kINTERNAL_ERROR: return "INTERNAL_ERROR";
+    case ErrorCode::kMEM_ALLOC_FAILED: return "MEM_ALLOC_FAILED";
+    case ErrorCode::kMODEL_DESERIALIZE_FAILED: return "MODEL_DESERIALIZE_FAILED";
+    case ErrorCode::kINVALID_VALUE: return "INVALID_VALUE";
+    case ErrorCode::kINVALID_GRAPH: return "INVALID_GRAPH";
+    case ErrorCode::kINVALID_NODE: return "INVALID_NODE";
+    case ErrorCode::kUNSUPPORTED_GRAPH: return "UNSUPPORTED_GRAPH";
+    case ErrorCode::kUNSUPPORTED_NODE: return "UNSUPPORTED_NODE";
+    case ErrorCode::kUNSUPPORTED_NODE_ATTR: return "UNSUPPORTED_NODE_ATTR";
+    case ErrorCode::kUNSUPPORTED_NODE_INPUT: return "UNSUPPORTED_NODE_INPUT";
+    case ErrorCode::kUNSUPPORTED_NODE_DATATYPE: return "UNSUPPORTED_NODE_DATATYPE";
+    case ErrorCode::kUNSUPPORTED_NODE_DYNAMIC: return "UNSUPPORTED_NODE_DYNAMIC";
+    case ErrorCode::kUNSUPPORTED_NODE_SHAPE: return "UNSUPPORTED_NODE_SHAPE";
+    case ErrorCode::kREFIT_FAILED: return "REFIT_FAILED";
+    }
+    return "UNKNOWN";
+};
+
+inline std::string const parserErrorStr(nvonnxparser::IParserError const* error)
+{
+    std::string const nodeInfo = "In node " + std::to_string(error->node()) + " with name: " + error->nodeName()
+        + " and operator: " + error->nodeOperator() + " ";
+    std::string const errorInfo
+        = std::string("(") + error->func() + "): " + errorCodeStr(error->code()) + ": " + error->desc();
+    if (error->code() == ErrorCode::kMODEL_DESERIALIZE_FAILED || error->code() == ErrorCode::kREFIT_FAILED)
+    {
+        return errorInfo.c_str();
+    }
+    return (nodeInfo + errorInfo).c_str();
+}

nvinfer1::ErrorCode errorCodeToTrtCode(ErrorCode const code);

class OnnxTrtException : public std::exception
{
Status mStatus;
mutable std::string mMessage;

public:
OnnxTrtException(Status status);

Status getStatus() const noexcept;

virtual char const* what() const noexcept override;

virtual ~OnnxTrtException() {}
};

} // namespace onnx2trt
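Taken together, the intended pattern is that op importers validate with the check macros and throw `OnnxTrtException`, while the outermost API entry points translate exceptions into recorded errors. A hedged sketch (the class, the member names `mImporterCtx`/`mErrors` the macros expect, and `importEverything` are assumptions, not code from this commit):

```cpp
// Inside an op importer: validate and throw on failure.
ONNXTRT_CHECK_NODE(axis >= 0 && axis < nbDims, "Axis attribute is out of range.",
    node, nodeIdx, ErrorCode::kINVALID_NODE);

// At an API boundary: convert any exception into an IErrorRecorder entry.
bool ModelImporter::parseSomething()
{
    ONNXTRT_TRY
    {
        importEverything(); // may throw OnnxTrtException or std::exception
        return true;
    }
    ONNXTRT_CATCH_RECORD
    return false;
}
```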
32 changes: 12 additions & 20 deletions importerUtils.cpp
@@ -133,10 +133,10 @@ NodeImportResult argMinMaxHelper(ImporterContext* ctx, const ::ONNX_NAMESPACE::N
auto const dimOnAxis = gather(ctx, dims, axisTensor);

// Create constant of shape indicesDims with values tensor.shape[axis]
auto const tensorDimOnAxis = constantOfShape(ctx, node, &dimOnAxis.tensor(ctx), &indicesDims.tensor(ctx));
auto const tensorDimOnAxis = constantOfShape(ctx, &dimOnAxis.tensor(ctx), &indicesDims.tensor(ctx));

// Create constant of shape indicesDims with values of 1
auto const ones = constantOfShape(ctx, node, &shapeVector(1).tensor(ctx), &indicesDims.tensor(ctx));
auto const ones = constantOfShape(ctx, &shapeVector(1).tensor(ctx), &indicesDims.tensor(ctx));

std::vector<TensorOrWeights> newInputs{tensorDimOnAxis, indices, ones};
std::vector<TensorOrWeights> indicesUpdate;
@@ -285,8 +285,7 @@ nvinfer1::ITensor* castHelper(ImporterContext* ctx, nvinfer1::ITensor* input, nv
return N_CHECK(cast->getOutput(0));
}

nvinfer1::ITensor* constantOfShape(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node,
nvinfer1::ITensor* constant, nvinfer1::ITensor* shape)
nvinfer1::ITensor* constantOfShape(ImporterContext* ctx, nvinfer1::ITensor* constant, nvinfer1::ITensor* shape)
{
ShapeTensor shapeT{*shape};
ShapeTensor zeros = similar(ctx, shapeT, 0);
@@ -398,17 +397,10 @@ onnx2trt::ShapedWeights createZeroShifts(onnx2trt::ShapedWeights const& shiftInt

nvinfer1::ITensor* createZeroTensor(ImporterContext* ctx, nvinfer1::ITensor* data)
{
auto* zeroLayer = N_CHECK(addConstant(ctx, std::vector<float>{0.f}, ::ONNX_NAMESPACE::TensorProto::FLOAT, {0, {1}}));
auto* zeroTensor = N_CHECK(zeroLayer->getOutput(0));
zeroTensor = castHelper(ctx, zeroTensor, data->getType());
auto result = broadcastTensors(ctx, zeroTensor, data);
if (result.is_error())
{
return nullptr;
}
auto* zeroBroadcastLayer = N_CHECK(ctx->network()->addElementWise(*data, *zeroTensor, nvinfer1::ElementWiseOperation::kPROD));
ctx->registerLayer(zeroBroadcastLayer, "ONNXTRT_createZeroTensor", nullptr);
return N_CHECK(zeroBroadcastLayer->getOutput(0));
auto shape = shapeOf(*data);
auto* zeros = N_CHECK(addConstantScalar(ctx, 0.0F, ::ONNX_NAMESPACE::TensorProto::FLOAT)->getOutput(0));
zeros = castHelper(ctx, zeros, data->getType());
return constantOfShape(ctx, zeros, &shape.tensor(ctx));
}
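The rewrite above drops the old broadcast-and-multiply construction (multiplying the input by a zero constant), so the zeros no longer carry a spurious dependency on the input's values. A hedged sketch of a call site (illustrative only):

```cpp
// zeroBias gets data's dtype and (possibly dynamic) shape; data is never read.
nvinfer1::ITensor* zeroBias = createZeroTensor(ctx, data);
```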

nvinfer1::ITensor* convertToScalar(ImporterContext* ctx, nvinfer1::ITensor* inpTensor)
@@ -1157,13 +1149,13 @@ NodeImportResult modulatedDeformableConvPluginHelper(ImporterContext* ctx, ::ONN
{
static_cast<half_float::half*>(defaultMaskWeights.values)[0] = 1.0;
auto maskTensor = TensorOrWeights{defaultMaskWeights};
maskPtr = constantOfShape(ctx, node, &convertToTensor(maskTensor, ctx), &maskShape);
maskPtr = constantOfShape(ctx, &convertToTensor(maskTensor, ctx), &maskShape);
}
else
{
static_cast<float*>(defaultMaskWeights.values)[0] = 1.F;
auto maskTensor = TensorOrWeights{defaultMaskWeights};
maskPtr = constantOfShape(ctx, node, &convertToTensor(maskTensor, ctx), &maskShape);
maskPtr = constantOfShape(ctx, &convertToTensor(maskTensor, ctx), &maskShape);
}
}

@@ -1224,7 +1216,7 @@ NodeImportResult instanceNormPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE

// Populate instanceNormalization plugin properties.
std::string const pluginName = "InstanceNormalization_TRT";
std::string const pluginVersion = "1";
std::string const pluginVersion = "3";
std::vector<nvinfer1::PluginField> f;

// get the values of constant inputs and cast them to float32
Expand All @@ -1239,12 +1231,12 @@ NodeImportResult instanceNormPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE

// Create plugin from registry
auto const plugin = createPlugin(getNodeName(node),
static_cast<nvinfer1::IPluginCreator*>(importPluginCreator(ctx, pluginName, pluginVersion)), f);
static_cast<nvinfer1::IPluginCreatorV3One*>(importPluginCreator(ctx, pluginName, pluginVersion)), f);

ASSERT_NODE(plugin != nullptr, "InstanceNormalization plugin was not found in the plugin registry!", node, nodeIdx,
ErrorCode::kUNSUPPORTED_NODE);

auto* layer = N_CHECK(ctx->network()->addPluginV2(&tensorPtr, 1, *plugin));
auto* layer = N_CHECK(ctx->network()->addPluginV3(&tensorPtr, 1, nullptr, 0, *plugin));
ctx->registerLayer(layer, node);
tensorPtr = N_CHECK(layer->getOutput(0));

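The InstanceNormalization change above moves the importer from the V2 plugin path (`IPluginCreator`, `addPluginV2`, plugin version "1") to the V3 path (`IPluginCreatorV3One`, `addPluginV3`, plugin version "3"). A generic sketch of that flow against the public TensorRT API, with the structure assumed rather than taken from this repo's helpers:

```cpp
#include <NvInferRuntime.h>

// Hypothetical sketch: look up a V3 plugin creator and add the plugin layer.
nvinfer1::IPluginV3Layer* addV3Plugin(nvinfer1::INetworkDefinition& network, nvinfer1::ITensor* input)
{
    auto* creator = static_cast<nvinfer1::IPluginCreatorV3One*>(
        getPluginRegistry()->getCreator("InstanceNormalization_TRT", "3", ""));
    nvinfer1::PluginFieldCollection fc{0, nullptr}; // plugin fields omitted for brevity
    nvinfer1::IPluginV3* plugin = creator->createPlugin("instNorm", &fc, nvinfer1::TensorRTPhase::kBUILD);
    // Unlike addPluginV2, addPluginV3 takes separate regular- and shape-input lists.
    return network.addPluginV3(&input, 1, nullptr, 0, *plugin);
}
```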
3 changes: 1 addition & 2 deletions importerUtils.hpp
@@ -127,8 +127,7 @@ std::vector<int32_t> calculatePitches(nvinfer1::Dims const& inputDims);
nvinfer1::ITensor* castHelper(ImporterContext* ctx, nvinfer1::ITensor* input, nvinfer1::DataType dtype);

// Helper function for constantOfShape operator. Input shape must be a shape tensor
nvinfer1::ITensor* constantOfShape(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node,
nvinfer1::ITensor* constant, nvinfer1::ITensor* shape);
nvinfer1::ITensor* constantOfShape(ImporterContext* ctx, nvinfer1::ITensor* constant, nvinfer1::ITensor* shape);

// Helper function to convert an ONNX axis into a TRT axis
Status convertAxis(int32_t& axis, int32_t const nbDims, ::ONNX_NAMESPACE::NodeProto const& node, size_t const nodeIdx);
23 changes: 14 additions & 9 deletions onnxOpImporters.cpp
@@ -731,7 +731,7 @@ DEFINE_BUILTIN_OP_IMPORTER(ConstantOfShape)
static_cast<float*>(zeroWeights.values)[0] = 0.f;
auto valueWeights = TensorOrWeights{attrs.get("value", zeroWeights)};
nvinfer1::ITensor* value = &convertToTensor(valueWeights, ctx);
return {{constantOfShape(ctx, node, value, shape)}};
return {{constantOfShape(ctx, value, shape)}};
}

DEFINE_BUILTIN_OP_IMPORTER(Conv)
@@ -2374,7 +2374,7 @@ DEFINE_BUILTIN_OP_IMPORTER(GRU)
{
return &convertToTensor(inputs.at(inputIdx), ctx);
}
return constantOfShape(ctx, node,
return constantOfShape(ctx,
addConstantScalar(ctx, 0.f, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, Dims{1, {1}})->getOutput(0),
gateOutputShape);
};
@@ -2491,12 +2491,12 @@ DEFINE_BUILTIN_OP_IMPORTER(GRU)
LOG_VERBOSE("h(t) -> " << ht->getDimensions());

// H(t) = (1 - z(t)) . h(t) + (z(t) . H(t-1))
// Constant `1` needs to be the same type as the inputs, either FP16 or FP32.
auto onnxType = zt->getType() == nvinfer1::DataType::kHALF ? ::ONNX_NAMESPACE::TensorProto::FLOAT16
: ::ONNX_NAMESPACE::TensorProto::FLOAT;
auto* constOne = N_CHECK(addConstantScalar(ctx, 1.f, onnxType, Dims3{1, 1, 1})->getOutput(0));
nvinfer1::ITensor* Ht = getElementWiseResult(ctx,
*getElementWiseResult(ctx,
*getElementWiseResult(ctx,
*addConstantScalar(ctx, 1.f, ::ONNX_NAMESPACE::TensorProto::FLOAT, Dims3{1, 1, 1})->getOutput(0), *zt,
eOp::kSUB),
*ht, eOp::kPROD),
*getElementWiseResult(ctx, *getElementWiseResult(ctx, *constOne, *zt, eOp::kSUB), *ht, eOp::kPROD),
*getElementWiseResult(ctx, *zt, *Ht1Output, eOp::kPROD), eOp::kSUM);

// singlePassShape = (1, batchSize, hiddenSize)
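The only functional change in this hunk is that the constant `1` now matches the gate dtype (FP16 or FP32) instead of always being FP32; the blend itself is unchanged. A scalar sanity check of H(t) = (1 - z(t)) · h(t) + z(t) · H(t-1), illustrative only:

```cpp
float const z = 0.25F;    // update gate z(t)
float const h = 2.0F;     // candidate hidden state h(t)
float const Hprev = 4.0F; // previous hidden state H(t-1)
float const H = (1.0F - z) * h + z * Hprev; // 0.75*2 + 0.25*4 = 2.5
```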
@@ -3051,7 +3051,7 @@ DEFINE_BUILTIN_OP_IMPORTER(LSTM)
{
return &convertToTensor(inputs.at(inputIdx), ctx);
}
return constantOfShape(ctx, node,
return constantOfShape(ctx,
addConstantScalar(ctx, 0.f, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, nvinfer1::Dims{1, {1}})
->getOutput(0),
gateOutputShape);
@@ -4217,6 +4217,11 @@ DEFINE_BUILTIN_OP_IMPORTER(Range)
delta = ShapeTensor{*input2};
}

// Although the ONNX spec requires scalars, in practice the inputs may be vectors of rank 1. Squeeze here if necessary.
start = start.rank() == 1 ? convertTo0D(ctx, start) : start;
limit = limit.rank() == 1 ? convertTo0D(ctx, limit) : limit;
delta = delta.rank() == 1 ? convertTo0D(ctx, delta) : delta;

// "number_of_elements = max( ceil( (limit - start) / delta ) , 0 )"
//
// To implement this in TensorRT using only operations allowed on
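A worked check of the quoted element-count formula (illustrative helper, not parser code):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>

// number_of_elements = max(ceil((limit - start) / delta), 0)
int64_t rangeCount(double start, double limit, double delta)
{
    return std::max<int64_t>(static_cast<int64_t>(std::ceil((limit - start) / delta)), 0);
}
// rangeCount(3, 10, 2) == 4 -> Range yields {3, 5, 7, 9};
// rangeCount(0, -5, 1) == 0 -> empty output.
```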
@@ -4804,7 +4809,7 @@ DEFINE_BUILTIN_OP_IMPORTER(RNN)
{
return &convertToTensor(inputs.at(inputIdx), ctx);
}
return constantOfShape(ctx, node,
return constantOfShape(ctx,
N_CHECK(addConstantScalar(ctx, 0.f, ::ONNX_NAMESPACE::TensorProto_DataType_FLOAT, nvinfer1::Dims{1, {1}})
->getOutput(0)),
initialStateShape());
2 changes: 1 addition & 1 deletion onnx_tensorrt/__init__.py
@@ -4,4 +4,4 @@

from . import backend

__version__ = "10.1.0"
__version__ = "10.2.0"
