ONNX-TensorRT 10.6 GA Release (#1000)
Signed-off-by: Kevin Chen <[email protected]>
kevinch-nv authored Nov 5, 2024
1 parent 886aff9 commit 4442153
Showing 20 changed files with 355 additions and 223 deletions.
.gitmodules (2 changes: 1 addition & 1 deletion)
@@ -1,4 +1,4 @@
 [submodule "third_party/onnx"]
     path = third_party/onnx
     url = https://github.com/onnx/onnx.git
-    branch = v1.16.0
+    branch = v1.17.0
CMakeLists.txt (14 changes: 10 additions & 4 deletions)
@@ -28,7 +28,7 @@ add_definitions("-DSOURCE_LENGTH=${SOURCE_LENGTH}")
 # Version information
 #--------------------------------------------------
 set(ONNX2TRT_MAJOR 10)
-set(ONNX2TRT_MINOR 5)
+set(ONNX2TRT_MINOR 6)
 set(ONNX2TRT_PATCH 0)
 set(ONNX2TRT_VERSION "${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}.${ONNX2TRT_PATCH}" CACHE STRING "ONNX2TRT version")
 
@@ -106,6 +106,12 @@ find_path(TENSORRT_INCLUDE_DIR NvInfer.h
   PATH_SUFFIXES include)
 MESSAGE(STATUS "Found TensorRT headers at ${TENSORRT_INCLUDE_DIR}")
 
+# TensorRT Python Headers
+find_path(TENSORRT_PYTHON_INCLUDE_DIR plugin.h
+  HINTS ${TENSORRT_ROOT}
+  PATH_SUFFIXES python/include/impl)
+message(NOTICE "Found TensorRT Python headers at ${TENSORRT_PYTHON_INCLUDE_DIR}")
+
 # Output dynamic library names depends on platform:
 if (MSVC)
   set(nvonnxparser_lib_name "nvonnxparser_${ONNX2TRT_MAJOR}")
@@ -119,7 +125,7 @@ set(nvonnxparser_lib_name_static "nvonnxparser_static")
 # Importer library
 # --------------------------------
 add_library(${nvonnxparser_lib_name} SHARED ${IMPORTER_SOURCES})
-target_include_directories(${nvonnxparser_lib_name} PUBLIC ${ONNX_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR} ${CUDA_INCLUDE_DIR})
+target_include_directories(${nvonnxparser_lib_name} PUBLIC ${ONNX_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR} ${TENSORRT_PYTHON_INCLUDE_DIR} ${CUDA_INCLUDE_DIR})
 target_link_libraries(${nvonnxparser_lib_name} PUBLIC onnx_proto ${PROTOBUF_LIBRARY})
 set_target_properties(${nvonnxparser_lib_name} PROPERTIES
     VERSION ${ONNX2TRT_VERSION}
@@ -131,7 +137,7 @@ set_target_properties(${nvonnxparser_lib_name} PROPERTIES
     RUNTIME_OUTPUT_DIRECTORY "${TRT_OUT_DIR}"
 )
 add_library(${nvonnxparser_lib_name_static} STATIC ${IMPORTER_SOURCES})
-target_include_directories(${nvonnxparser_lib_name_static} PUBLIC ${ONNX_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR} ${CUDA_INCLUDE_DIR})
+target_include_directories(${nvonnxparser_lib_name_static} PUBLIC ${ONNX_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR} ${TENSORRT_PYTHON_INCLUDE_DIR} ${CUDA_INCLUDE_DIR})
 target_link_libraries(${nvonnxparser_lib_name_static} PUBLIC onnx_proto ${PROTOBUF_LIBRARY})
 set_target_properties(${nvonnxparser_lib_name_static} PROPERTIES
     ARCHIVE_OUTPUT_DIRECTORY "${TRT_OUT_DIR}"
@@ -143,7 +149,7 @@ set_target_properties(${nvonnxparser_lib_name_static} PROPERTIES
 # --------------------------------
 if(BUILD_ONNXIFI)
   add_library(trt_onnxify SHARED ${ONNXIFI_SOURCES})
-  target_include_directories(trt_onnxify PUBLIC ${CUDA_INCLUDE_DIR} ${ONNX_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR})
+  target_include_directories(trt_onnxify PUBLIC ${CUDA_INCLUDE_DIR} ${ONNX_INCLUDE_DIRS} ${TENSORRT_INCLUDE_DIR} ${TENSORRT_PYTHON_INCLUDE_DIR})
   target_link_libraries(trt_onnxify PUBLIC ${nvonnxparser_lib_name_static} ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS})
 endif()

ConditionalHelpers.cpp (110 changes: 15 additions & 95 deletions)
@@ -10,32 +10,17 @@
 namespace onnx2trt
 {
 
-using NodeName = std::string;
-using LayerName = std::string;
-using InputIndex = int32_t;
-
-// A SubgraphPortsMap maps either the inputs or outputs ports of each node in an ONNX graph.
-using SubgraphPortsMap = std::unordered_map<nvinfer1::ITensor*, std::set<InputIndex>>;
-
-// An InputsMap tracks which IIfConditionalInputLayer we've added to a layer's inputs,
-// so that we can reuse them if needed.
-using InputsMap = std::unordered_map<LayerName, nvinfer1::IIfConditionalInputLayer*>;
-
-// Search for a network Layer name in a SubgraphPortsMap using partial (prefix) name matching.
-// ONNX nodes are matched to network layers using prefix-matching because an ONNX node may have
-// several network layers associated with it.
+// Search for a network Layer name in a SubgraphPortsMap.
 SubgraphPortsMap::const_iterator findLayer(const SubgraphPortsMap& inputs, const std::string layerName)
 {
-    return std::find_if(inputs.begin(), inputs.end(), [&](const auto& item) {
-        std::string const key = item.first->getName();
-        return layerName.compare(0, key.size(), key) == 0;
-    });
+    return std::find_if(
+        inputs.begin(), inputs.end(), [&](const auto& item) { return layerName == item.first->getName(); });
 }
 
 // Add a ConditionalInputLayer between `layer` and its inputs.
 // I.e. input[inIdx] -> layer ==> input[inIdx] -> ConditionalInputLayer -> layer.
 void addConditionalInputLayer(ImporterContext* ctx, nvinfer1::IIfConditional* conditional, InputsMap& inputsMap,
-    nvinfer1::ILayer& layer, int32_t inIdx)
+    nvinfer1::ILayer& layer, int32_t inIdx, ::ONNX_NAMESPACE::NodeProto const* node)
 {
     auto input = layer.getInput(inIdx);
     if (input == nullptr)
@@ -57,7 +42,7 @@ void addConditionalInputLayer(ImporterContext* ctx, nvinfer1::IIfConditional* co
         inputLayer = N_CHECK(conditional->addInput(*input));
         inputsMap[name] = inputLayer;
         const std::string inputLayerName(name);
-        ctx->registerLayer(inputLayer, inputLayerName + "_InputLayer", nullptr);
+        ctx->registerLayer(inputLayer, inputLayerName + "_InputLayer", node);
         // Note: Since multiple conditionals may use the same external tensor, check unique names for output tensors of
         // IfConditionalInputLayers to avoid tensor name duplication.
         ctx->registerTensor(
@@ -100,7 +85,7 @@ void importSubgraph(ImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& su

 // Add an IConditionalInputLayer to `layer`'s inputs, if they don't already exist.
 void addConditionalInputIfNeeded(ImporterContext* ctx, nvinfer1::IIfConditional* conditional, InputsMap& inputsMap,
-    nvinfer1::ILayer& layer, SubgraphPortsMap subgraphInputsMap)
+    nvinfer1::ILayer& layer, SubgraphPortsMap subgraphInputsMap, ::ONNX_NAMESPACE::NodeProto const* node)
 {
     // Return all of the layer's inputs that are external to the subgraph
     // that the layer belongs to.
@@ -120,36 +105,29 @@ void addConditionalInputIfNeeded(ImporterContext* ctx, nvinfer1::IIfConditional*
     for (auto inIdx : inIndices)
     {
         LOG_VERBOSE("Adding Input layer for " << layer.getName());
-        addConditionalInputLayer(ctx, conditional, inputsMap, layer, inIdx);
+        addConditionalInputLayer(ctx, conditional, inputsMap, layer, inIdx, node);
     }
 }
 
 // Add IConditionalInputLayers to `layer`'s inputs.
 void addIfInputLayers(ImporterContext* ctx, nvinfer1::IIfConditional* conditional, InputsMap& inputsMap,
-    const std::vector<nvinfer1::ILayer*>& newLayers)
+    const std::vector<nvinfer1::ILayer*>& newLayers, ::ONNX_NAMESPACE::NodeProto const* node)
 {
     // Find all of the tensors entering the subgraph.
-    // The node-names are from the ONNX context.
-    using InputIndex = int32_t;
-    std::unordered_map<nvinfer1::ITensor*, std::set<InputIndex>> subgraphInputsMap;
-    getSubgraphInputs(newLayers, subgraphInputsMap);
+    SubgraphPortsMap externalInputs;
+    getSubgraphInputs(newLayers, externalInputs);
 
     // Add a ConditionalInputLayer in front of each input that is external to the subgraph.
     for (const auto& layer : newLayers)
     {
-        addConditionalInputIfNeeded(ctx, conditional, inputsMap, *layer, subgraphInputsMap);
+        addConditionalInputIfNeeded(ctx, conditional, inputsMap, *layer, externalInputs, node);
     }
 }
 
-// Given a subgraph, find all of its external inputs/outputs (tensors entering/exiting the subgraph).
-void getSubgraphTensors(const std::vector<nvinfer1::ILayer*>& newLayers,
-    std::unordered_map<nvinfer1::ITensor*, std::set<int32_t>>& externalOutputs, bool extractOutputs,
-    const std::vector<std::string>* reportedOutputs = nullptr)
+// Given a subgraph, find all of its external inputs (tensors entering the subgraph).
+void getSubgraphInputs(const std::vector<nvinfer1::ILayer*>& newLayers, SubgraphPortsMap& externalInputs)
 {
-    using NodeName = std::string;
-    using TensorName = std::string;
     using PortIndex = int32_t;
-    using Port = std::pair<NodeName, PortIndex>;
     using TensorsSet = std::unordered_set<nvinfer1::ITensor*>;
     TensorsSet outputTensors;
     TensorsSet inputTensors;
@@ -176,20 +154,14 @@
     }
 
     using TensorsVec = std::vector<nvinfer1::ITensor*>;
-    auto getOutputs = [&](nvinfer1::ILayer const* l, TensorsVec& res) {
-        getTensors(l, false, [&](nvinfer1::ITensor* t) { res.emplace_back(t); });
-    };
 
     auto getInputs = [&](nvinfer1::ILayer const* l, TensorsVec& res) {
         getTensors(l, true, [&](nvinfer1::ITensor* t) { res.emplace_back(t); });
     };
 
-    // Retrieve the list of tensors either exiting or entering the subgraph.
-    std::unordered_map<nvinfer1::ITensor*, std::vector<Port>> externalPortsMap;
     auto filterTensors = [&](TensorsSet const& tensors, auto getNodeAccessor) {
         for (nvinfer1::ILayer const* l : newLayers)
         {
-            const auto& nodeName = l->getName();
             PortIndex i = 0;
 
             TensorsVec nodeAccessor;
@@ -202,66 +174,14 @@
                 }
                 if (tensors.count(tensor) == 0)
                 {
-                    TensorName tensorName = tensor->getName();
-                    auto prefixFound = false;
-                    if (reportedOutputs)
-                    {
-                        // reportedOutputs are the names of the outputs as reported by the ONNX parser and help
-                        // us further filter the output tensors.
-                        // Exiting tensors := {outputs} - {inputs} - {unreported tensors}
-                        // An example: a Split node is internal to a subgraph and has 4 outputs, but only two are
-                        // connected to the rest of the graph. To prevent mistaking the 2 unused outputs as subgraph
-                        // outputs, we look for them in reportedOutputs which leads us to ignore the 2 tensors.
-                        const auto iter = std::find_if(
-                            reportedOutputs->begin(), reportedOutputs->end(), [&](const auto& outputName) {
-                                // Prefix name matching.
-                                return tensorName.compare(0, outputName.size(), outputName) == 0;
-                            });
-                        prefixFound = iter != reportedOutputs->end();
-                    }
-                    if (!reportedOutputs || prefixFound)
-                    {
-                        externalPortsMap[tensor].push_back(std::make_pair(nodeName, i));
-                    }
+                    externalInputs[l].insert(i);
                 }
                 i++;
             }
         }
     };
 
-    if (extractOutputs)
-    {
-        filterTensors(inputTensors, getOutputs);
-    }
-    else
-    {
-        filterTensors(outputTensors, getInputs);
-    }
-
-    // Create the user's view of the external inputs, which uses the node-name as the key for
-    // looking up input/output port index.
-    for (auto const& input : externalPortsMap)
-    {
-        for (const Port& inPort : input.second)
-        {
-            auto* tensor = input.first;
-            auto const portIndex = inPort.second;
-            externalOutputs[tensor].insert(portIndex);
-        }
-    }
-}
-
-void getSubgraphOutputs(const std::vector<nvinfer1::ILayer*>& newLayers,
-    std::unordered_map<nvinfer1::ITensor*, std::set<int32_t>>& externalOutputs,
-    const std::vector<std::string>& reportedOutputs)
-{
-    getSubgraphTensors(newLayers, externalOutputs, true, &reportedOutputs);
-}
-
-void getSubgraphInputs(const std::vector<nvinfer1::ILayer*>& newLayers,
-    std::unordered_map<nvinfer1::ITensor*, std::set<int32_t>>& externalInputs)
-{
-    getSubgraphTensors(newLayers, externalInputs, false);
+    filterTensors(outputTensors, getInputs);
 }
 
 } // namespace onnx2trt
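
The behavioral core of this file's change is the findLayer lookup: the old code treated a map key as a match whenever it was a prefix of the queried layer name (one ONNX node could expand into several TensorRT layers sharing a name prefix), while the new code requires the names to be equal. A minimal standalone sketch of the two behaviors, using a string-keyed map in place of TensorRT's ILayer pointers (names here are hypothetical, for illustration only):

#include <algorithm>
#include <iostream>
#include <map>
#include <string>

// Stand-in for SubgraphPortsMap, keyed by layer name instead of ILayer*.
using PortsByName = std::map<std::string, int>;

// Old behavior: a key matches if it is a prefix of the queried name,
// so "MatMul_1_copy" would match the key "MatMul_1".
PortsByName::const_iterator findByPrefix(PortsByName const& m, std::string const& name)
{
    return std::find_if(m.begin(), m.end(), [&](auto const& item) {
        auto const& key = item.first;
        return name.compare(0, key.size(), key) == 0;
    });
}

// New behavior: exact name matching only.
PortsByName::const_iterator findExact(PortsByName const& m, std::string const& name)
{
    return std::find_if(m.begin(), m.end(), [&](auto const& item) { return name == item.first; });
}

int main()
{
    PortsByName ports{{"MatMul_1", 0}};
    std::cout << (findByPrefix(ports, "MatMul_1_copy") != ports.end()) << "\n"; // 1: prefix hit
    std::cout << (findExact(ports, "MatMul_1_copy") != ports.end()) << "\n";    // 0: no exact match
}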
ConditionalHelpers.hpp (25 changes: 12 additions & 13 deletions)
@@ -18,28 +18,27 @@
 namespace onnx2trt
 {
 
+using NodeName = std::string;
+using LayerName = std::string;
+using InputIndex = int32_t;
+
+// A SubgraphPortsMap maps inputs' ports of each layer in an ONNX graph.
+using SubgraphPortsMap = std::unordered_map<const nvinfer1::ILayer*, std::unordered_set<InputIndex>>;
+
 // Given a subgraph, find all of its external inputs (tensors entering the subgraph).
-// The result is returned in `subgraphInputs`, which is a map indexed by ITensor (a tensor entering the subgraph) and
-// with values indicating a set of external input indices.
-void getSubgraphInputs(std::vector<nvinfer1::ILayer*> const& newLayers,
-    std::unordered_map<nvinfer1::ITensor*, std::set<int32_t>>& subgraphInputs);
-
-// Given a subgraph, find all of its external outputs (tensors exiting the subgraph).
-// The result is returned in `subgraphInputs`, which is a map indexed by ITensor (a tensor exiting the subgraph) and
-// with values indicating a set of external outputs indices.
-void getSubgraphOutputs(const std::vector<nvinfer1::ILayer*>& newLayers,
-    std::unordered_map<nvinfer1::ITensor*, std::set<int32_t>>& subgraphOutputs,
-    const std::vector<std::string>& reportedOutputs);
+void getSubgraphInputs(const std::vector<nvinfer1::ILayer*>& newLayers, SubgraphPortsMap& externalInputs);
 
 // Take a snapshot of the network before and after parsing the subgraph and return a list
 // of newly added network layers.
 void importSubgraph(ImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& subgraph,
     std::vector<nvinfer1::ILayer*>& newLayers, std::vector<TensorOrWeights>& subgraphTensors);
 
-using InputsMap = std::unordered_map<std::string, nvinfer1::IIfConditionalInputLayer*>;
+// An InputsMap tracks which IIfConditionalInputLayer we've added to a layer's inputs,
+// so that we can reuse them if needed.
+using InputsMap = std::unordered_map<LayerName, nvinfer1::IIfConditionalInputLayer*>;
 
 // Add IIfConditionalInputLayers to the inputs of the subgraph indicated by `subgraph`.
 void addIfInputLayers(ImporterContext* ctx, nvinfer1::IIfConditional* conditional, InputsMap& inputsMap,
-    const std::vector<nvinfer1::ILayer*>& newLayers);
+    const std::vector<nvinfer1::ILayer*>& newLayers, ::ONNX_NAMESPACE::NodeProto const* node);
 
 } // namespace onnx2trt
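
The consolidated getSubgraphInputs keeps the set logic that the deleted getSubgraphTensors expressed through its extractOutputs flag: a tensor enters the subgraph if some layer inside it consumes the tensor but no layer inside it produces it, i.e. entering := {inputs} - {outputs}. A rough standalone sketch of that classification, with a plain struct standing in for ILayer/ITensor (illustrative only, not the parser's API):

#include <iostream>
#include <set>
#include <string>
#include <vector>

// Minimal stand-in for a layer: the tensor names it reads and writes.
struct FakeLayer
{
    std::vector<std::string> inputs;
    std::vector<std::string> outputs;
};

// A tensor entering the subgraph is consumed by some layer but produced by none.
std::set<std::string> externalInputs(std::vector<FakeLayer> const& layers)
{
    std::set<std::string> produced;
    for (auto const& l : layers)
    {
        produced.insert(l.outputs.begin(), l.outputs.end());
    }
    std::set<std::string> entering;
    for (auto const& l : layers)
    {
        for (auto const& t : l.inputs)
        {
            if (produced.count(t) == 0)
            {
                entering.insert(t);
            }
        }
    }
    return entering;
}

int main()
{
    // Two-layer subgraph: A reads x,w and writes h; B reads h and writes y.
    std::vector<FakeLayer> sub{{{"x", "w"}, {"h"}}, {{"h"}, {"y"}}};
    for (std::string const& t : externalInputs(sub))
    {
        std::cout << t << "\n"; // prints w and x; h is produced inside the subgraph
    }
}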
ImporterContext.hpp (13 changes: 11 additions & 2 deletions)
@@ -138,8 +138,17 @@ class ImporterContext
     //! Map holding FunctionProtos
     StringMap<::ONNX_NAMESPACE::FunctionProto> mLocalFunctions;
 
+    //! Data type to keep track of a local function in mLocalFunctionStack.
+    //! It is a tuple of three elements: (1) function name, (2) node name, and (3) function attributes.
+    struct LocalFunctionMetadata
+    {
+        std::string functionName;
+        std::string nodeName;
+        StringMap<::ONNX_NAMESPACE::AttributeProto const*> attrs;
+    };
+
     //! Vector to hold current local function names and attributes
-    std::vector<std::pair<std::string, StringMap<::ONNX_NAMESPACE::AttributeProto const*>>> mLocalFunctionStack;
+    std::vector<LocalFunctionMetadata> mLocalFunctionStack;
 
     //! Vector to hold the local function names at each error
     std::vector<std::vector<std::string>> mLocalFunctionErrors;
@@ -325,7 +334,7 @@
     {
         return mLocalFunctions;
     }
-    std::vector<std::pair<std::string, StringMap<::ONNX_NAMESPACE::AttributeProto const*>>>& localFunctionStack()
+    std::vector<LocalFunctionMetadata>& localFunctionStack()
     {
         return mLocalFunctionStack;
     }
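
LocalFunctionMetadata replaces the previous std::pair of function name and attribute map; the added nodeName records which node instantiated the local function, which helps error reports from nested function bodies name their call site. A hedged sketch of how such a metadata stack might be pushed and popped (simplified types; the function, node, and attribute names are hypothetical, and the real ImporterContext bookkeeping differs):

#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

// Simplified stand-ins for the parser's types.
template <typename T>
using StringMap = std::unordered_map<std::string, T>;

struct LocalFunctionMetadata
{
    std::string functionName;
    std::string nodeName;
    StringMap<int> attrs; // stand-in for AttributeProto const* values
};

int main()
{
    std::vector<LocalFunctionMetadata> stack;

    // Entering a node that instantiates a local function "MyGelu":
    stack.push_back({"MyGelu", "node_17", {{"approximate", 1}}});

    // While importing the function body, the stack describes the call chain,
    // so an error can report both the function and the node that invoked it.
    for (auto const& meta : stack)
    {
        std::cout << meta.functionName << " (from " << meta.nodeName << ")\n";
    }

    stack.pop_back(); // done importing the function body
}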
ModelImporter.cpp (19 changes: 14 additions & 5 deletions)
@@ -221,19 +221,27 @@ void parseNode(
         }
     }
 
-    ONNXTRT_CHECK_NODE((node.output().size() <= static_cast<int32_t>(outputs.size())),
+    int32_t nonEmptyOutputs
+        = std::count_if(node.output().begin(), node.output().end(), [](std::string const& str) { return !str.empty(); });
+    ONNXTRT_CHECK_NODE(nonEmptyOutputs == static_cast<int32_t>(outputs.size()),
         "Node has more output tensors than TRT expected, expected output size is "
-            << outputs.size() << ", actual output size is " << node.output().size() << ".",
+            << outputs.size() << ", actual output size is " << nonEmptyOutputs << ".",
         node, nodeIdx, ErrorCode::kINVALID_GRAPH);
 
     // Set output names and register outputs with the context.
     std::ostringstream ssOutputs{};
     ssOutputs << nodeName << " [" << node.op_type() << "] outputs: ";
-    for (int32_t i = 0; i < node.output().size(); ++i)
+    for (int32_t i = 0, trtCnt = 0; i < node.output().size(); ++i)
     {
         auto const& outputName = node.output(i);
-        auto& output = outputs.at(i);
-        ssOutputs << "[" << outputName << " -> " << output.shape() << "[" << output.getType() << "]" << "], ";
+        // Empty strings denote null-tensor outputs. Ignore these.
+        if (outputName.empty())
+        {
+            continue;
+        }
+        auto& output = outputs.at(trtCnt);
+        ssOutputs << "[" << outputName << " -> " << output.shape() << "[" << output.getType() << "]"
+                  << "], ";
         // Note: This condition is to allow ONNX outputs to be ignored
         // Always register output weights (even empty ones) as it may be mapped to an unused input
         if ((output || output.is_weights()) && !outputName.empty())
@@ -255,6 +263,7 @@
             ONNXTRT_CHECK_NODE(legalUINT8, "TensorRT does not support UINT8 types for intermediate tensors!", node,
                 nodeIdx, ErrorCode::kUNSUPPORTED_NODE);
         }
+        trtCnt++;
     }
     LOG_VERBOSE(ssOutputs.str());
 }
(The remaining 14 changed files are not shown.)
