TensorRT 7.2.1 open source release
Signed-off-by: Kevin Chen <[email protected]>
kevinch-nv committed Oct 20, 2020
1 parent a3a4e38 commit eb559b6
Showing 24 changed files with 1,319 additions and 1,313 deletions.
4 changes: 2 additions & 2 deletions CMakeLists.txt
@@ -42,8 +42,8 @@ set(PARSER_LINKER_SCRIPT ${ONNX2TRT_ROOT}/libnvonnxparser.version)
# Version information
#--------------------------------------------------
set(ONNX2TRT_MAJOR 7)
set(ONNX2TRT_MINOR 1)
set(ONNX2TRT_PATCH 0)
set(ONNX2TRT_MINOR 2)
set(ONNX2TRT_PATCH 1)

#--------------------------------------------------
# Build configurations, global to all projects
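The version variables bumped above are the parser's source of truth for its release number. A quick hedged way to confirm which parser you linked against is to print the public version macros; this sketch assumes `NvOnnxParser.h` exposes `NV_ONNX_PARSER_MAJOR/MINOR/PATCH` macros mirroring the `ONNX2TRT_*` variables (the macro names are an assumption here, not taken from this diff — verify against your headers):

```cpp
// Hedged sketch: confirm the linked parser matches the 7.2.1 bump above.
// Assumption: NvOnnxParser.h defines NV_ONNX_PARSER_MAJOR/MINOR/PATCH
// mirroring the ONNX2TRT_* CMake variables.
#include "NvOnnxParser.h"
#include <cstdio>

int main()
{
    // For this release the expected output is 7.2.1.
    std::printf("ONNX parser version: %d.%d.%d\n",
        NV_ONNX_PARSER_MAJOR, NV_ONNX_PARSER_MINOR, NV_ONNX_PARSER_PATCH);
    return 0;
}
```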
19 changes: 19 additions & 0 deletions Changelog.md
@@ -0,0 +1,19 @@
# ONNX-TensorRT Changelog

## TensorRT 7.2.1 Release - 2020-10-20

### Added
- Added support for parsing large models with external data
- Added API for interfacing with TensorRT's refit feature (a usage sketch follows this changelog)
- Updated `onnx_tensorrt` backend to support dynamic shapes
- Added support for 3D instance normalizations [#515](https://github.com/onnx/onnx-tensorrt/pull/515)
- Improved clarity on the resize modes TRT supports [#512](https://github.com/onnx/onnx-tensorrt/pull/512)
- Added Changelog

### Changed
- Unified docker usage between ONNX-TensorRT and TensorRT.

### Removed
- Removed deprecated docker files.
- Removed deprecated `setup.py`.
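The refit entry above (and the `RefitMap_t` plumbing in `ImporterContext.hpp` below) records, for each ONNX weight name, the TRT layer name and `WeightsRole` it feeds. A minimal hedged sketch of how one such pair lines up with TensorRT 7's standard refit API; the engine, weights, and layer name are placeholders, not names from this commit:

```cpp
// Hedged sketch: consume one (layerName, role) pair, as recorded by the new
// refit map, through TensorRT 7's IRefitter. "conv1" and newKernel are
// illustrative placeholders.
#include "NvInfer.h"

bool refitOneWeight(nvinfer1::ICudaEngine& engine, nvinfer1::ILogger& logger,
    nvinfer1::Weights newKernel)
{
    nvinfer1::IRefitter* refitter = nvinfer1::createInferRefitter(engine, logger);
    // The refit map says this ONNX initializer feeds layer "conv1" as kKERNEL.
    bool ok = refitter->setWeights("conv1", nvinfer1::WeightsRole::kKERNEL, newKernel);
    ok = ok && refitter->refitCudaEngine();
    refitter->destroy(); // TRT 7.x ownership model
    return ok;
}
```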

61 changes: 35 additions & 26 deletions ImporterContext.hpp
@@ -26,12 +26,11 @@
#include "onnx2trt_utils.hpp"

#include <list>
#include <set>
#include <string>
#include <unordered_map>

namespace onnx2trt
{

class ImporterContext final : public IImporterContext
{
nvinfer1::INetworkDefinition* _network;
@@ -45,22 +44,20 @@ class ImporterContext final : public IImporterContext
StringMap<float> mTensorRangeMins;
StringMap<float> mTensorRangeMaxes;
StringMap<nvinfer1::DataType> mLayerPrecisions;
std::set<std::string> mTensorNames; // keep track of tensor names used so far,
// to avoid duplicate naming in TRT.
std::set<std::string> mLayerNames; // keep track of layer names used so far,
// to avoid duplicate naming in TRT.
int64_t mSuffixCounter = 0; // increasing suffix counter used to uniquify layer names.
std::unordered_set<std::string> mUnsupportedShapeTensors; // Container to hold any shape tensors that are
// the output of layers that do not support
// shape tensors.
StringMap<std::string> mLoopTensors; // Container to map subgraph tensors to
// their original outer graph names.
std::string mOnnxFileLocation; // Keep track of the directory of the parsed ONNX file
std::set<std::string> mTensorNames; // Keep track of how many times a tensor name shows up, to avoid duplicate naming in TRT.
std::set<std::string> mLayerNames; // Keep track of how many times a layer name shows up, to avoid duplicate naming in TRT.
int64_t mSuffixCounter = 0; // increasing suffix counter used to uniquify layer names.
std::unordered_set<std::string> mUnsupportedShapeTensors; // Container to hold output tensor names of layers that produce shape tensor outputs but do not natively support them.
StringMap<std::string> mLoopTensors; // Container to map subgraph tensors to their original outer graph names.
std::string mOnnxFileLocation; // Keep track of the directory of the parsed ONNX file
std::list<std::string> mInitializerNames; // Keep track of unique names of any initializers
RefitMap_t* mRefitMap; // Keep track of names of ONNX refittable weights with their corresponding TRT layer and role

public:
ImporterContext(nvinfer1::INetworkDefinition* network, nvinfer1::ILogger* logger)
ImporterContext(nvinfer1::INetworkDefinition* network, nvinfer1::ILogger* logger, RefitMap_t* refitMap)
: _network(network)
, _logger(logger)
, mRefitMap(refitMap)
{
}
virtual nvinfer1::INetworkDefinition* network() override
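`RefitMap_t` and `WeightsPair_t` are referenced here but defined elsewhere in the repo. From the `insertRefitMap` body in the next hunk, a plausible hedged reconstruction (treat this as a sketch, not the committed definitions):

```cpp
// Hedged reconstruction of the refit bookkeeping types used above; the real
// definitions are not part of this diff.
#include <string>
#include <unordered_map>
#include "NvInfer.h"

struct WeightsPair_t
{
    std::string layerName;      // TRT layer consuming the ONNX weights
    nvinfer1::WeightsRole role; // e.g. kKERNEL, kBIAS, kSCALE, kSHIFT
};

// ONNX weight name -> (layer, role); filled in by insertRefitMap.
using RefitMap_t = std::unordered_map<std::string, WeightsPair_t>;
```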
@@ -103,8 +100,11 @@ class ImporterContext final : public IImporterContext
{
return mOnnxFileLocation;
}
// This actually handles weights as well, but is named this way to be
// consistent with the tensors()
virtual void insertRefitMap(std::string weightsName, std::string layerName, nvinfer1::WeightsRole role) override
{
(*mRefitMap)[weightsName] = WeightsPair_t{layerName, role};
}
// This actually handles weights as well, but is named this way to be consistent with the tensors() method.
virtual void registerTensor(TensorOrWeights tensor, const std::string& basename) override
{
// TRT requires unique tensor names.
@@ -119,16 +119,20 @@

LOG_VERBOSE("Registering tensor: " << uniqueName << " for ONNX tensor: " << basename);
}
else if (tensor.is_weights() && tensor.weights().type == ::ONNX_NAMESPACE::TensorProto::INT64)
else if (tensor.is_weights())
{
mInitializerNames.push_back(uniqueName);
const auto& weights = tensor.weights();
tensor = ShapedWeights{::ONNX_NAMESPACE::TensorProto::INT32,
convertINT64(reinterpret_cast<int64_t*>(weights.values), weights.shape, ctx), weights.shape};
if (tensor.weights().type == ::ONNX_NAMESPACE::TensorProto::INT64)
{
tensor = ShapedWeights{::ONNX_NAMESPACE::TensorProto::INT32,
convertINT64(reinterpret_cast<int64_t*>(weights.values), weights.shape, ctx), weights.shape};
}
tensor.weights().setName(mInitializerNames.back().c_str());
}
}
// Overwrite previous tensors registered with the same name (this only
// happens when there are subgraphs, and in that case, overwriting is the
// desired behavior).
// Overwrite previous tensors registered with the same name (this only happens when there are subgraphs,
// and in that case, overwriting is the desired behavior).
this->tensors()[basename] = std::move(tensor);
}
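`convertINT64` (from `onnx2trt_utils`) performs the INT64-to-INT32 narrowing used above, since TRT has no native INT64 type. A hedged sketch of the conversion, assuming out-of-range values are clamped rather than rejected (the real helper also warns when clamping occurs):

```cpp
// Hedged sketch of the narrowing convertINT64 performs; the clamping
// behavior is an assumption, not taken from this diff.
#include <algorithm>
#include <cstdint>
#include <limits>
#include <vector>

std::vector<int32_t> convertInt64Sketch(const int64_t* values, size_t count)
{
    std::vector<int32_t> out;
    out.reserve(count);
    for (size_t i = 0; i < count; ++i)
    {
        const int64_t lo = std::numeric_limits<int32_t>::min();
        const int64_t hi = std::numeric_limits<int32_t>::max();
        // Clamp into INT32 range, then narrow.
        out.push_back(static_cast<int32_t>(std::min(hi, std::max(lo, values[i]))));
    }
    return out;
}
```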

@@ -138,11 +142,17 @@
if (layer)
{
const std::string name = basename.empty() ? layer->getName() : basename;
const std::string uniqueName = generateUniqueName(mLayerNames, basename);
const std::string uniqueName = generateUniqueName(mLayerNames, name);

auto* ctx = this; // To enable logging.
LOG_VERBOSE("Registering layer: " << name << " for ONNX node: " << basename);

if (layer->getType() == nvinfer1::LayerType::kCONSTANT)
{
LOG_VERBOSE("Registering constant layer: " << uniqueName << " for ONNX initializer: " << basename);
}
else
{
LOG_VERBOSE("Registering layer: " << uniqueName << " for ONNX node: " << basename);
}
layer->setName(uniqueName.c_str());
}
}
@@ -228,7 +238,6 @@
return _opsets.at(domain);
}
}

private:
std::string generateUniqueName(std::set<std::string>& namesSet, const std::string& basename)
{
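The body of `generateUniqueName` is collapsed in this view. From its call sites in `registerTensor` and `registerLayer` above, a plausible reconstruction (a sketch only; the committed version uses the `mSuffixCounter` member where this one takes a parameter):

```cpp
// Hedged reconstruction of the collapsed generateUniqueName body: append an
// increasing suffix until the candidate name is unused, then record it.
#include <cstdint>
#include <set>
#include <string>

std::string generateUniqueNameSketch(
    std::set<std::string>& namesSet, int64_t& suffixCounter, const std::string& basename)
{
    std::string candidate = basename;
    // TRT requires unique names, so retry with "_<n>" suffixes on collision.
    while (namesSet.count(candidate))
    {
        candidate = basename + "_" + std::to_string(suffixCounter);
        ++suffixCounter;
    }
    namesSet.insert(candidate);
    return candidate;
}
```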
61 changes: 33 additions & 28 deletions ModelImporter.cpp
@@ -242,14 +242,16 @@ Status importInputs(ImporterContext* ctx, ::ONNX_NAMESPACE::GraphProto const& gr
ASSERT(weight_desc.memoryType == ONNXIFI_MEMORY_TYPE_CPU, ErrorCode::kINVALID_VALUE);
ASSERT(convertWeightDescriptor(weight_desc, &weights, ctx), ErrorCode::kUNSUPPORTED_NODE);
tensor = weights;
ctx->registerTensor(std::move(tensor), input.name());
}
// Do not register any initializers
else if (!initializers.count(input.name()))
{
nvinfer1::ITensor* tensor_ptr;
TRT_CHECK(importInput(ctx, input, &tensor_ptr));
tensor = tensor_ptr;
ctx->registerTensor(std::move(tensor), input.name());
}
ctx->registerTensor(std::move(tensor), input.name());
}

return Status::success();
@@ -335,7 +337,6 @@ bool ModelImporter::supportsModel(
}
}
auto* ctx = &_importer_ctx;

auto checkForInput = [&input_node, &ctx](::ONNX_NAMESPACE::NodeProto const& node) {
for (auto input : node.input())
{
@@ -347,7 +348,7 @@
return false;
};

auto checkForShapeTensors = [&ctx](::ONNX_NAMESPACE::NodeProto const& node){
auto checkShapeTensorType = [&ctx](::ONNX_NAMESPACE::NodeProto const& node){
for (int i = 0; i < ctx->network()->getNbInputs(); i++)
{
auto input = ctx->network()->getInput(i);
@@ -374,7 +375,7 @@
std::vector<size_t> topological_order;
if (!toposort(model.graph().node(), &topological_order))
{
LOG_ERROR("Failed to sort model topologically, exiting ...");
cout << "Failed to sort model topologically, exiting ..." << endl;
return false;
}
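`toposort` (from `toposort.hpp`) orders nodes so every tensor is produced before it is consumed, and fails on cycles. A self-contained hedged sketch of the idea over simplified node records, using Kahn's algorithm (the real helper works directly on `NodeProto`s):

```cpp
// Hedged sketch of topological sorting ONNX-style nodes by tensor names.
// Kahn's algorithm: repeatedly emit nodes whose producers are all emitted.
#include <queue>
#include <string>
#include <unordered_map>
#include <vector>

struct NodeSketch
{
    std::vector<std::string> inputs;
    std::vector<std::string> outputs;
};

bool toposortSketch(const std::vector<NodeSketch>& nodes, std::vector<size_t>* order)
{
    std::unordered_map<std::string, size_t> producer;
    for (size_t i = 0; i < nodes.size(); ++i)
        for (const auto& out : nodes[i].outputs)
            producer[out] = i;

    std::vector<size_t> inDegree(nodes.size(), 0);
    std::vector<std::vector<size_t>> consumers(nodes.size());
    for (size_t i = 0; i < nodes.size(); ++i)
        for (const auto& in : nodes[i].inputs)
        {
            auto it = producer.find(in);
            if (it != producer.end()) // graph inputs/initializers have no producer
            {
                consumers[it->second].push_back(i);
                ++inDegree[i];
            }
        }

    std::queue<size_t> ready;
    for (size_t i = 0; i < nodes.size(); ++i)
        if (inDegree[i] == 0)
            ready.push(i);

    while (!ready.empty())
    {
        const size_t i = ready.front();
        ready.pop();
        order->push_back(i);
        for (size_t c : consumers[i])
            if (--inDegree[c] == 0)
                ready.push(c);
    }
    return order->size() == nodes.size(); // false => the graph has a cycle
}
```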

@@ -386,14 +387,13 @@
// 2. It is not directly connected to an unsupported input
// 3. It is not directly connected to an unsupported shape tensor input
// 4. It did not illegally produce a shape tensor output
// 5. It was successfully parsed
// 5. The importer function did not throw an assertion
bool registered = supportsOperator(node.op_type().c_str());
bool containsInput = (input_node.empty()) ? false : checkForInput(node);
bool containsShapeInput = checkForShapeTensors(node);
auto tensorName = node.name();
bool supportedShapeTensor = ctx->unsupportedShapeTensors().count(tensorName) == 0 ? true : false;
bool containsIndex = node_idx == error_node;
if (registered && !containsInput && !containsShapeInput && supportedShapeTensor && !containsIndex)
bool unsupportedInput = (input_node.empty()) ? false : checkForInput(node);
bool unsupportedShapeType = checkShapeTensorType(node);
bool unsupportedShapeTensor = ctx->unsupportedShapeTensors().count(node.name()) > 0 ? true : false;
bool unsuccessfulParse = node_idx == error_node;
if (registered && !unsupportedInput && !unsupportedShapeType && !unsupportedShapeTensor && !unsuccessfulParse)
{
if (newSubGraph)
{
@@ -408,7 +408,6 @@
}
else
{
LOG_WARNING("Found unsupported node: " << tensorName);
// This is not a supported node, reset newSubGraph
newSubGraph = true;
allSupported = false;
@@ -468,10 +467,12 @@ void removeShapeTensorCasts(IImporterContext* ctx)
nvinfer1::ILayer* layer = ctx->network()->getLayer(i);
if (layer->getNbOutputs() > 0 && layer->getOutput(0)->isShapeTensor())
{
layer->resetPrecision();
layer->resetOutputType(0);
nvinfer1::ITensor& t = *layer->getOutput(0);
// Assume that boolean tensors were not cast, and thus have their type correctly set.
const nvinfer1::DataType shapeTensorType = t.getType() == nvinfer1::DataType::kBOOL ? nvinfer1::DataType::kBOOL : nvinfer1::DataType::kINT32;
layer->setPrecision(shapeTensorType);
layer->setOutputType(0, shapeTensorType);
// Set type only if necessary, to avoid TensorRT warnings
// about setting type of non-input/output tensors.
Expand All @@ -486,9 +487,9 @@ void removeShapeTensorCasts(IImporterContext* ctx)
auto reduceOp = type == nvinfer1::LayerType::kREDUCE ? (static_cast<nvinfer1::IReduceLayer*>(layer))->getOperation() : nvinfer1::ReduceOperation::kSUM;
if (!supportsShapeTensor(type, elementwiseOp, reduceOp))
{
auto name = layer->getName();
auto name = layer->getOutput(0)->getName();
ctx->unsupportedShapeTensors().insert(name);
LOG_ERROR("Found unsupported shape-tensor producing layer:" << name);
LOG_ERROR("Found " << name << " as a shape tensor output from a layer that does not support it!");
}
}
}
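`supportsShapeTensor` is the whitelist consulted above; its definition is not part of this diff. A hedged illustration of the kind of check it performs — the specific layer set below is illustrative, not TRT's authoritative list:

```cpp
// Hedged illustration: only certain layer types may legally produce shape
// tensors in TRT 7.2. The cases below are examples, not the exact list.
#include "NvInfer.h"

bool supportsShapeTensorSketch(nvinfer1::LayerType type,
    nvinfer1::ElementWiseOperation eOp, nvinfer1::ReduceOperation rOp)
{
    switch (type)
    {
    case nvinfer1::LayerType::kSHAPE:
    case nvinfer1::LayerType::kCONSTANT:
    case nvinfer1::LayerType::kCONCATENATION:
    case nvinfer1::LayerType::kGATHER:
    case nvinfer1::LayerType::kSLICE:
        return true;
    case nvinfer1::LayerType::kELEMENTWISE:
        // Integer arithmetic is fine on shape tensors; kPOW, for example, is not.
        return eOp != nvinfer1::ElementWiseOperation::kPOW;
    case nvinfer1::LayerType::kREDUCE:
        return rOp == nvinfer1::ReduceOperation::kSUM
            || rOp == nvinfer1::ReduceOperation::kPROD;
    default:
        return false;
    }
}
```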
@@ -526,14 +527,17 @@ Status ModelImporter::importModel(
TRT_CHECK(importInputs(&_importer_ctx, graph, &_importer_ctx.tensors(), weight_count, weight_descriptors));
TRT_CHECK(parseGraph(&_importer_ctx, graph, model.producer_name() == "TensorRT", &_current_node));

_current_node = -1;
// Mark outputs defined in the ONNX model (unless tensors are user-requested)
for (::ONNX_NAMESPACE::ValueInfoProto const& output : graph.output())
{
ASSERT(_importer_ctx.tensors().count(output.name()), ErrorCode::kINVALID_GRAPH);
ASSERT(_importer_ctx.tensors().at(output.name()).is_tensor(), ErrorCode::kUNSUPPORTED_GRAPH);
nvinfer1::ITensor* output_tensor_ptr = &_importer_ctx.tensors().at(output.name()).tensor();

nvinfer1::ITensor* output_tensor_ptr
= &convertToTensor(_importer_ctx.tensors().at(output.name()), &_importer_ctx);
LOG_VERBOSE("Marking " << output_tensor_ptr->getName() << " as output: " << output.name());
output_tensor_ptr->setName(output.name().c_str());

if (output_tensor_ptr->isNetworkInput())
{
// HACK WAR for TRT not allowing input == output
Expand All @@ -543,6 +547,7 @@ Status ModelImporter::importModel(
ASSERT(output_tensor_ptr, ErrorCode::kUNSUPPORTED_NODE);
output_tensor_ptr->setName(output.name().c_str());
}

nvinfer1::ITensor** user_output = _importer_ctx.getUserOutput(output.name().c_str());
if (!user_output)
{
@@ -640,24 +645,24 @@ Status ModelImporter::importModel(
return Status::success();
}

bool ModelImporter::parseFromFile(const char* onnxModelFile, int verbosity)
bool ModelImporter::parseFromFile(const char* onnxModelFile, int32_t verbosity)
{
GOOGLE_PROTOBUF_VERIFY_VERSION;
::ONNX_NAMESPACE::ModelProto onnx_model;

bool is_binary = ParseFromFile_WAR(&onnx_model, onnxModelFile);
const bool is_binary = ParseFromFile_WAR(&onnx_model, onnxModelFile);
if (!is_binary && !ParseFromTextFile(&onnx_model, onnxModelFile))
{
cerr << "Failed to parse ONNX model from file" << onnxModelFile << endl;
cerr << "Failed to parse ONNX model from file: " << onnxModelFile << endl;
return false;
}

// Keep track of the absolute path to the ONNX file.
_importer_ctx.setOnnxFileLocation(onnxModelFile);

if (verbosity >= (int) nvinfer1::ILogger::Severity::kWARNING)
if (verbosity >= static_cast<int32_t>(nvinfer1::ILogger::Severity::kWARNING))
{
int64_t opset_version = (onnx_model.opset_import().size() ? onnx_model.opset_import(0).version() : 0);
const int64_t opset_version = (onnx_model.opset_import().size() ? onnx_model.opset_import(0).version() : 0);
cout << "----------------------------------------------------------------" << endl;
cout << "Input filename: " << onnxModelFile << endl;
cout << "ONNX IR version: " << onnx_ir_version_string(onnx_model.ir_version()) << endl;
@@ -672,30 +677,30 @@ bool ModelImporter::parseFromFile(const char* onnxModelFile, int verbosity)

{ //...Read input file, parse it
std::ifstream onnx_file(onnxModelFile, std::ios::binary | std::ios::ate);
std::streamsize file_size = onnx_file.tellg();
const std::streamsize file_size = onnx_file.tellg();
onnx_file.seekg(0, std::ios::beg);
std::vector<char> onnx_buf(file_size);
if (!onnx_file.read(onnx_buf.data(), onnx_buf.size()))
{
cerr << "ERROR: Failed to read from file " << onnxModelFile << endl;
cerr << "ERROR: Failed to read from file: " << onnxModelFile << endl;
return false;
}
if (!parse(onnx_buf.data(), onnx_buf.size()))
{
int nerror = getNbErrors();
for (int i = 0; i < nerror; ++i)
const int32_t nerror = getNbErrors();
for (int32_t i = 0; i < nerror; ++i)
{
nvonnxparser::IParserError const* error = getError(i);
if (error->node() != -1)
{
::ONNX_NAMESPACE::NodeProto const& node = onnx_model.graph().node(error->node());
cerr << "While parsing node number " << error->node() << " [" << node.op_type();
if (node.output().size() && verbosity >= (int) nvinfer1::ILogger::Severity::kVERBOSE)
if (node.output().size() && verbosity >= static_cast<int32_t>(nvinfer1::ILogger::Severity::kVERBOSE))
{
cerr << " -> \"" << node.output(0) << "\"";
}
cerr << "]:" << endl;
if (verbosity >= (int) nvinfer1::ILogger::Severity::kVERBOSE)
if (verbosity >= static_cast<int32_t>(nvinfer1::ILogger::Severity::kVERBOSE))
{
cout << "--- Begin node ---" << endl;
cout << node << endl;
@@ -708,7 +713,7 @@ bool ModelImporter::parseFromFile(const char* onnxModelFile, int verbosity)
return false;
}

if (verbosity >= (int) nvinfer1::ILogger::Severity::kVERBOSE)
if (verbosity >= static_cast<int32_t>(nvinfer1::ILogger::Severity::kVERBOSE))
{
cout << " ----- Parsing of ONNX model " << onnxModelFile << " is Done ---- " << endl;
}