Changed files:
config.json
readme.txt
src/config.h
src/detecter_tools/calibrator.cpp
src/detecter_tools/calibrator.h
src/detecter_tools/detector.cpp
src/detecter_tools/model.cpp
src/detecter_tools/model.h
src/detecter_tools/trt_utils.cpp
src/detecter_tools/trt_utils.h
src/h_interface.cpp
config.json
@@ -3,6 +3,9 @@
    "runtime": "/opt/vasystem/libs/Detect:/usr/local/cuda-11.1/lib64:",
    "param": {
        "model_path": "/opt/vasystem/bin/models/baseDetector/baseDetector.bin",
        "model_cfg": "/opt/vasystem/bin/models/baseDetector/baseDetector.cfg",          // parameters under "param" that the algorithm itself may use
        "model_wts": "/opt/vasystem/bin/models/baseDetector/baseDetector.weights",      // parameters under "param" that the algorithm itself may use
        //"model_path": "/data/disk1/project/model_dump/02_yolo/baseDetetor_small.bin", // parameters under "param" that the algorithm itself may use
        "type":1,
        "max_cam_num": 8,
        "wander_time": 5,
readme.txt
New file @@ -0,0 +1,4 @@
config.json is the configuration file.
model_path is the path of the converted TRT model.
If there is no converted model file, model_cfg (the cfg file) and model_wts (the weights file) must be configured; they are not needed if model_path already exists.
If the "type" field is 1, this is the normal version; if it is 2, it is the tiny version.
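This load-or-build rule is what the new Detecter constructor in model.cpp implements. Below is a minimal, illustrative sketch of that decision, using the paths from config.json above; the commented-out calls stand in for the repository's own helpers and are not reproduced here:

// Illustrative sketch (not part of this patch): the load-or-build rule described
// in readme.txt, as implemented by the new Detecter constructor in model.cpp.
#include <fstream>
#include <iostream>
#include <string>

static bool engineExists(const std::string& path)
{
    std::ifstream f(path, std::ios::binary);   // a plain existence/readability check
    return f.good();
}

int main()
{
    const std::string model_path = "/opt/vasystem/bin/models/baseDetector/baseDetector.bin";
    const std::string model_cfg  = "/opt/vasystem/bin/models/baseDetector/baseDetector.cfg";
    const std::string model_wts  = "/opt/vasystem/bin/models/baseDetector/baseDetector.weights";

    if (engineExists(model_path))
    {
        std::cout << "Using cached TensorRT engine: " << model_path << std::endl;
        // loadTRTEngine(model_path, ...) is called directly in this case.
    }
    else
    {
        std::cout << "Building engine from " << model_cfg << " and " << model_wts << std::endl;
        // parseConfigFile(model_cfg); parseConfigBlocks(); createYOLOEngine();
        // then writePlanFileToDisk() caches the engine at model_path for the next run.
    }
    return 0;
}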
src/config.h
@@ -51,6 +51,8 @@ typedef struct m_staticStruct
{
    static std::string model_path;
    static std::string model_cfg;
    static std::string model_wts;
    static int type;
    static bool isTrack;
    static int max_cam_num;
src/detecter_tools/calibrator.cpp
New file @@ -0,0 +1,114 @@ /** MIT License Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * */ #include "calibrator.h" #include <fstream> #include <iostream> #include <iterator> Int8EntropyCalibrator::Int8EntropyCalibrator(const uint32_t& batchSize, const std::string& calibImages, const std::string& calibImagesPath, const std::string& calibTableFilePath, const uint64_t& inputSize, const uint32_t& inputH, const uint32_t& inputW, const std::string& inputBlobName, const std::string &s_net_type_) : m_BatchSize(batchSize), m_InputH(inputH), m_InputW(inputW), m_InputSize(inputSize), m_InputCount(batchSize * inputSize), m_InputBlobName(inputBlobName), m_CalibTableFilePath(calibTableFilePath), m_ImageIndex(0), _s_net_type(s_net_type_) { if (!fileExists(m_CalibTableFilePath, false)) { m_ImageList = loadImageList(calibImages, calibImagesPath); m_ImageList.resize(static_cast<int>(m_ImageList.size() / m_BatchSize) * m_BatchSize); std::random_shuffle(m_ImageList.begin(), m_ImageList.end(), [](int i) { return rand() % i; }); } NV_CUDA_CHECK(cudaMalloc(&m_DeviceInput, m_InputCount * sizeof(float))); } Int8EntropyCalibrator::~Int8EntropyCalibrator() { NV_CUDA_CHECK(cudaFree(m_DeviceInput)); } bool Int8EntropyCalibrator::getBatch(void* bindings[], const char* names[], int nbBindings) { if (m_ImageIndex + m_BatchSize >= m_ImageList.size()) return false; // Load next batch std::vector<DsImage> dsImages(m_BatchSize); for (uint32_t j = m_ImageIndex; j < m_ImageIndex + m_BatchSize; ++j) { dsImages.at(j - m_ImageIndex) = DsImage(m_ImageList.at(j), _s_net_type, m_InputH, m_InputW); } m_ImageIndex += m_BatchSize; cv::Mat trtInput = blobFromDsImages(dsImages, m_InputH, m_InputW); NV_CUDA_CHECK(cudaMemcpy(m_DeviceInput, trtInput.ptr<float>(0), m_InputCount * sizeof(float), cudaMemcpyHostToDevice)); assert(!strcmp(names[0], m_InputBlobName.c_str())); bindings[0] = m_DeviceInput; return true; } const void* Int8EntropyCalibrator::readCalibrationCache(size_t& length) { void* output; m_CalibrationCache.clear(); assert(!m_CalibTableFilePath.empty()); std::ifstream input(m_CalibTableFilePath, std::ios::binary | std::ios::in); input >> std::noskipws; if (m_ReadCache && input.good()) std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), std::back_inserter(m_CalibrationCache)); length = m_CalibrationCache.size(); if (length) { std::cout << "Using cached calibration table to build the engine" << std::endl; output = &m_CalibrationCache[0]; } else { std::cout << "New calibration table will be 
created to build the engine" << std::endl; output = nullptr; } return output; } void Int8EntropyCalibrator::writeCalibrationCache(const void* cache, size_t length) { assert(!m_CalibTableFilePath.empty()); std::ofstream output(m_CalibTableFilePath, std::ios::binary); output.write(reinterpret_cast<const char*>(cache), length); output.close(); } src/detecter_tools/calibrator.h
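calibrator.h below declares the IInt8EntropyCalibrator2 implementation that createYOLOEngine() in model.cpp accepts. As a rough, self-contained sketch of how such a calibrator is typically wired into a TensorRT 7-style build (only the nvinfer1 calls are real API; the function itself is illustrative and not part of this patch):

// Hypothetical wiring of an IInt8EntropyCalibrator2 into an engine build
// (TensorRT 7-era IBuilderConfig API, matching the createYOLOEngine() changes in model.cpp).
#include "NvInfer.h"

nvinfer1::ICudaEngine* buildInt8Engine(nvinfer1::IBuilder& builder,
                                       nvinfer1::INetworkDefinition& network,
                                       nvinfer1::IInt8EntropyCalibrator2* calibrator)
{
    nvinfer1::IBuilderConfig* config = builder.createBuilderConfig();
    config->setMaxWorkspaceSize(1 << 20);                  // same workspace budget as model.cpp
    if (builder.platformHasFastInt8() && calibrator != nullptr)
    {
        config->setFlag(nvinfer1::BuilderFlag::kINT8);     // enable INT8 kernels
        config->setInt8Calibrator(calibrator);             // TensorRT pulls batches via getBatch()
    }
    nvinfer1::ICudaEngine* engine = builder.buildEngineWithConfig(network, *config);
    config->destroy();                                     // TRT 7 still uses destroy()
    return engine;                                         // caller owns the engine
}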
New file @@ -0,0 +1,62 @@ /** MIT License Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * */ #ifndef _CALIBRATOR_H_ #define _CALIBRATOR_H_ #include "NvInfer.h" #include "ds_image.h" #include "trt_utils.h" class Int8EntropyCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { public: Int8EntropyCalibrator(const uint32_t& batchSize, const std::string& calibImages, const std::string& calibImagesPath, const std::string& calibTableFilePath, const uint64_t& inputSize, const uint32_t& inputH, const uint32_t& inputW, const std::string& inputBlobName,const std::string &s_net_type_); virtual ~Int8EntropyCalibrator(); int getBatchSize() const override { return m_BatchSize; } bool getBatch(void* bindings[], const char* names[], int nbBindings) override; const void* readCalibrationCache(size_t& length) override; void writeCalibrationCache(const void* cache, size_t length) override; private: const uint32_t m_BatchSize; const uint32_t m_InputH; const uint32_t m_InputW; const uint64_t m_InputSize; const uint64_t m_InputCount; const std::string m_InputBlobName; const std::string _s_net_type; const std::string m_CalibTableFilePath{nullptr}; uint32_t m_ImageIndex; bool m_ReadCache{true}; void* m_DeviceInput{nullptr}; std::vector<std::string> m_ImageList; std::vector<char> m_CalibrationCache; }; #endif src/detecter_tools/detector.cpp
@@ -31,10 +31,10 @@
        vec_ds_images.emplace_back(img, _vec_net_type[_config.net_type], _p_net->getInputH(), _p_net->getInputW());
    }
    cv::Mat trtInput = blobFromDsImages(vec_ds_images, _p_net->getInputH(),_p_net->getInputW());
    timer.out("pre");
    timer.out("eve pre detect ");
    _p_net->doInference(trtInput.data, vec_ds_images.size());
    timer.reset();
    _p_net->doInference(trtInput.data, vec_ds_images.size());
    for (uint32_t i = 0; i < vec_ds_images.size(); ++i)
    {
        auto curImage = vec_ds_images.at(i);
@@ -65,9 +65,8 @@
        }
        vec_batch_result[i] = vec_result;
    }
    timer.out("post");
    timer.out("eve pre detect post");
    DEBUG("--detect over--" );
}
void Detector::set_gpu_id(const int id)
@@ -95,9 +94,9 @@
void Detector::build_net()
{
    if(_config.net_type == SMALL)
        _p_net = std::unique_ptr<Detecter>{new Detecter(_info,_infer_param,2)};
    else{
    if(_config.net_type == COMMON)
        _p_net = std::unique_ptr<Detecter>{new Detecter(_info,_infer_param,1)};
    else{
        _p_net = std::unique_ptr<Detecter>{new Detecter(_info,_infer_param,2)};
    }
}
src/detecter_tools/model.cpp
@@ -11,6 +11,7 @@ Detecter::Detecter( const NetworkInfo& networkInfo, const InferParams& inferParams, int type) : m_NetworkType(networkInfo.networkType), m_InputBlobName(networkInfo.inputBlobName), m_InputH(416), m_InputW(416), @@ -26,10 +27,17 @@ m_Context(nullptr), m_InputBindingIndex(-1), m_CudaStream(nullptr), m_PluginFactory(new PluginFactory) m_PluginFactory(new PluginFactory), m_TinyMaxpoolPaddingFormula(new YoloTinyMaxpoolPaddingFormula) { setOutput(type); m_EnginePath = m_staticStruct::model_path; if(!fileExists(m_EnginePath)) { m_configBlocks = parseConfigFile(m_staticStruct::model_cfg); parseConfigBlocks(); createYOLOEngine(); } setOutput(type); DEBUG((boost::format("m_EnginePath:%s")%m_EnginePath).str()); assert(m_PluginFactory != nullptr); m_Engine = loadTRTEngine(m_EnginePath, m_PluginFactory, m_Logger); @@ -67,7 +75,482 @@ m_PluginFactory = nullptr; } // m_TinyMaxpoolPaddingFormula.reset(); m_TinyMaxpoolPaddingFormula.reset(); } std::vector<std::map<std::string, std::string>> Detecter::parseConfigFile(const std::string cfgFilePath) { std::cout << "::::::::::" << cfgFilePath <<std::endl; assert(fileExists(cfgFilePath)); std::ifstream file(cfgFilePath); assert(file.good()); std::string line; std::vector<std::map<std::string, std::string>> blocks; std::map<std::string, std::string> block; while (getline(file, line)) { if (line.empty()) continue; if (line.front() == '#') continue; line = trim(line); if (line.front() == '[') { if (!block.empty()) { blocks.push_back(block); block.clear(); } std::string key = "type"; std::string value = trim(line.substr(1, line.size() - 2)); block.insert(std::pair<std::string, std::string>(key, value)); } else { size_t cpos = line.find('='); std::string key = trim(line.substr(0, cpos)); std::string value = trim(line.substr(cpos + 1)); block.insert(std::pair<std::string, std::string>(key, value)); } } blocks.push_back(block); return blocks; } void Detecter::parseConfigBlocks() { for (auto block : m_configBlocks) { if (block.at("type") == "net") { assert((block.find("height") != block.end()) && "Missing 'height' param in network cfg"); assert((block.find("width") != block.end()) && "Missing 'width' param in network cfg"); assert((block.find("channels") != block.end()) && "Missing 'channels' param in network cfg"); assert((block.find("batch") != block.end()) && "Missing 'batch' param in network cfg"); m_InputH = std::stoul(trim(block.at("height"))); m_InputW = std::stoul(trim(block.at("width"))); m_InputC = std::stoul(trim(block.at("channels"))); m_BatchSize = std::stoi(trim(block.at("batch"))); // assert(m_InputW == m_InputH); m_InputSize = m_InputC * m_InputH * m_InputW; } else if ((block.at("type") == "region") || (block.at("type") == "yolo")) { assert((block.find("num") != block.end()) && std::string("Missing 'num' param in " + block.at("type") + " layer").c_str()); assert((block.find("classes") != block.end()) && std::string("Missing 'classes' param in " + block.at("type") + " layer") .c_str()); assert((block.find("anchors") != block.end()) && std::string("Missing 'anchors' param in " + block.at("type") + " layer") .c_str()); TensorInfo outputTensor; std::string anchorString = block.at("anchors"); while (!anchorString.empty()) { size_t npos = anchorString.find_first_of(','); if (npos != std::string::npos) { float anchor = std::stof(trim(anchorString.substr(0, npos))); outputTensor.anchors.push_back(anchor); anchorString.erase(0, npos + 1); } else { float anchor = std::stof(trim(anchorString)); outputTensor.anchors.push_back(anchor); break; } } 
assert((block.find("mask") != block.end()) && std::string("Missing 'mask' param in " + block.at("type") + " layer") .c_str()); std::string maskString = block.at("mask"); while (!maskString.empty()) { size_t npos = maskString.find_first_of(','); if (npos != std::string::npos) { uint32_t mask = std::stoul(trim(maskString.substr(0, npos))); outputTensor.masks.push_back(mask); maskString.erase(0, npos + 1); } else { uint32_t mask = std::stoul(trim(maskString)); outputTensor.masks.push_back(mask); break; } } outputTensor.numBBoxes = outputTensor.masks.size() > 0 ? outputTensor.masks.size() : std::stoul(trim(block.at("num"))); outputTensor.numClasses = std::stoul(block.at("classes")); if (m_ClassNames.empty()) { for (uint32_t i=0;i< outputTensor.numClasses;++i) { m_ClassNames.push_back(std::to_string(i)); } } outputTensor.blobName = "yolo_" + std::to_string(_n_yolo_ind); outputTensor.gridSize = (m_InputH / 32) * pow(2, _n_yolo_ind); outputTensor.grid_h = (m_InputH / 32) * pow(2, _n_yolo_ind); outputTensor.grid_w = (m_InputW / 32) * pow(2, _n_yolo_ind); outputTensor.gridSize = (m_InputH / 32) * pow(2, 2-_n_yolo_ind); outputTensor.grid_h = (m_InputH / 32) * pow(2, 2-_n_yolo_ind); outputTensor.grid_w = (m_InputW / 32) * pow(2, 2-_n_yolo_ind); outputTensor.stride = m_InputH / outputTensor.gridSize; outputTensor.stride_h = m_InputH / outputTensor.grid_h; outputTensor.stride_w = m_InputW / outputTensor.grid_w; outputTensor.volume = outputTensor.grid_h* outputTensor.grid_w *(outputTensor.numBBoxes*(5 + outputTensor.numClasses)); m_OutputTensors.push_back(outputTensor); _n_yolo_ind++; } } } void Detecter::createYOLOEngine(const nvinfer1::DataType dataType, Int8EntropyCalibrator* calibrator) { if (fileExists(m_EnginePath))return; std::vector<float> weights = loadWeights(m_staticStruct::model_wts, m_NetworkType); std::vector<nvinfer1::Weights> trtWeights; int weightPtr = 0; int channels = m_InputC; m_Builder = nvinfer1::createInferBuilder(m_Logger); nvinfer1::IBuilderConfig* config = m_Builder->createBuilderConfig(); m_Network = m_Builder->createNetworkV2(0U); if ((dataType == nvinfer1::DataType::kINT8 && !m_Builder->platformHasFastInt8()) || (dataType == nvinfer1::DataType::kHALF && !m_Builder->platformHasFastFp16())) { std::cout << "Platform doesn't support this precision." 
<< std::endl; assert(0); } nvinfer1::ITensor* data = m_Network->addInput( m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT, nvinfer1::DimsCHW{static_cast<int>(m_InputC), static_cast<int>(m_InputH), static_cast<int>(m_InputW)}); assert(data != nullptr); // Add elementwise layer to normalize pixel values 0-1 nvinfer1::Dims divDims{ 3, {static_cast<int>(m_InputC), static_cast<int>(m_InputH), static_cast<int>(m_InputW)}, {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL, nvinfer1::DimensionType::kSPATIAL}}; nvinfer1::Weights divWeights{nvinfer1::DataType::kFLOAT, nullptr, static_cast<int64_t>(m_InputSize)}; float* divWt = new float[m_InputSize]; for (uint32_t w = 0; w < m_InputSize; ++w) divWt[w] = 255.0; divWeights.values = divWt; trtWeights.push_back(divWeights); nvinfer1::IConstantLayer* constDivide = m_Network->addConstant(divDims, divWeights); assert(constDivide != nullptr); nvinfer1::IElementWiseLayer* elementDivide = m_Network->addElementWise( *data, *constDivide->getOutput(0), nvinfer1::ElementWiseOperation::kDIV); assert(elementDivide != nullptr); nvinfer1::ITensor* previous = elementDivide->getOutput(0); std::vector<nvinfer1::ITensor*> tensorOutputs; uint32_t outputTensorCount = 0; // build the network using the network API for (uint32_t i = 0; i < m_configBlocks.size(); ++i) { // check if num. of channels is correct assert(getNumChannels(previous) == channels); std::string layerIndex = "(" + std::to_string(i) + ")"; if (m_configBlocks.at(i).at("type") == "net") { printLayerInfo("", "layer", " inp_size", " out_size", "weightPtr"); } else if (m_configBlocks.at(i).at("type") == "convolutional") { std::string inputVol = dimsToString(previous->getDimensions()); nvinfer1::ILayer* out; std::string layerType; //check activation std::string activation = ""; if (m_configBlocks.at(i).find("activation") != m_configBlocks.at(i).end()) { activation = m_configBlocks[i]["activation"]; } // check if batch_norm enabled if ((m_configBlocks.at(i).find("batch_normalize") != m_configBlocks.at(i).end()) && ("leaky" == activation)) { out = netAddConvBNLeaky(i, m_configBlocks.at(i), weights, trtWeights, weightPtr, channels, previous, m_Network); layerType = "conv-bn-leaky"; } else if ((m_configBlocks.at(i).find("batch_normalize") != m_configBlocks.at(i).end()) && ("mish" == activation)) { out = net_conv_bn_mish(i, m_configBlocks.at(i), weights, trtWeights, weightPtr, channels, previous, m_Network); layerType = "conv-bn-mish"; } else// if("linear" == activation) { out = netAddConvLinear(i, m_configBlocks.at(i), weights, trtWeights, weightPtr, channels, previous, m_Network); layerType = "conv-linear"; } previous = out->getOutput(0); assert(previous != nullptr); channels = getNumChannels(previous); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(out->getOutput(0)); printLayerInfo(layerIndex, layerType, inputVol, outputVol, std::to_string(weightPtr)); } else if (m_configBlocks.at(i).at("type") == "shortcut") { assert(m_configBlocks.at(i).at("activation") == "linear"); assert(m_configBlocks.at(i).find("from") != m_configBlocks.at(i).end()); int from = stoi(m_configBlocks.at(i).at("from")); std::string inputVol = dimsToString(previous->getDimensions()); // check if indexes are correct assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size())); assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size())); assert(i + from - 1 < i - 2); nvinfer1::IElementWiseLayer* ew = m_Network->addElementWise(*tensorOutputs[i - 2], *tensorOutputs[i + from - 1], 
nvinfer1::ElementWiseOperation::kSUM); assert(ew != nullptr); std::string ewLayerName = "shortcut_" + std::to_string(i); ew->setName(ewLayerName.c_str()); previous = ew->getOutput(0); assert(previous != nullptr); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(ew->getOutput(0)); printLayerInfo(layerIndex, "skip", inputVol, outputVol, " -"); } else if (m_configBlocks.at(i).at("type") == "yolo") { nvinfer1::Dims prevTensorDims = previous->getDimensions(); // assert(prevTensorDims.d[1] == prevTensorDims.d[2]); TensorInfo& curYoloTensor = m_OutputTensors.at(outputTensorCount); curYoloTensor.gridSize = prevTensorDims.d[1]; curYoloTensor.grid_h = prevTensorDims.d[1]; curYoloTensor.grid_w = prevTensorDims.d[2]; curYoloTensor.stride = m_InputW / curYoloTensor.gridSize; curYoloTensor.stride_h = m_InputH / curYoloTensor.grid_h; curYoloTensor.stride_w = m_InputW / curYoloTensor.grid_w; m_OutputTensors.at(outputTensorCount).volume = curYoloTensor.grid_h * curYoloTensor.grid_w * (curYoloTensor.numBBoxes * (5 + curYoloTensor.numClasses)); std::string layerName = "yolo_" + std::to_string(outputTensorCount); curYoloTensor.blobName = layerName; nvinfer1::IPlugin* yoloPlugin = new SLayerV3(m_OutputTensors.at(outputTensorCount).numBBoxes, m_OutputTensors.at(outputTensorCount).numClasses, m_OutputTensors.at(outputTensorCount).grid_h, m_OutputTensors.at(outputTensorCount).grid_w); assert(yoloPlugin != nullptr); nvinfer1::IPluginLayer* yolo = m_Network->addPlugin(&previous, 1, *yoloPlugin); assert(yolo != nullptr); yolo->setName(layerName.c_str()); std::string inputVol = dimsToString(previous->getDimensions()); previous = yolo->getOutput(0); assert(previous != nullptr); previous->setName(layerName.c_str()); std::string outputVol = dimsToString(previous->getDimensions()); m_Network->markOutput(*previous); channels = getNumChannels(previous); tensorOutputs.push_back(yolo->getOutput(0)); printLayerInfo(layerIndex, "yolo", inputVol, outputVol, std::to_string(weightPtr)); ++outputTensorCount; } else if (m_configBlocks.at(i).at("type") == "route") { size_t found = m_configBlocks.at(i).at("layers").find(","); if (found != std::string::npos)//concate multi layers { std::vector<int> vec_index = split_layer_index(m_configBlocks.at(i).at("layers"), ","); for (auto &ind_layer:vec_index) { if (ind_layer < 0) { ind_layer = static_cast<int>(tensorOutputs.size()) + ind_layer; } assert(ind_layer < static_cast<int>(tensorOutputs.size()) && ind_layer >= 0); } nvinfer1::ITensor** concatInputs = reinterpret_cast<nvinfer1::ITensor**>(malloc(sizeof(nvinfer1::ITensor*) * vec_index.size())); for (size_t ind = 0; ind < vec_index.size(); ++ind) { concatInputs[ind] = tensorOutputs[vec_index[ind]]; } nvinfer1::IConcatenationLayer* concat = m_Network->addConcatenation(concatInputs, static_cast<int>(vec_index.size())); assert(concat != nullptr); std::string concatLayerName = "route_" + std::to_string(i - 1); concat->setName(concatLayerName.c_str()); // concatenate along the channel dimension concat->setAxis(0); previous = concat->getOutput(0); assert(previous != nullptr); nvinfer1::Dims debug = previous->getDimensions(); std::string outputVol = dimsToString(previous->getDimensions()); int nums = 0; for (auto &indx:vec_index) { nums += getNumChannels(tensorOutputs[indx]); } channels = nums; tensorOutputs.push_back(concat->getOutput(0)); printLayerInfo(layerIndex, "route", " -", outputVol,std::to_string(weightPtr)); } else //single layer { int idx = std::stoi(trim(m_configBlocks.at(i).at("layers"))); if 
(idx < 0) { idx = static_cast<int>(tensorOutputs.size()) + idx; } assert(idx < static_cast<int>(tensorOutputs.size()) && idx >= 0); //route if (m_configBlocks.at(i).find("groups") == m_configBlocks.at(i).end()) { previous = tensorOutputs[idx]; assert(previous != nullptr); std::string outputVol = dimsToString(previous->getDimensions()); // set the output volume depth channels = getNumChannels(tensorOutputs[idx]); tensorOutputs.push_back(tensorOutputs[idx]); printLayerInfo(layerIndex, "route", " -", outputVol, std::to_string(weightPtr)); } //yolov4-tiny route split layer else { if (m_configBlocks.at(i).find("group_id") == m_configBlocks.at(i).end()) { assert(0); } int chunk_idx = std::stoi(trim(m_configBlocks.at(i).at("group_id"))); nvinfer1::ILayer* out = layer_split(i, tensorOutputs[idx], m_Network); std::string inputVol = dimsToString(previous->getDimensions()); previous = out->getOutput(chunk_idx); assert(previous != nullptr); channels = getNumChannels(previous); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(out->getOutput(chunk_idx)); printLayerInfo(layerIndex,"chunk", inputVol, outputVol, std::to_string(weightPtr)); } } } else if (m_configBlocks.at(i).at("type") == "upsample") { std::string inputVol = dimsToString(previous->getDimensions()); nvinfer1::ILayer* out = netAddUpsample(i - 1, m_configBlocks[i], weights, trtWeights, channels, previous, m_Network); previous = out->getOutput(0); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(out->getOutput(0)); printLayerInfo(layerIndex, "upsample", inputVol, outputVol, " -"); } else if (m_configBlocks.at(i).at("type") == "maxpool") { // Add same padding layers if (m_configBlocks.at(i).at("size") == "2" && m_configBlocks.at(i).at("stride") == "1") { m_TinyMaxpoolPaddingFormula->addSamePaddingLayer("maxpool_" + std::to_string(i)); } std::string inputVol = dimsToString(previous->getDimensions()); nvinfer1::ILayer* out = netAddMaxpool(i, m_configBlocks.at(i), previous, m_Network); previous = out->getOutput(0); assert(previous != nullptr); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(out->getOutput(0)); printLayerInfo(layerIndex, "maxpool", inputVol, outputVol, std::to_string(weightPtr)); } else { std::cout << "Unsupported layer type --> \"" << m_configBlocks.at(i).at("type") << "\"" << std::endl; assert(0); } } if (static_cast<int>(weights.size()) != weightPtr) { std::cout << "Number of unused weights left : " << static_cast<int>(weights.size()) - weightPtr << std::endl; assert(0); } // std::cout << "Output blob names :" << std::endl; // for (auto& tensor : m_OutputTensors) std::cout << tensor.blobName << std::endl; // Create and cache the engine if not already present if (fileExists(m_EnginePath)) { std::cout << "Using previously generated plan file located at " << m_EnginePath << std::endl; destroyNetworkUtils(trtWeights); return; } /*std::cout << "Unable to find cached TensorRT engine for network : " << m_NetworkType << " precision : " << m_Precision << " and batch size :" << m_BatchSize << std::endl;*/ m_Builder->setMaxBatchSize(m_BatchSize); //m_Builder->setMaxWorkspaceSize(1 << 20); config->setMaxWorkspaceSize(1 << 20); if (dataType == nvinfer1::DataType::kINT8) { assert((calibrator != nullptr) && "Invalid calibrator for INT8 precision"); // m_Builder->setInt8Mode(true); config->setFlag(nvinfer1::BuilderFlag::kINT8); // m_Builder->setInt8Calibrator(calibrator); config->setInt8Calibrator(calibrator); // 
config->setTacticSources(1U << static_cast<uint32_t>(TacticSource::kCUBLAS) | 1U << static_cast<uint32_t>(TacticSource::kCUBLAS_LT)); } else if (dataType == nvinfer1::DataType::kHALF) { config->setFlag(nvinfer1::BuilderFlag::kFP16); // m_Builder->setHalf2Mode(true); } m_Builder->allowGPUFallback(true); int nbLayers = m_Network->getNbLayers(); int layersOnDLA = 0; // std::cout << "Total number of layers: " << nbLayers << std::endl; for (int i = 0; i < nbLayers; i++) { nvinfer1::ILayer* curLayer = m_Network->getLayer(i); if (m_DeviceType == "kDLA" && m_Builder->canRunOnDLA(curLayer)) { m_Builder->setDeviceType(curLayer, nvinfer1::DeviceType::kDLA); layersOnDLA++; std::cout << "Set layer " << curLayer->getName() << " to run on DLA" << std::endl; } } // std::cout << "Total number of layers on DLA: " << layersOnDLA << std::endl; // Build the engine std::cout << "Building the TensorRT Engine..." << std::endl; m_Engine = m_Builder->buildEngineWithConfig(*m_Network,*config); assert(m_Engine != nullptr); std::cout << "Building complete!" << std::endl; // Serialize the engine writePlanFileToDisk(); // destroy destroyNetworkUtils(trtWeights); } void Detecter::doInference(const unsigned char* input, const uint32_t batchSize) @@ -268,10 +751,10 @@ void Detecter::setOutput(int type) { m_OutputTensors.clear(); printf("0-0-0-0-0-0------------------%d",type); if(type==2) for (int i = 0; i < 2; ++i) { TensorInfo outputTensor; outputTensor.numClasses = CLASS_BUM; outputTensor.blobName = "yolo_" + std::to_string(i); @@ -323,7 +806,17 @@ { TensorInfo outputTensor; outputTensor.numClasses = CLASS_BUM; outputTensor.blobName = "yolo_" + std::to_string(i); outputTensor.blobName = "yolo_" + to_string(i); // if (i==0) // { // outputTensor.blobName = "139_convolutional_reshape_2"; // }else if (i==1) // { // outputTensor.blobName = "150_convolutional_reshape_2"; // }else if (i==2) // { // outputTensor.blobName = "161_convolutional_reshape_2"; // } outputTensor.gridSize = (m_InputH / 32) * pow(2, 2-i); outputTensor.grid_h = (m_InputH / 32) * pow(2, 2-i); outputTensor.grid_w = (m_InputW / 32) * pow(2, 2-i); @@ -380,3 +873,24 @@ m_OutputTensors.push_back(outputTensor); } } void Detecter::writePlanFileToDisk() { std::cout << "Serializing the TensorRT Engine..." << std::endl; assert(m_Engine && "Invalid TensorRT Engine"); m_ModelStream = m_Engine->serialize(); assert(m_ModelStream && "Unable to serialize engine"); assert(!m_EnginePath.empty() && "Enginepath is empty"); // write data to output file std::stringstream gieModelStream; gieModelStream.seekg(0, gieModelStream.beg); gieModelStream.write(static_cast<const char*>(m_ModelStream->data()), m_ModelStream->size()); std::ofstream outFile; outFile.open(m_EnginePath, std::ios::binary | std::ios::out); outFile << gieModelStream.rdbuf(); outFile.close(); std::cout << "Serialized plan file cached at location : " << m_EnginePath << std::endl; } src/detecter_tools/model.h
@@ -3,7 +3,7 @@ #include "plugin_factory.h" #include "trt_utils.h" #include "calibrator.h" #include "NvInfer.h" #include "NvInferPlugin.h" #include "NvInferRuntimeCommon.h" @@ -16,6 +16,7 @@ #include "../utils/time_util.h" #include "../config.h" #include "opencv2/opencv.hpp" #include <numeric> struct NetworkInfo { @@ -78,6 +79,7 @@ const std::string m_DeviceType; const std::string m_InputBlobName; std::vector<TensorInfo> m_OutputTensors; std::vector<std::map<std::string, std::string>> m_configBlocks; uint32_t m_InputH; uint32_t m_InputW; uint32_t m_InputC; @@ -172,12 +174,21 @@ void setOutput(int type); private: Logger m_Logger; void createYOLOEngine(const nvinfer1::DataType dataType = nvinfer1::DataType::kFLOAT, Int8EntropyCalibrator* calibrator = nullptr); void writePlanFileToDisk(); std::vector<std::map<std::string, std::string>> parseConfigFile(const std::string cfgFilePath); void parseConfigBlocks(); void allocateBuffers(); bool verifyEngine(); void destroyNetworkUtils(std::vector<nvinfer1::Weights>& trtWeights); protected: const std::string m_NetworkType; std::unique_ptr<YoloTinyMaxpoolPaddingFormula> m_TinyMaxpoolPaddingFormula; private: Timer _timer; int _n_yolo_ind = 0; }; #endif src/detecter_tools/trt_utils.cpp
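The helpers added to trt_utils.cpp below (netAddConvBNLeaky, net_conv_bn_mish) fold each darknet convolution's batch-norm parameters into a per-channel TensorRT IScaleLayer. An illustrative, standalone sketch of that folding, with made-up numbers (not part of this patch):

// Standalone sketch of the batch-norm folding done in netAddConvBNLeaky /
// net_conv_bn_mish below: y = scale * x + shift, applied per channel.
#include <cmath>
#include <cstdio>
#include <vector>

int main()
{
    // Per-channel BN parameters in the order they are stored in darknet .weights files.
    std::vector<float> bnBias  = {0.10f, -0.20f};
    std::vector<float> bnGamma = {1.50f,  0.80f};
    std::vector<float> bnMean  = {0.05f,  0.30f};
    std::vector<float> bnVar   = {0.90f,  1.10f};

    for (size_t c = 0; c < bnBias.size(); ++c)
    {
        float stddev = std::sqrt(bnVar[c] + 1.0e-5f);               // same epsilon as the loader
        float scale  = bnGamma[c] / stddev;                         // IScaleLayer "scale"
        float shift  = bnBias[c] - bnMean[c] * bnGamma[c] / stddev; // IScaleLayer "shift"
        std::printf("channel %zu: scale=%f shift=%f\n", c, scale, shift);
    }
    return 0;
}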
@@ -21,34 +21,34 @@ cv::Scalar(0.0, 0.0, 0.0),true); } // static void leftTrim(std::string& s) // { // s.erase(s.begin(), find_if(s.begin(), s.end(), [](int ch) { return !isspace(ch); })); // } static void leftTrim(std::string& s) { s.erase(s.begin(), find_if(s.begin(), s.end(), [](int ch) { return !isspace(ch); })); } // static void rightTrim(std::string& s) // { // s.erase(find_if(s.rbegin(), s.rend(), [](int ch) { return !isspace(ch); }).base(), s.end()); // } static void rightTrim(std::string& s) { s.erase(find_if(s.rbegin(), s.rend(), [](int ch) { return !isspace(ch); }).base(), s.end()); } // std::string trim(std::string s) // { // leftTrim(s); // rightTrim(s); // return s; // } std::string trim(std::string s) { leftTrim(s); rightTrim(s); return s; } // std::string triml(std::string s,const char* t) // { // s.erase(0, s.find_first_not_of(t)); // return s; // } std::string triml(std::string s,const char* t) { s.erase(0, s.find_first_not_of(t)); return s; } // std::string trimr(std::string s, const char* t) // { // s.erase(s.find_last_not_of(t) + 1); // return s; // } std::string trimr(std::string s, const char* t) { s.erase(s.find_last_not_of(t) + 1); return s; } float clamp(const float val, const float minVal, const float maxVal) { @@ -115,6 +115,305 @@ // << " xmax:" << b.box.x2 << " ymax:" << b.box.y2 << std::endl; // } // std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& networkType) { assert(fileExists(weightsFilePath)); std::cout << "Loading pre-trained weights..." << std::endl; std::ifstream file(weightsFilePath, std::ios_base::binary); assert(file.good()); std::string line; file.ignore(4); char buf[2]; file.read(buf, 1); if ((int)(unsigned char)buf[0] == 1) { file.ignore(11); } else if ((int)(unsigned char)buf[0] == 2) { file.ignore(15); } else { std::cout << "Invalid network type" << std::endl; assert(0); } std::vector<float> weights; char* floatWeight = new char[4]; while (!file.eof()) { file.read(floatWeight, 4); assert(file.gcount() == 4); weights.push_back(*reinterpret_cast<float*>(floatWeight)); if (file.peek() == std::istream::traits_type::eof()) break; } std::cout << "Loading complete!" 
<< std::endl; delete[] floatWeight; // std::cout << "Total Number of weights read : " << weights.size() << std::endl; return weights; } std::string dimsToString(const nvinfer1::Dims d) { std::stringstream s; assert(d.nbDims >= 1); for (int i = 0; i < d.nbDims - 1; ++i) { s << std::setw(4) << d.d[i] << " x"; } s << std::setw(4) << d.d[d.nbDims - 1]; return s.str(); } nvinfer1::ILayer* netAddMaxpool(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network) { assert(block.at("type") == "maxpool"); assert(block.find("size") != block.end()); assert(block.find("stride") != block.end()); int size = std::stoi(block.at("size")); int stride = std::stoi(block.at("stride")); nvinfer1::IPoolingLayer* pool = network->addPoolingNd(*input, nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{size, size}); assert(pool); std::string maxpoolLayerName = "maxpool_" + std::to_string(layerIdx); int pad = (size - 1) / 2; pool->setPaddingNd(nvinfer1::DimsHW{pad,pad}); pool->setStrideNd(nvinfer1::DimsHW{stride, stride}); pool->setName(maxpoolLayerName.c_str()); return pool; } nvinfer1::ILayer* netAddConvLinear(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights, std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network) { assert(block.at("type") == "convolutional"); assert(block.find("batch_normalize") == block.end()); assert(block.at("activation") == "linear"); assert(block.find("filters") != block.end()); assert(block.find("pad") != block.end()); assert(block.find("size") != block.end()); assert(block.find("stride") != block.end()); int filters = std::stoi(block.at("filters")); int padding = std::stoi(block.at("pad")); int kernelSize = std::stoi(block.at("size")); int stride = std::stoi(block.at("stride")); int pad; if (padding) pad = (kernelSize - 1) / 2; else pad = 0; // load the convolution layer bias nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, filters}; float* val = new float[filters]; for (int i = 0; i < filters; ++i) { val[i] = weights[weightPtr]; weightPtr++; } convBias.values = val; trtWeights.push_back(convBias); // load the convolutional layer weights int size = filters * inputChannels * kernelSize * kernelSize; nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size}; val = new float[size]; for (int i = 0; i < size; ++i) { val[i] = weights[weightPtr]; weightPtr++; } convWt.values = val; trtWeights.push_back(convWt); nvinfer1::IConvolutionLayer* conv = network->addConvolution( *input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias); assert(conv != nullptr); std::string convLayerName = "conv_" + std::to_string(layerIdx); conv->setName(convLayerName.c_str()); conv->setStride(nvinfer1::DimsHW{stride, stride}); conv->setPadding(nvinfer1::DimsHW{pad, pad}); return conv; } nvinfer1::ILayer* net_conv_bn_mish(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights, std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network) { assert(block.at("type") == "convolutional"); assert(block.find("batch_normalize") != block.end()); assert(block.at("batch_normalize") == "1"); assert(block.at("activation") == "mish"); assert(block.find("filters") != block.end()); assert(block.find("pad") != block.end()); assert(block.find("size") != block.end()); assert(block.find("stride") != 
block.end()); bool batchNormalize, bias; if (block.find("batch_normalize") != block.end()) { batchNormalize = (block.at("batch_normalize") == "1"); bias = false; } else { batchNormalize = false; bias = true; } // all conv_bn_leaky layers assume bias is false assert(batchNormalize == true && bias == false); int filters = std::stoi(block.at("filters")); int padding = std::stoi(block.at("pad")); int kernelSize = std::stoi(block.at("size")); int stride = std::stoi(block.at("stride")); int pad; if (padding) pad = (kernelSize - 1) / 2; else pad = 0; std::vector<float> bnBiases; for (int i = 0; i < filters; ++i) { bnBiases.push_back(weights[weightPtr]); weightPtr++; } // load BN weights std::vector<float> bnWeights; for (int i = 0; i < filters; ++i) { bnWeights.push_back(weights[weightPtr]); weightPtr++; } // load BN running_mean std::vector<float> bnRunningMean; for (int i = 0; i < filters; ++i) { bnRunningMean.push_back(weights[weightPtr]); weightPtr++; } // load BN running_var std::vector<float> bnRunningVar; for (int i = 0; i < filters; ++i) { // 1e-05 for numerical stability bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5f)); weightPtr++; } // load Conv layer weights (GKCRS) int size = filters * inputChannels * kernelSize * kernelSize; nvinfer1::Weights convWt{ nvinfer1::DataType::kFLOAT, nullptr, size }; float* val = new float[size]; for (int i = 0; i < size; ++i) { val[i] = weights[weightPtr]; weightPtr++; } convWt.values = val; trtWeights.push_back(convWt); nvinfer1::Weights convBias{ nvinfer1::DataType::kFLOAT, nullptr, 0 }; trtWeights.push_back(convBias); nvinfer1::IConvolutionLayer* conv = network->addConvolution( *input, filters, nvinfer1::DimsHW{ kernelSize, kernelSize }, convWt, convBias); assert(conv != nullptr); std::string convLayerName = "conv_" + std::to_string(layerIdx); conv->setName(convLayerName.c_str()); conv->setStride(nvinfer1::DimsHW{ stride, stride }); conv->setPadding(nvinfer1::DimsHW{ pad, pad }); /***** BATCHNORM LAYER *****/ /***************************/ size = filters; // create the weights nvinfer1::Weights shift{ nvinfer1::DataType::kFLOAT, nullptr, size }; nvinfer1::Weights scale{ nvinfer1::DataType::kFLOAT, nullptr, size }; nvinfer1::Weights power{ nvinfer1::DataType::kFLOAT, nullptr, size }; float* shiftWt = new float[size]; for (int i = 0; i < size; ++i) { shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i)); } shift.values = shiftWt; float* scaleWt = new float[size]; for (int i = 0; i < size; ++i) { scaleWt[i] = bnWeights.at(i) / bnRunningVar[i]; } scale.values = scaleWt; float* powerWt = new float[size]; for (int i = 0; i < size; ++i) { powerWt[i] = 1.0; } power.values = powerWt; trtWeights.push_back(shift); trtWeights.push_back(scale); trtWeights.push_back(power); // Add the batch norm layers nvinfer1::IScaleLayer* bn = network->addScale( *conv->getOutput(0), nvinfer1::ScaleMode::kCHANNEL, shift, scale, power); assert(bn != nullptr); std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx); bn->setName(bnLayerName.c_str()); /***** ACTIVATION LAYER *****/ /****************************/ auto creator = getPluginRegistry()->getPluginCreator("Mish_TRT", "1"); const nvinfer1::PluginFieldCollection* pluginData = creator->getFieldNames(); nvinfer1::IPluginV2 *pluginObj = creator->createPlugin(("mish" + std::to_string(layerIdx)).c_str(), pluginData); nvinfer1::ITensor* inputTensors[] = { bn->getOutput(0) }; auto mish = network->addPluginV2(&inputTensors[0], 1, *pluginObj); return mish; } int 
getNumChannels(nvinfer1::ITensor* t) { nvinfer1::Dims d = t->getDimensions(); assert(d.nbDims == 3); return d.d[0]; } std::vector<int> split_layer_index(const std::string &s_,const std::string &delimiter_) { std::vector<int> index; std::string s = s_; size_t pos = 0; std::string token; while ((pos = s.find(delimiter_)) != std::string::npos) { token = s.substr(0, pos); index.push_back(std::stoi(trim(token))); s.erase(0, pos + delimiter_.length()); } index.push_back(std::stoi(trim(s))); return index; } void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput, std::string weightPtr) { std::cout << std::setw(6) << std::left << layerIndex << std::setw(15) << std::left << layerName; std::cout << std::setw(20) << std::left << layerInput << std::setw(20) << std::left << layerOutput; std::cout << std::setw(6) << std::left << weightPtr << std::endl; } uint64_t get3DTensorVolume(nvinfer1::Dims inputDims) { assert(inputDims.nbDims == 3); @@ -216,3 +515,236 @@ } return out; } nvinfer1::ILayer* netAddUpsample(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights, std::vector<nvinfer1::Weights>& trtWeights, int& inputChannels, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network) { assert(block.at("type") == "upsample"); nvinfer1::Dims inpDims = input->getDimensions(); assert(inpDims.nbDims == 3); // assert(inpDims.d[1] == inpDims.d[2]); int n_scale = std::stoi(block.at("stride")); int c1 = inpDims.d[0]; float *deval = new float[c1*n_scale*n_scale]; for (int i = 0; i < c1*n_scale*n_scale; i++) { deval[i] = 1.0; } nvinfer1::Weights wts{ DataType::kFLOAT, deval, c1*n_scale*n_scale }; nvinfer1::Weights bias{ DataType::kFLOAT, nullptr, 0 }; IDeconvolutionLayer* upsample = network->addDeconvolutionNd(*input, c1, DimsHW{ n_scale, n_scale }, wts, bias); upsample->setStrideNd(DimsHW{ n_scale, n_scale }); upsample->setNbGroups(c1); return upsample; #if 0 #endif } nvinfer1::ILayer * layer_split(const int n_layer_index_, nvinfer1::ITensor *input_, nvinfer1::INetworkDefinition* network) { auto creator = getPluginRegistry()->getPluginCreator("CHUNK_TRT", "1.0"); const nvinfer1::PluginFieldCollection* pluginData = creator->getFieldNames(); nvinfer1::IPluginV2 *pluginObj = creator->createPlugin(("chunk" + std::to_string(n_layer_index_)).c_str(), pluginData); auto chunk = network->addPluginV2(&input_, 1, *pluginObj); return chunk; } nvinfer1::ILayer* netAddConvBNLeaky(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights, std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network) { assert(block.at("type") == "convolutional"); assert(block.find("batch_normalize") != block.end()); assert(block.at("batch_normalize") == "1"); assert(block.at("activation") == "leaky"); assert(block.find("filters") != block.end()); assert(block.find("pad") != block.end()); assert(block.find("size") != block.end()); assert(block.find("stride") != block.end()); bool batchNormalize, bias; if (block.find("batch_normalize") != block.end()) { batchNormalize = (block.at("batch_normalize") == "1"); bias = false; } else { batchNormalize = false; bias = true; } // all conv_bn_leaky layers assume bias is false assert(batchNormalize == true && bias == false); int filters = std::stoi(block.at("filters")); int padding = std::stoi(block.at("pad")); int kernelSize = std::stoi(block.at("size")); int stride = std::stoi(block.at("stride")); int 
pad; if (padding) pad = (kernelSize - 1) / 2; else pad = 0; /***** CONVOLUTION LAYER *****/ /*****************************/ // batch norm weights are before the conv layer // load BN biases (bn_biases) std::vector<float> bnBiases; for (int i = 0; i < filters; ++i) { bnBiases.push_back(weights[weightPtr]); weightPtr++; } // load BN weights std::vector<float> bnWeights; for (int i = 0; i < filters; ++i) { bnWeights.push_back(weights[weightPtr]); weightPtr++; } // load BN running_mean std::vector<float> bnRunningMean; for (int i = 0; i < filters; ++i) { bnRunningMean.push_back(weights[weightPtr]); weightPtr++; } // load BN running_var std::vector<float> bnRunningVar; for (int i = 0; i < filters; ++i) { // 1e-05 for numerical stability bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5f)); weightPtr++; } // load Conv layer weights (GKCRS) int size = filters * inputChannels * kernelSize * kernelSize; nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size}; float* val = new float[size]; for (int i = 0; i < size; ++i) { val[i] = weights[weightPtr]; weightPtr++; } convWt.values = val; trtWeights.push_back(convWt); nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, 0}; trtWeights.push_back(convBias); nvinfer1::IConvolutionLayer* conv = network->addConvolution( *input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias); assert(conv != nullptr); std::string convLayerName = "conv_" + std::to_string(layerIdx); conv->setName(convLayerName.c_str()); conv->setStride(nvinfer1::DimsHW{stride, stride}); conv->setPadding(nvinfer1::DimsHW{pad, pad}); /***** BATCHNORM LAYER *****/ /***************************/ size = filters; // create the weights nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, nullptr, size}; nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, nullptr, size}; nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size}; float* shiftWt = new float[size]; for (int i = 0; i < size; ++i) { shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i)); } shift.values = shiftWt; float* scaleWt = new float[size]; for (int i = 0; i < size; ++i) { scaleWt[i] = bnWeights.at(i) / bnRunningVar[i]; } scale.values = scaleWt; float* powerWt = new float[size]; for (int i = 0; i < size; ++i) { powerWt[i] = 1.0; } power.values = powerWt; trtWeights.push_back(shift); trtWeights.push_back(scale); trtWeights.push_back(power); // Add the batch norm layers nvinfer1::IScaleLayer* bn = network->addScale( *conv->getOutput(0), nvinfer1::ScaleMode::kCHANNEL, shift, scale, power); assert(bn != nullptr); std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx); bn->setName(bnLayerName.c_str()); /***** ACTIVATION LAYER *****/ /****************************/ auto leaky = network->addActivation(*bn->getOutput(0),nvinfer1::ActivationType::kLEAKY_RELU); leaky->setAlpha(0.1f); /*nvinfer1::IPlugin* leakyRELU = nvinfer1::plugin::createPReLUPlugin(0.1); assert(leakyRELU != nullptr); nvinfer1::ITensor* bnOutput = bn->getOutput(0); nvinfer1::IPluginLayer* leaky = network->addPlugin(&bnOutput, 1, *leakyRELU);*/ assert(leaky != nullptr); std::string leakyLayerName = "leaky_" + std::to_string(layerIdx); leaky->setName(leakyLayerName.c_str()); return leaky; } std::vector<std::string> loadListFromTextFile(const std::string filename) { assert(fileExists(filename)); std::vector<std::string> list; std::ifstream f(filename); if (!f) { std::cout << "failed to open " << filename; assert(0); } std::string line; while (std::getline(f, line)) { if 
(line.empty()) continue; else list.push_back(trim(line)); } return list; } std::vector<std::string> loadImageList(const std::string filename, const std::string prefix) { std::vector<std::string> fileList = loadListFromTextFile(filename); for (auto& file : fileList) { if (fileExists(file, false)) continue; else { std::string prefixed = prefix + file; if (fileExists(prefixed, false)) file = prefixed; else std::cerr << "WARNING: couldn't find: " << prefixed << " while loading: " << filename << std::endl; } } return fileList; } src/detecter_tools/trt_utils.h
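trt_utils.h below also adds YoloTinyMaxpoolPaddingFormula, which preserves the feature-map size for the size-2/stride-1 maxpool layers registered through addSamePaddingLayer(). As a worked example: netAddMaxpool sets pad = (2 - 1) / 2 = 0, so for a 13x13 input at stride 1 the same-padding branch returns (13 + 2*0) / 1 = 13 (size kept), whereas the valid-padding branch would give (13 - 2) / 1 + 1 = 12.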
@@ -67,6 +67,34 @@ } } }; nvinfer1::ILayer* netAddMaxpool(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network); nvinfer1::ILayer* netAddConvLinear(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights, std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network); nvinfer1::ILayer* net_conv_bn_mish(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights, std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network); nvinfer1::ILayer* netAddConvBNLeaky(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights, std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network); std::string dimsToString(const nvinfer1::Dims d); std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& networkType); int getNumChannels(nvinfer1::ITensor* t); std::string trim(std::string s); std::string triml(std::string s, const char* t); std::string trimr(std::string s, const char* t); float clamp(const float val, const float minVal, const float maxVal); // Common helper functions cv::Mat blobFromDsImages(const std::vector<DsImage>& inputImages, const int& inputH, @@ -77,9 +105,53 @@ nvinfer1::ICudaEngine* loadTRTEngine(const std::string planFilePath, PluginFactory* pluginFactory, Logger& logger); uint64_t get3DTensorVolume(nvinfer1::Dims inputDims); std::vector<std::string> loadImageList(const std::string filename, const std::string prefix); std::vector<BBoxInfo> nmsAllClasses(const float nmsThresh, std::vector<BBoxInfo>& binfo, const uint32_t numClasses, const std::string &model_type); void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput, std::string weightPtr); std::vector<BBoxInfo> nonMaximumSuppression(const float nmsThresh, std::vector<BBoxInfo> binfo); std::vector<int> split_layer_index(const std::string &s_,const std::string &delimiter_); nvinfer1::ILayer * layer_split(const int n_layer_index_, nvinfer1::ITensor *input_, nvinfer1::INetworkDefinition* network); nvinfer1::ILayer* netAddUpsample(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights, std::vector<nvinfer1::Weights>& trtWeights, int& inputChannels, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network); std::vector<std::string> loadListFromTextFile(const std::string filename); class YoloTinyMaxpoolPaddingFormula : public nvinfer1::IOutputDimensionsFormula { private: std::set<std::string> m_SamePaddingLayers; nvinfer1::DimsHW compute(nvinfer1::DimsHW inputDims, nvinfer1::DimsHW kernelSize, nvinfer1::DimsHW stride, nvinfer1::DimsHW padding, nvinfer1::DimsHW dilation, const char* layerName) const override { // assert(inputDims.d[0] == inputDims.d[1]); assert(kernelSize.d[0] == kernelSize.d[1]); assert(stride.d[0] == stride.d[1]); assert(padding.d[0] == padding.d[1]); int output_h, output_w; // Only layer maxpool_12 makes use of same padding if (m_SamePaddingLayers.find(layerName) != m_SamePaddingLayers.end()) { output_h = (inputDims.d[0] + 2 * padding.d[0]) / stride.d[0]; output_w = (inputDims.d[1] + 2 * padding.d[1]) / stride.d[1]; } // Valid Padding else { output_h = (inputDims.d[0] - kernelSize.d[0]) / stride.d[0] + 1; 
output_w = (inputDims.d[1] - kernelSize.d[1]) / stride.d[1] + 1; } return nvinfer1::DimsHW{output_h, output_w}; } public: void addSamePaddingLayer(std::string input) { m_SamePaddingLayers.insert(input); } }; #endif src/h_interface.cpp
@@ -8,6 +8,8 @@
using namespace std;
string m_staticStruct::model_path = "path";
string m_staticStruct::model_cfg = "0";   // initialize the struct's static members
string m_staticStruct::model_wts = "0";   // initialize the struct's static members
int m_staticStruct::type = 1;
bool m_staticStruct::isTrack= true;
int m_staticStruct::max_cam_num = 0;
@@ -116,6 +118,8 @@
    if(reader.parse(in,root))
    {
        std::string model_path = root["param"]["model_path"].asString();
        std::string model_cfg= root["param"]["model_cfg"].asString();
        std::string model_wts = root["param"]["model_wts"].asString();
        int type = root["param"]["type"].asInt();
        bool isTrack = root["param"]["isTrack"].asBool();
        int max_cam_num = root["param"]["max_cam_num"].asInt();
@@ -123,6 +127,8 @@
        int mv_velocity = root["param"]["mv_velocity"].asFloat();
        int fall_rate = root["param"]["fall_rate"].asFloat();
        m_staticStruct::model_path = model_path;
        m_staticStruct::model_cfg = model_cfg;
        m_staticStruct::model_wts = model_wts;
        m_staticStruct::type = type;
        m_staticStruct::isTrack = isTrack;
        m_staticStruct::max_cam_num = max_cam_num;
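The hunks above route the two new keys from config.json through JsonCpp into m_staticStruct. A condensed, self-contained sketch of that flow; the Settings struct and the hard-coded path are placeholders rather than the project's real names:

// Hypothetical, condensed sketch of the config flow added above: read config.json
// with JsonCpp and copy the new fields into a process-wide settings struct.
#include <json/json.h>
#include <fstream>
#include <iostream>
#include <string>

struct Settings {                 // stand-in for m_staticStruct
    static std::string model_path;
    static std::string model_cfg;
    static std::string model_wts;
    static int type;
};
std::string Settings::model_path;
std::string Settings::model_cfg;
std::string Settings::model_wts;
int Settings::type = 1;

int main()
{
    std::ifstream in("config.json");   // path is an assumption
    Json::Value root;
    Json::Reader reader;               // same (older) JsonCpp API as used above
    if (!reader.parse(in, root)) {
        std::cerr << "failed to parse config.json" << std::endl;
        return 1;
    }
    Settings::model_path = root["param"]["model_path"].asString();
    Settings::model_cfg  = root["param"]["model_cfg"].asString();
    Settings::model_wts  = root["param"]["model_wts"].asString();
    Settings::type       = root["param"]["type"].asInt();
    std::cout << "engine: " << Settings::model_path << ", type: " << Settings::type << std::endl;
    return 0;
}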