From 9b1532d86c2cf48a63017f3460897d8d14b98b60 Mon Sep 17 00:00:00 2001
From: Scheaven <xuepengqiang>
Date: 星期三, 08 九月 2021 17:45:25 +0800
Subject: [PATCH] coding

---
 config.json                       |    2 
 src/config.h                      |    2 
 src/detecter_tools/trt_utils.h    |   76 +++
 src/detecter_tools/calibrator.h   |   62 ++
 src/detecter_tools/detector.cpp   |   13 
 src/detecter_tools/trt_utils.cpp  |  580 +++++++++++++++++++++++-
 src/h_interface.cpp               |    6 
 src/detecter_tools/calibrator.cpp |  114 ++++
 src/detecter_tools/model.cpp      |  524 +++++++++++++++++++++
 src/detecter_tools/model.h        |   13 
 CMakeLists.txt                    |    4 
 11 files changed, 1,355 insertions(+), 41 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 362ad2c..72caba6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,8 +3,8 @@
 enable_language(CUDA)
 
 set(CMAKE_CXX_COMPILIER "/usr/bin/g++")
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -O3 -DS_DEBUG -Wno-write-strings")
-set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -DS_DEBUG -Wl,-rpath -Wl,$ORIGIN")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -O3 -Wno-write-strings")
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-rpath -Wl,$ORIGIN")
 
 set(CUDA_Path /usr/local/cuda)
 find_package(OpenCV REQUIRED PATHS "/data/disk2/opt/01_opencv/opencv4.5.2")
diff --git a/config.json b/config.json
index 13fc379..28410e1 100644
--- a/config.json
+++ b/config.json
@@ -3,6 +3,8 @@
   "runtime": "/opt/vasystem/libs/Detect:/usr/local/cuda-11.1/lib64:",  // 椤圭洰鎵�鐢ㄥ埌鐨勭幆澧�
   "param": {
     "model_path": "/data/disk1/project/model_dump/02_yolo/baseDetect-kFLOAT-batch1.engine", // para閲岃竟鑷繁绠楁硶鍙兘鐢ㄥ埌鐨勫弬鏁�
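+    "model_cfg": "/data/disk1/project/model_dump/02_yolo/cfg/baseDetector.cfg", // algorithm-specific parameter: network cfg parsed to build the engine when model_path does not exist
+    "model_wts": "/data/disk1/project/model_dump/02_yolo/baseDetector.weights", // algorithm-specific parameter: weights file loaded when that engine is built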
     //"model_path": "/data/disk1/project/model_dump/02_yolo/baseDetetor_small.bin", // para閲岃竟鑷繁绠楁硶鍙兘鐢ㄥ埌鐨勫弬鏁�
     "type":1,
     "max_cam_num": 8,
diff --git a/src/config.h b/src/config.h
index e7f6f80..9673e66 100644
--- a/src/config.h
+++ b/src/config.h
@@ -51,6 +51,8 @@
 typedef struct m_staticStruct
 {
     static std::string model_path;
+    static std::string model_cfg;
+    static std::string model_wts;
     static int type;
     static bool isTrack;
     static int max_cam_num;
diff --git a/src/detecter_tools/calibrator.cpp b/src/detecter_tools/calibrator.cpp
new file mode 100644
index 0000000..0a9bf2f
--- /dev/null
+++ b/src/detecter_tools/calibrator.cpp
@@ -0,0 +1,114 @@
+/**
+MIT License
+
+Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*
+*/
+
+#include "calibrator.h"
+#include <fstream>
+#include <iostream>
+#include <iterator>
+
+// Sets up INT8 calibration: when no calibration table file exists yet, loads the calibration image
+// list, truncates it to whole batches and shuffles it; always allocates the device input buffer.
+Int8EntropyCalibrator::Int8EntropyCalibrator(const uint32_t& batchSize, const std::string& calibImages,
+    const std::string& calibImagesPath,
+    const std::string& calibTableFilePath,
+    const uint64_t& inputSize, const uint32_t& inputH,
+    const uint32_t& inputW, const std::string& inputBlobName,
+    const std::string &s_net_type_) :
+    m_BatchSize(batchSize),
+    m_InputH(inputH),
+    m_InputW(inputW),
+    m_InputSize(inputSize),
+    m_InputCount(batchSize * inputSize),
+    m_InputBlobName(inputBlobName),
+    _s_net_type(s_net_type_),  // initializers follow the declaration order in calibrator.h (-Wreorder)
+    m_CalibTableFilePath(calibTableFilePath),
+    m_ImageIndex(0)
+{
+    if (!fileExists(m_CalibTableFilePath, false))
+    {
+        m_ImageList = loadImageList(calibImages, calibImagesPath);
+        m_ImageList.resize(static_cast<int>(m_ImageList.size() / m_BatchSize) * m_BatchSize);
+        std::random_shuffle(m_ImageList.begin(), m_ImageList.end(), [](int i) { return rand() % i; });
+    }
+    NV_CUDA_CHECK(cudaMalloc(&m_DeviceInput, m_InputCount * sizeof(float)));
+}
+// Frees the device input buffer (m_DeviceInput) that the constructor allocated with cudaMalloc.
+Int8EntropyCalibrator::~Int8EntropyCalibrator() { NV_CUDA_CHECK(cudaFree(m_DeviceInput)); }
+
+// Uploads the next calibration batch and binds it as input 0; returns false once no full batch remains.
+bool Int8EntropyCalibrator::getBatch(void* bindings[], const char* names[], int nbBindings)
+{
+    // '>' (not '>='): the image list is truncated to whole batches, so the last batch ends exactly at size().
+    if (m_ImageIndex + m_BatchSize > m_ImageList.size()) return false;
+
+    // Load next batch
+    std::vector<DsImage> dsImages(m_BatchSize);
+    for (uint32_t j = m_ImageIndex; j < m_ImageIndex + m_BatchSize; ++j)
+    {
+        dsImages.at(j - m_ImageIndex) = DsImage(m_ImageList.at(j), _s_net_type, m_InputH, m_InputW);
+    }
+    m_ImageIndex += m_BatchSize;
+    cv::Mat trtInput = blobFromDsImages(dsImages, m_InputH, m_InputW);
+    NV_CUDA_CHECK(cudaMemcpy(m_DeviceInput, trtInput.ptr<float>(0), m_InputCount * sizeof(float),
+                             cudaMemcpyHostToDevice));
+    assert(!strcmp(names[0], m_InputBlobName.c_str()));
+    bindings[0] = m_DeviceInput;
+    return true;
+}
+
+// Loads the calibration table file into m_CalibrationCache and returns a pointer to its bytes
+// (length set to the byte count), or nullptr with length 0 when nothing could be read.
+const void* Int8EntropyCalibrator::readCalibrationCache(size_t& length)
+{
+    m_CalibrationCache.clear();
+    assert(!m_CalibTableFilePath.empty());
+
+    std::ifstream tableFile(m_CalibTableFilePath, std::ios::binary | std::ios::in);
+    tableFile >> std::noskipws;  // keep whitespace bytes while reading char by char
+    // Copy the whole file into the in-memory cache when caching is enabled and the file opened.
+    if (m_ReadCache && tableFile.good())
+    {
+        std::copy(std::istream_iterator<char>(tableFile), std::istream_iterator<char>(),
+                  std::back_inserter(m_CalibrationCache));
+    }
+
+    length = m_CalibrationCache.size();
+    if (length == 0)
+    {
+        std::cout << "New calibration table will be created to build the engine" << std::endl;
+        return nullptr;
+    }
+
+    std::cout << "Using cached calibration table to build the engine" << std::endl;
+    return &m_CalibrationCache[0];
+}
+
+// Writes `length` bytes starting at `cache` to the calibration table file (binary mode).
+void Int8EntropyCalibrator::writeCalibrationCache(const void* cache, size_t length)
+{
+    assert(!m_CalibTableFilePath.empty());
+    std::ofstream tableFile(m_CalibTableFilePath, std::ios::binary);
+    tableFile.write(static_cast<const char*>(cache), length);  // stream is closed by its destructor
+}
diff --git a/src/detecter_tools/calibrator.h b/src/detecter_tools/calibrator.h
new file mode 100644
index 0000000..4eb44a7
--- /dev/null
+++ b/src/detecter_tools/calibrator.h
@@ -0,0 +1,62 @@
+/**
+MIT License
+
+Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*
+*/
+#ifndef _CALIBRATOR_H_
+#define _CALIBRATOR_H_
+
+#include "NvInfer.h"
+#include "ds_image.h"
+#include "trt_utils.h"
+
+// INT8 entropy calibrator: supplies image batches to TensorRT and reads/writes the calibration table file.
+class Int8EntropyCalibrator : public nvinfer1::IInt8EntropyCalibrator2
+{
+public:
+    Int8EntropyCalibrator(const uint32_t& batchSize, const std::string& calibImages,
+                          const std::string& calibImagesPath, const std::string& calibTableFilePath,
+                          const uint64_t& inputSize, const uint32_t& inputH, const uint32_t& inputW,
+                          const std::string& inputBlobName, const std::string& s_net_type_);
+    virtual ~Int8EntropyCalibrator();
+    int getBatchSize() const override { return m_BatchSize; }
+    bool getBatch(void* bindings[], const char* names[], int nbBindings) override;
+    const void* readCalibrationCache(size_t& length) override;
+    void writeCalibrationCache(const void* cache, size_t length) override;
+
+private:
+    const uint32_t m_BatchSize;
+    const uint32_t m_InputH;
+    const uint32_t m_InputW;
+    const uint64_t m_InputSize;                 // element count of one input (passed in by the caller)
+    const uint64_t m_InputCount;                // batchSize * inputSize
+    const std::string m_InputBlobName;
+    const std::string _s_net_type;
+    const std::string m_CalibTableFilePath;     // no {nullptr} default: std::string(nullptr) is undefined behaviour
+    uint32_t m_ImageIndex;                      // index of the first image of the next batch
+    bool m_ReadCache{true};
+    void* m_DeviceInput{nullptr};               // device buffer allocated in the constructor
+    std::vector<std::string> m_ImageList;
+    std::vector<char> m_CalibrationCache;
+};
+
+#endif
diff --git a/src/detecter_tools/detector.cpp b/src/detecter_tools/detector.cpp
index 78e136f..be5f86a 100644
--- a/src/detecter_tools/detector.cpp
+++ b/src/detecter_tools/detector.cpp
@@ -31,10 +31,10 @@
         vec_ds_images.emplace_back(img, _vec_net_type[_config.net_type], _p_net->getInputH(), _p_net->getInputW());
     }
     cv::Mat trtInput = blobFromDsImages(vec_ds_images, _p_net->getInputH(),_p_net->getInputW());
-    timer.out("pre");
+    timer.out("eve pre detect ");
 
-    _p_net->doInference(trtInput.data, vec_ds_images.size());
     timer.reset();
+    _p_net->doInference(trtInput.data, vec_ds_images.size());
     for (uint32_t i = 0; i < vec_ds_images.size(); ++i)
     {
         auto curImage = vec_ds_images.at(i);
@@ -65,9 +65,8 @@
         }
         vec_batch_result[i] = vec_result;
     }
-    timer.out("post");
+    timer.out("eve pre detect post");
     DEBUG("--detect over--" );
-
 }
 
 void Detector::set_gpu_id(const int id)
@@ -95,9 +94,9 @@
 
 void Detector::build_net()
 {
-    if(_config.net_type == SMALL)
-        _p_net = std::unique_ptr<Detecter>{new Detecter(_info,_infer_param,2)};
-    else{
+    if(_config.net_type == COMMON)  // COMMON networks are created with Detecter type 1
         _p_net = std::unique_ptr<Detecter>{new Detecter(_info,_infer_param,1)};
+    else{  // every other net_type uses type 2 (Detecter::setOutput loops over two yolo outputs for it)
+        _p_net = std::unique_ptr<Detecter>{new Detecter(_info,_infer_param,2)};
     }
 }
diff --git a/src/detecter_tools/model.cpp b/src/detecter_tools/model.cpp
index c548561..0bccfb3 100644
--- a/src/detecter_tools/model.cpp
+++ b/src/detecter_tools/model.cpp
@@ -11,6 +11,7 @@
 
 
 Detecter::Detecter( const NetworkInfo& networkInfo, const InferParams& inferParams, int type) :
+    m_NetworkType(networkInfo.networkType),
     m_InputBlobName(networkInfo.inputBlobName),
     m_InputH(416),
     m_InputW(416),
@@ -26,10 +27,17 @@
     m_Context(nullptr),
     m_InputBindingIndex(-1),
     m_CudaStream(nullptr),
-    m_PluginFactory(new PluginFactory)
+    m_PluginFactory(new PluginFactory),
+    m_TinyMaxpoolPaddingFormula(new YoloTinyMaxpoolPaddingFormula)
 {
-    setOutput(type);
     m_EnginePath = m_staticStruct::model_path;
+    if(!fileExists(m_EnginePath))
+    {
+        m_configBlocks = parseConfigFile(m_staticStruct::model_cfg);
+        parseConfigBlocks();
+        createYOLOEngine();
+    }
+    setOutput(type);
     DEBUG((boost::format("m_EnginePath:%s")%m_EnginePath).str());
     assert(m_PluginFactory != nullptr);
     m_Engine = loadTRTEngine(m_EnginePath, m_PluginFactory, m_Logger);
@@ -67,7 +75,482 @@
         m_PluginFactory = nullptr;
     }
 
-    // m_TinyMaxpoolPaddingFormula.reset();
+    m_TinyMaxpoolPaddingFormula.reset();
+}
+
+// Parses a darknet-style cfg file into one key/value map per "[section]"; the section name is
+// stored under key "type". Blank lines, '#' comment lines and lines without '=' are skipped.
+std::vector<std::map<std::string, std::string>> Detecter::parseConfigFile(const std::string cfgFilePath)
+{
+    std::cout << "::::::::::" << cfgFilePath <<std::endl;
+    assert(fileExists(cfgFilePath));
+    std::ifstream file(cfgFilePath);
+    assert(file.good());
+    std::string line;
+    std::vector<std::map<std::string, std::string>> blocks;
+    std::map<std::string, std::string> block;
+
+    while (getline(file, line))
+    {
+        // Trim first: a whitespace-only line would otherwise reach front() as an empty string (UB).
+        line = trim(line);
+        if (line.empty() || line.front() == '#') continue;
+        if (line.front() == '[')
+        {
+            if (!block.empty())
+            {
+                blocks.push_back(block);
+                block.clear();
+            }
+            std::string value = trim(line.substr(1, line.size() - 2));
+            block.emplace("type", value);
+        }
+        else
+        {
+            const size_t cpos = line.find('=');
+            if (cpos == std::string::npos) continue;  // not key=value; previously stored the line as its own key
+            block.emplace(trim(line.substr(0, cpos)), trim(line.substr(cpos + 1)));
+        }
+    }
+    blocks.push_back(block);
+    return blocks;
+}
+
+// Walks m_configBlocks: the "net" block sets the input dimensions and batch size, and every
+// "region"/"yolo" block appends a TensorInfo (anchors, masks, grid, stride, volume) to
+// m_OutputTensors, named "yolo_<n>" in order of appearance.
+void Detecter::parseConfigBlocks()
+{
+    for (const auto& block : m_configBlocks)
+    {
+        if (block.at("type") == "net")
+        {
+            assert((block.find("height") != block.end())
+                   && "Missing 'height' param in network cfg");
+            assert((block.find("width") != block.end()) && "Missing 'width' param in network cfg");
+            assert((block.find("channels") != block.end())
+                   && "Missing 'channels' param in network cfg");
+            assert((block.find("batch") != block.end())
+                   && "Missing 'batch' param in network cfg");
+
+            m_InputH = std::stoul(trim(block.at("height")));
+            m_InputW = std::stoul(trim(block.at("width")));
+            m_InputC = std::stoul(trim(block.at("channels")));
+            m_BatchSize = std::stoi(trim(block.at("batch")));
+            m_InputSize = m_InputC * m_InputH * m_InputW;
+        }
+        else if ((block.at("type") == "region") || (block.at("type") == "yolo"))
+        {
+            assert((block.find("num") != block.end())
+                   && std::string("Missing 'num' param in " + block.at("type") + " layer").c_str());
+            assert((block.find("classes") != block.end())
+                   && std::string("Missing 'classes' param in " + block.at("type") + " layer")
+                          .c_str());
+            assert((block.find("anchors") != block.end())
+                   && std::string("Missing 'anchors' param in " + block.at("type") + " layer")
+                          .c_str());
+
+            TensorInfo outputTensor;
+            std::string anchorString = block.at("anchors");
+            while (!anchorString.empty())
+            {
+                size_t npos = anchorString.find_first_of(',');
+                if (npos != std::string::npos)
+                {
+                    float anchor = std::stof(trim(anchorString.substr(0, npos)));
+                    outputTensor.anchors.push_back(anchor);
+                    anchorString.erase(0, npos + 1);
+                }
+                else
+                {
+                    float anchor = std::stof(trim(anchorString));
+                    outputTensor.anchors.push_back(anchor);
+                    break;
+                }
+            }
+
+            assert((block.find("mask") != block.end())
+                   && std::string("Missing 'mask' param in " + block.at("type") + " layer")
+                          .c_str());
+
+            std::string maskString = block.at("mask");
+            while (!maskString.empty())
+            {
+                size_t npos = maskString.find_first_of(',');
+                if (npos != std::string::npos)
+                {
+                    uint32_t mask = std::stoul(trim(maskString.substr(0, npos)));
+                    outputTensor.masks.push_back(mask);
+                    maskString.erase(0, npos + 1);
+                }
+                else
+                {
+                    uint32_t mask = std::stoul(trim(maskString));
+                    outputTensor.masks.push_back(mask);
+                    break;
+                }
+            }
+
+            outputTensor.numBBoxes = outputTensor.masks.size() > 0
+                ? outputTensor.masks.size()
+                : std::stoul(trim(block.at("num")));
+            outputTensor.numClasses = std::stoul(block.at("classes"));
+            if (m_ClassNames.empty())
+            {
+                for (uint32_t i=0;i< outputTensor.numClasses;++i)
+                {
+                    m_ClassNames.push_back(std::to_string(i));
+                }
+            }
+            outputTensor.blobName = "yolo_" + std::to_string(_n_yolo_ind);
+            // Provisional grid = (input / 32) * 2^(2 - head index); the former 2^index assignments were dead stores.
+            outputTensor.gridSize = (m_InputH / 32) * pow(2, 2-_n_yolo_ind);
+            outputTensor.grid_h = (m_InputH / 32) * pow(2, 2-_n_yolo_ind);
+            outputTensor.grid_w = (m_InputW / 32) * pow(2, 2-_n_yolo_ind);
+            outputTensor.stride = m_InputH / outputTensor.gridSize;
+            outputTensor.stride_h = m_InputH / outputTensor.grid_h;
+            outputTensor.stride_w = m_InputW / outputTensor.grid_w;
+            outputTensor.volume = outputTensor.grid_h* outputTensor.grid_w
+                *(outputTensor.numBBoxes*(5 + outputTensor.numClasses));
+            m_OutputTensors.push_back(outputTensor);
+            _n_yolo_ind++;
+        }
+    }
+}
+
+/**
+ * Builds a TensorRT engine layer by layer from m_configBlocks and the weights file named by
+ * m_staticStruct::model_wts, then serializes it via writePlanFileToDisk().
+ * Returns immediately when a plan file already exists at m_EnginePath.
+ * @param dataType   requested precision; kINT8 / kHALF abort if the platform lacks fast support
+ * @param calibrator calibrator used for kINT8 builds (asserted non-null in that case)
+ */
+void Detecter::createYOLOEngine(const nvinfer1::DataType dataType, Int8EntropyCalibrator* calibrator)
+{
+    if (fileExists(m_EnginePath))return;
+    std::vector<float> weights = loadWeights(m_staticStruct::model_wts, m_NetworkType);
+    std::vector<nvinfer1::Weights> trtWeights;
+    int weightPtr = 0;
+    int channels = m_InputC;
+    m_Builder = nvinfer1::createInferBuilder(m_Logger);
+    nvinfer1::IBuilderConfig* config = m_Builder->createBuilderConfig();
+    m_Network = m_Builder->createNetworkV2(0U);
+    if ((dataType == nvinfer1::DataType::kINT8 && !m_Builder->platformHasFastInt8())
+        || (dataType == nvinfer1::DataType::kHALF && !m_Builder->platformHasFastFp16()))
+    {
+        std::cout << "Platform doesn't support this precision." << std::endl;
+        assert(0);
+    }
+
+    nvinfer1::ITensor* data = m_Network->addInput(
+        m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT,
+        nvinfer1::DimsCHW{static_cast<int>(m_InputC), static_cast<int>(m_InputH),
+                          static_cast<int>(m_InputW)});
+    assert(data != nullptr);
+    // Add elementwise layer to normalize pixel values 0-1
+    nvinfer1::Dims divDims{
+        3,
+        {static_cast<int>(m_InputC), static_cast<int>(m_InputH), static_cast<int>(m_InputW)},
+        {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL,
+         nvinfer1::DimensionType::kSPATIAL}};
+    nvinfer1::Weights divWeights{nvinfer1::DataType::kFLOAT, nullptr,
+                                 static_cast<int64_t>(m_InputSize)};
+    float* divWt = new float[m_InputSize];
+    for (uint32_t w = 0; w < m_InputSize; ++w) divWt[w] = 255.0;
+    divWeights.values = divWt;
+    trtWeights.push_back(divWeights);  // tracked here so destroyNetworkUtils(trtWeights) can release it
+    nvinfer1::IConstantLayer* constDivide = m_Network->addConstant(divDims, divWeights);
+    assert(constDivide != nullptr);
+    nvinfer1::IElementWiseLayer* elementDivide = m_Network->addElementWise(
+        *data, *constDivide->getOutput(0), nvinfer1::ElementWiseOperation::kDIV);
+    assert(elementDivide != nullptr);
+
+    nvinfer1::ITensor* previous = elementDivide->getOutput(0);
+    std::vector<nvinfer1::ITensor*> tensorOutputs;
+    uint32_t outputTensorCount = 0;
+
+    // build the network using the network API
+    for (uint32_t i = 0; i < m_configBlocks.size(); ++i)
+    {
+        // check if num. of channels is correct
+        assert(getNumChannels(previous) == channels);
+        std::string layerIndex = "(" + std::to_string(i) + ")";
+
+        if (m_configBlocks.at(i).at("type") == "net")
+        {
+            printLayerInfo("", "layer", "     inp_size", "     out_size", "weightPtr");
+        }
+        else if (m_configBlocks.at(i).at("type") == "convolutional")
+        {
+            std::string inputVol = dimsToString(previous->getDimensions());
+            nvinfer1::ILayer* out;
+            std::string layerType;
+            //check activation
+            std::string activation = "";
+            if (m_configBlocks.at(i).find("activation") != m_configBlocks.at(i).end())
+            {
+                activation = m_configBlocks[i]["activation"];
+            }
+            // check if batch_norm enabled
+            if ((m_configBlocks.at(i).find("batch_normalize") != m_configBlocks.at(i).end()) &&
+                ("leaky" == activation))
+            {
+                out = netAddConvBNLeaky(i, m_configBlocks.at(i), weights, trtWeights, weightPtr,
+                                        channels, previous, m_Network);
+                layerType = "conv-bn-leaky";
+            }
+            else if ((m_configBlocks.at(i).find("batch_normalize") != m_configBlocks.at(i).end()) &&
+                ("mish" == activation))
+            {
+                out = net_conv_bn_mish(i, m_configBlocks.at(i), weights, trtWeights, weightPtr,
+                                        channels, previous, m_Network);
+                layerType = "conv-bn-mish";
+            }
+            else// if("linear" == activation)
+            {
+                out = netAddConvLinear(i, m_configBlocks.at(i), weights, trtWeights, weightPtr,
+                                       channels, previous, m_Network);
+                layerType = "conv-linear";
+            }
+            previous = out->getOutput(0);
+            assert(previous != nullptr);
+            channels = getNumChannels(previous);
+            std::string outputVol = dimsToString(previous->getDimensions());
+            tensorOutputs.push_back(out->getOutput(0));
+            printLayerInfo(layerIndex, layerType, inputVol, outputVol, std::to_string(weightPtr));
+        }
+        else if (m_configBlocks.at(i).at("type") == "shortcut")
+        {
+            assert(m_configBlocks.at(i).at("activation") == "linear");
+            assert(m_configBlocks.at(i).find("from") != m_configBlocks.at(i).end());
+            int from = stoi(m_configBlocks.at(i).at("from"));
+
+            std::string inputVol = dimsToString(previous->getDimensions());
+            // check if indexes are correct
+            // NOTE(review): i is unsigned, so the ">= 0" halves are always true; the "< size()" halves do the checking.
+            assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size()));
+            assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size()));
+            assert(i + from - 1 < i - 2);
+            nvinfer1::IElementWiseLayer* ew
+                = m_Network->addElementWise(*tensorOutputs[i - 2], *tensorOutputs[i + from - 1],
+                                            nvinfer1::ElementWiseOperation::kSUM);
+            assert(ew != nullptr);
+            std::string ewLayerName = "shortcut_" + std::to_string(i);
+            ew->setName(ewLayerName.c_str());
+            previous = ew->getOutput(0);
+            assert(previous != nullptr);
+            std::string outputVol = dimsToString(previous->getDimensions());
+            tensorOutputs.push_back(ew->getOutput(0));
+            printLayerInfo(layerIndex, "skip", inputVol, outputVol, "    -");
+        }
+        else if (m_configBlocks.at(i).at("type") == "yolo")
+        {
+            nvinfer1::Dims prevTensorDims = previous->getDimensions();
+           // assert(prevTensorDims.d[1] == prevTensorDims.d[2]);
+            TensorInfo& curYoloTensor = m_OutputTensors.at(outputTensorCount);
+            curYoloTensor.gridSize = prevTensorDims.d[1];
+            curYoloTensor.grid_h = prevTensorDims.d[1];
+            curYoloTensor.grid_w = prevTensorDims.d[2];
+            curYoloTensor.stride = m_InputW / curYoloTensor.gridSize;
+            curYoloTensor.stride_h = m_InputH / curYoloTensor.grid_h;
+            curYoloTensor.stride_w = m_InputW / curYoloTensor.grid_w;
+            m_OutputTensors.at(outputTensorCount).volume = curYoloTensor.grid_h
+                * curYoloTensor.grid_w
+                * (curYoloTensor.numBBoxes * (5 + curYoloTensor.numClasses));
+            std::string layerName = "yolo_" + std::to_string(outputTensorCount);
+            curYoloTensor.blobName = layerName;
+            nvinfer1::IPlugin* yoloPlugin
+                = new SLayerV3(m_OutputTensors.at(outputTensorCount).numBBoxes,
+                                  m_OutputTensors.at(outputTensorCount).numClasses,
+                                  m_OutputTensors.at(outputTensorCount).grid_h,
+                                  m_OutputTensors.at(outputTensorCount).grid_w);
+            assert(yoloPlugin != nullptr);
+            nvinfer1::IPluginLayer* yolo = m_Network->addPlugin(&previous, 1, *yoloPlugin);
+            assert(yolo != nullptr);
+            yolo->setName(layerName.c_str());
+            std::string inputVol = dimsToString(previous->getDimensions());
+            previous = yolo->getOutput(0);
+            assert(previous != nullptr);
+            previous->setName(layerName.c_str());
+            std::string outputVol = dimsToString(previous->getDimensions());
+            m_Network->markOutput(*previous);
+            channels = getNumChannels(previous);
+            tensorOutputs.push_back(yolo->getOutput(0));
+            printLayerInfo(layerIndex, "yolo", inputVol, outputVol, std::to_string(weightPtr));
+            ++outputTensorCount;
+        }
+        else if (m_configBlocks.at(i).at("type") == "route")
+        {
+            size_t found = m_configBlocks.at(i).at("layers").find(",");
+            if (found != std::string::npos)//concate multi layers
+            {
+                std::vector<int> vec_index = split_layer_index(m_configBlocks.at(i).at("layers"), ",");
+                for (auto &ind_layer:vec_index)
+                {
+                    if (ind_layer < 0)
+                    {
+                        ind_layer = static_cast<int>(tensorOutputs.size()) + ind_layer;
+                    }
+                    assert(ind_layer < static_cast<int>(tensorOutputs.size()) && ind_layer >= 0);
+                }
+                // Gather the inputs in a vector; the malloc'd array used previously was never freed.
+                std::vector<nvinfer1::ITensor*> concatInputs(vec_index.size());
+                for (size_t ind = 0; ind < vec_index.size(); ++ind)
+                {
+                    concatInputs[ind] = tensorOutputs[vec_index[ind]];
+                }
+                nvinfer1::IConcatenationLayer* concat
+                    = m_Network->addConcatenation(concatInputs.data(), static_cast<int>(vec_index.size()));
+                assert(concat != nullptr);
+                std::string concatLayerName = "route_" + std::to_string(i - 1);
+                concat->setName(concatLayerName.c_str());
+                // concatenate along the channel dimension
+                concat->setAxis(0);
+                previous = concat->getOutput(0);
+                assert(previous != nullptr);
+                std::string outputVol = dimsToString(previous->getDimensions());
+                int nums = 0;
+                for (auto &indx:vec_index)
+                {
+                    nums += getNumChannels(tensorOutputs[indx]);
+                }
+                channels = nums;
+                tensorOutputs.push_back(concat->getOutput(0));
+                printLayerInfo(layerIndex, "route", "        -", outputVol,std::to_string(weightPtr));
+            }
+            else //single layer
+            {
+                int idx = std::stoi(trim(m_configBlocks.at(i).at("layers")));
+                if (idx < 0)
+                {
+                    idx = static_cast<int>(tensorOutputs.size()) + idx;
+                }
+                assert(idx < static_cast<int>(tensorOutputs.size()) && idx >= 0);
+
+                //route
+                if (m_configBlocks.at(i).find("groups") == m_configBlocks.at(i).end())
+                {
+                    previous = tensorOutputs[idx];
+                    assert(previous != nullptr);
+                    std::string outputVol = dimsToString(previous->getDimensions());
+                    // set the output volume depth
+                    channels = getNumChannels(tensorOutputs[idx]);
+                    tensorOutputs.push_back(tensorOutputs[idx]);
+                    printLayerInfo(layerIndex, "route", "        -", outputVol, std::to_string(weightPtr));
+
+                }
+                //yolov4-tiny route split layer
+                else
+                {
+                    if (m_configBlocks.at(i).find("group_id") == m_configBlocks.at(i).end())
+                    {
+                        assert(0);
+                    }
+                    int chunk_idx = std::stoi(trim(m_configBlocks.at(i).at("group_id")));
+                    nvinfer1::ILayer* out = layer_split(i, tensorOutputs[idx], m_Network);
+                    std::string inputVol = dimsToString(previous->getDimensions());
+                    previous = out->getOutput(chunk_idx);
+                    assert(previous != nullptr);
+                    channels = getNumChannels(previous);
+                    std::string outputVol = dimsToString(previous->getDimensions());
+                    tensorOutputs.push_back(out->getOutput(chunk_idx));
+                    printLayerInfo(layerIndex,"chunk", inputVol, outputVol, std::to_string(weightPtr));
+                }
+            }
+        }
+        else if (m_configBlocks.at(i).at("type") == "upsample")
+        {
+            std::string inputVol = dimsToString(previous->getDimensions());
+            nvinfer1::ILayer* out = netAddUpsample(i - 1, m_configBlocks[i], weights, trtWeights,
+                                                   channels, previous, m_Network);
+            previous = out->getOutput(0);
+            std::string outputVol = dimsToString(previous->getDimensions());
+            tensorOutputs.push_back(out->getOutput(0));
+            printLayerInfo(layerIndex, "upsample", inputVol, outputVol, "    -");
+        }
+        else if (m_configBlocks.at(i).at("type") == "maxpool")
+        {
+            // Add same padding layers
+            if (m_configBlocks.at(i).at("size") == "2" && m_configBlocks.at(i).at("stride") == "1")
+            {
+                m_TinyMaxpoolPaddingFormula->addSamePaddingLayer("maxpool_" + std::to_string(i));
+            }
+            std::string inputVol = dimsToString(previous->getDimensions());
+            nvinfer1::ILayer* out = netAddMaxpool(i, m_configBlocks.at(i), previous, m_Network);
+            previous = out->getOutput(0);
+            assert(previous != nullptr);
+            std::string outputVol = dimsToString(previous->getDimensions());
+            tensorOutputs.push_back(out->getOutput(0));
+            printLayerInfo(layerIndex, "maxpool", inputVol, outputVol, std::to_string(weightPtr));
+        }
+        else
+        {
+            std::cout << "Unsupported layer type --> \"" << m_configBlocks.at(i).at("type") << "\""
+                      << std::endl;
+            assert(0);
+        }
+    }
+
+    if (static_cast<int>(weights.size()) != weightPtr)
+    {
+        std::cout << "Number of unused weights left : " << static_cast<int>(weights.size()) - weightPtr << std::endl;
+        assert(0);
+    }
+
+    // Create and cache the engine if not already present
+    if (fileExists(m_EnginePath))
+    {
+        std::cout << "Using previously generated plan file located at " << m_EnginePath
+                  << std::endl;
+        destroyNetworkUtils(trtWeights);
+        return;
+    }
+
+    m_Builder->setMaxBatchSize(m_BatchSize);
+    //m_Builder->setMaxWorkspaceSize(1 << 20);
+
+    config->setMaxWorkspaceSize(1 << 20);  // 1 << 20 bytes = 1 MiB of builder workspace
+    if (dataType == nvinfer1::DataType::kINT8)
+    {
+        assert((calibrator != nullptr) && "Invalid calibrator for INT8 precision");
+      //  m_Builder->setInt8Mode(true);
+        config->setFlag(nvinfer1::BuilderFlag::kINT8);
+     //   m_Builder->setInt8Calibrator(calibrator);
+        config->setInt8Calibrator(calibrator);
+    //  config->setTacticSources(1U << static_cast<uint32_t>(TacticSource::kCUBLAS) | 1U << static_cast<uint32_t>(TacticSource::kCUBLAS_LT));
+    }
+    else if (dataType == nvinfer1::DataType::kHALF)
+    {
+        config->setFlag(nvinfer1::BuilderFlag::kFP16);
+     //   m_Builder->setHalf2Mode(true);
+    }
+
+    m_Builder->allowGPUFallback(true);
+    int nbLayers = m_Network->getNbLayers();
+    int layersOnDLA = 0;
+ //   std::cout << "Total number of layers: " << nbLayers << std::endl;
+    for (int i = 0; i < nbLayers; i++)
+    {
+        nvinfer1::ILayer* curLayer = m_Network->getLayer(i);
+        if (m_DeviceType == "kDLA" && m_Builder->canRunOnDLA(curLayer))
+        {
+            m_Builder->setDeviceType(curLayer, nvinfer1::DeviceType::kDLA);
+            layersOnDLA++;
+            std::cout << "Set layer " << curLayer->getName() << " to run on DLA" << std::endl;
+        }
+    }
+ //   std::cout << "Total number of layers on DLA: " << layersOnDLA << std::endl;
+
+    // Build the engine
+    std::cout << "Building the TensorRT Engine..." << std::endl;
+    m_Engine = m_Builder->buildEngineWithConfig(*m_Network,*config);
+    assert(m_Engine != nullptr);
+    std::cout << "Building complete!" << std::endl;
+
+    // Serialize the engine
+    writePlanFileToDisk();
+
+    // destroy
+    destroyNetworkUtils(trtWeights);
+}
 
 void Detecter::doInference(const unsigned char* input, const uint32_t batchSize)
@@ -268,10 +751,10 @@
 void Detecter::setOutput(int type)
 {
     m_OutputTensors.clear();
+    printf("0-0-0-0-0-0------------------%d",type);
     if(type==2)
         for (int i = 0; i < 2; ++i)
         {
-
             TensorInfo outputTensor;
             outputTensor.numClasses = CLASS_BUM;
             outputTensor.blobName = "yolo_" + std::to_string(i);
@@ -323,7 +806,17 @@
         {
             TensorInfo outputTensor;
             outputTensor.numClasses = CLASS_BUM;
-            outputTensor.blobName = "yolo_" + std::to_string(i);
+            outputTensor.blobName = "yolo_" + to_string(i);
+            // if (i==0)
+            // {
+            //     outputTensor.blobName = "139_convolutional_reshape_2";
+            // }else if (i==1)
+            // {
+            //     outputTensor.blobName = "150_convolutional_reshape_2";
+            // }else if (i==2)
+            // {
+            //     outputTensor.blobName = "161_convolutional_reshape_2";
+            // }
             outputTensor.gridSize = (m_InputH / 32) * pow(2, 2-i);
             outputTensor.grid_h = (m_InputH / 32) * pow(2, 2-i);
             outputTensor.grid_w = (m_InputW / 32) * pow(2, 2-i);
@@ -380,3 +873,24 @@
             m_OutputTensors.push_back(outputTensor);
         }
 }
+
+void Detecter::writePlanFileToDisk()
+{
+    // Serializes m_Engine into m_ModelStream and caches the plan at m_EnginePath.
+    std::cout << "Serializing the TensorRT Engine..." << std::endl;
+    assert(m_Engine && "Invalid TensorRT Engine");
+    m_ModelStream = m_Engine->serialize();
+    assert(m_ModelStream && "Unable to serialize engine");
+    assert(!m_EnginePath.empty() && "Enginepath is empty");
+    // Stream the blob straight to disk (no intermediate copy), then verify open/write/close.
+    std::ofstream outFile(m_EnginePath, std::ios::binary | std::ios::out);
+    outFile.write(static_cast<const char*>(m_ModelStream->data()), m_ModelStream->size());
+    outFile.close();
+    if (!outFile)
+    {
+        std::cerr << "Failed to write plan file : " << m_EnginePath << std::endl;
+        return;
+    }
+    std::cout << "Serialized plan file cached at location : " << m_EnginePath << std::endl;
+}
+
diff --git a/src/detecter_tools/model.h b/src/detecter_tools/model.h
index eb3adff..e993f44 100644
--- a/src/detecter_tools/model.h
+++ b/src/detecter_tools/model.h
@@ -3,7 +3,7 @@
 
 #include "plugin_factory.h"
 #include "trt_utils.h"
-
+#include "calibrator.h"
 #include "NvInfer.h"
 #include "NvInferPlugin.h"
 #include "NvInferRuntimeCommon.h"
@@ -16,6 +16,7 @@
 #include "../utils/time_util.h"
 #include "../config.h"
 #include "opencv2/opencv.hpp"
+#include <numeric>
 
 struct NetworkInfo
 {
@@ -78,6 +79,7 @@
     const std::string m_DeviceType;
     const std::string m_InputBlobName;
     std::vector<TensorInfo> m_OutputTensors;
+    std::vector<std::map<std::string, std::string>> m_configBlocks;
     uint32_t m_InputH;
     uint32_t m_InputW;
     uint32_t m_InputC;
@@ -172,12 +174,21 @@
     void setOutput(int type);
 private:
     Logger m_Logger;
+    void createYOLOEngine(const nvinfer1::DataType dataType = nvinfer1::DataType::kFLOAT,
+                          Int8EntropyCalibrator* calibrator = nullptr);
+    void writePlanFileToDisk();
+    std::vector<std::map<std::string, std::string>> parseConfigFile(const std::string cfgFilePath);
+    void parseConfigBlocks();
     void allocateBuffers();
     bool verifyEngine();
     void destroyNetworkUtils(std::vector<nvinfer1::Weights>& trtWeights);
+protected:
+    const std::string m_NetworkType;
+    std::unique_ptr<YoloTinyMaxpoolPaddingFormula> m_TinyMaxpoolPaddingFormula;
 
 private:
     Timer _timer;
+    int _n_yolo_ind = 0;
 };
 
 #endif
diff --git a/src/detecter_tools/trt_utils.cpp b/src/detecter_tools/trt_utils.cpp
index 75d5d6a..ebf2864 100644
--- a/src/detecter_tools/trt_utils.cpp
+++ b/src/detecter_tools/trt_utils.cpp
@@ -21,34 +21,34 @@
                                    cv::Scalar(0.0, 0.0, 0.0),true);
 }
 
-// static void leftTrim(std::string& s)
-// {
-//     s.erase(s.begin(), find_if(s.begin(), s.end(), [](int ch) { return !isspace(ch); }));
-// }
+static void leftTrim(std::string& s)  // strips leading whitespace from `s` in place
+{   // unsigned char parameter: isspace() is undefined for negative (non-ASCII) char values
+    s.erase(s.begin(), find_if(s.begin(), s.end(), [](unsigned char ch) { return !isspace(ch); }));
+}
 
-// static void rightTrim(std::string& s)
-// {
-//     s.erase(find_if(s.rbegin(), s.rend(), [](int ch) { return !isspace(ch); }).base(), s.end());
-// }
+static void rightTrim(std::string& s)  // strips trailing whitespace from `s` in place
+{   // unsigned char parameter: isspace() is undefined for negative (non-ASCII) char values
+    s.erase(find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !isspace(ch); }).base(), s.end());
+}
 
-// std::string trim(std::string s)
-// {
-//     leftTrim(s);
-//     rightTrim(s);
-//     return s;
-// }
+std::string trim(std::string s)
+{
+    // `s` is a by-value copy: strip its tail, then its head, and hand it back.
+    rightTrim(s);
+    leftTrim(s);
+    return s;
+}
 
-// std::string triml(std::string s,const char* t)
-// {
-//     s.erase(0, s.find_first_not_of(t));
-//     return s;
-// }
+std::string triml(std::string s,const char* t)
+{
+    // Erases the leading run of characters listed in `t` (all of `s` if nothing else is left).
+    return s.erase(0, s.find_first_not_of(t));
+}
 
-// std::string trimr(std::string s, const char* t)
-// {
-//     s.erase(s.find_last_not_of(t) + 1);
-//     return s;
-// }
+std::string trimr(std::string s, const char* t)
+{
+    // Erases the trailing run of characters listed in `t` (all of `s` if nothing else is left).
+    return s.erase(s.find_last_not_of(t) + 1);
+}
 
 float clamp(const float val, const float minVal, const float maxVal)
 {
@@ -115,6 +115,305 @@
 //               << " xmax:" << b.box.x2 << " ymax:" << b.box.y2 << std::endl;
 // }
 //
+
+// Loads the float payload of the weights file at `weightsFilePath`, in file order.
+// The byte at offset 4 selects the remaining header size (1: 11 bytes, 2: 15 bytes);
+// `networkType` is not used by this function.
+std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& networkType)
+{
+    assert(fileExists(weightsFilePath));
+    std::cout << "Loading pre-trained weights..." << std::endl;
+    std::ifstream file(weightsFilePath, std::ios_base::binary);
+    assert(file.good());
+    file.ignore(4);
+    char versionByte = 0;
+    file.read(&versionByte, 1);
+    const int version = static_cast<unsigned char>(versionByte);
+    if (version == 1)
+    {
+        file.ignore(11);
+    }
+    else if (version == 2)
+    {
+        file.ignore(15);
+    }
+    else
+    {
+        std::cout << "Invalid network type" << std::endl;
+        assert(0);
+    }
+
+    // Read directly into a float (no heap scratch buffer); the loop stops at the first short read.
+    std::vector<float> weights;
+    float value = 0.0f;
+    while (file.read(reinterpret_cast<char*>(&value), sizeof(value)))
+    {
+        weights.push_back(value);
+    }
+    // A non-zero gcount here means the file ended in the middle of a float.
+    assert(file.gcount() == 0 && "Truncated weights file");
+    std::cout << "Loading complete!" << std::endl;
+    return weights;
+}
+
+// Formats the extents of `d` as right-aligned, width-4 fields joined by " x".
+std::string dimsToString(const nvinfer1::Dims d)
+{
+    assert(d.nbDims >= 1);
+    std::stringstream text;
+    const int last = d.nbDims - 1;
+    for (int axis = 0; axis <= last; ++axis)
+    {
+        text << std::setw(4) << d.d[axis] << (axis < last ? " x" : "");
+    }
+    return text.str();
+}
+
+// Adds a max-pooling layer for a "maxpool" block: kernel block["size"], stride
+// block["stride"], padding (size - 1) / 2 on both axes, named "maxpool_<layerIdx>".
+nvinfer1::ILayer* netAddMaxpool(int layerIdx, std::map<std::string, std::string>& block,
+                                nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network)
+{
+    assert(block.at("type") == "maxpool");
+    assert(block.find("size") != block.end());
+    assert(block.find("stride") != block.end());
+
+    const int kernel = std::stoi(block.at("size"));
+    const int step = std::stoi(block.at("stride"));
+    const int border = (kernel - 1) / 2;
+    const std::string layerName = "maxpool_" + std::to_string(layerIdx);
+    nvinfer1::IPoolingLayer* layer = network->addPoolingNd(
+        *input, nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{kernel, kernel});
+    assert(layer);
+    layer->setPaddingNd(nvinfer1::DimsHW{border, border});
+    layer->setStrideNd(nvinfer1::DimsHW{step, step});
+    layer->setName(layerName.c_str());
+    return layer;
+}
+
+// Adds a convolution with bias and linear activation for a "convolutional" block
+// that has no batch_normalize key. The layer is named "conv_<layerIdx>".
+// Starting at `weightPtr`, `weights` supplies `filters` bias values followed by
+// filters * inputChannels * size * size kernel values; `weightPtr` is advanced past them.
+// Both copied buffers are appended to `trtWeights` (bias first, then kernel).
+// Returns the convolution layer.
+nvinfer1::ILayer* netAddConvLinear(int layerIdx, std::map<std::string, std::string>& block,
+                                   std::vector<float>& weights,
+                                   std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr,
+                                   int& inputChannels, nvinfer1::ITensor* input,
+                                   nvinfer1::INetworkDefinition* network)
+{
+    assert(block.at("type") == "convolutional");
+    assert(block.find("batch_normalize") == block.end());
+    assert(block.at("activation") == "linear");
+    assert(block.find("filters") != block.end());
+    assert(block.find("pad") != block.end());
+    assert(block.find("size") != block.end());
+    assert(block.find("stride") != block.end());
+
+    const int filters = std::stoi(block.at("filters"));
+    const int padding = std::stoi(block.at("pad"));
+    const int kernelSize = std::stoi(block.at("size"));
+    const int stride = std::stoi(block.at("stride"));
+    const int pad = padding ? (kernelSize - 1) / 2 : 0;
+    const int kernelCount = filters * inputChannels * kernelSize * kernelSize;
+    // Refuse to read past the end of `weights` (e.g. a truncated weights file).
+    assert(weightPtr >= 0 && filters >= 0 && kernelCount >= 0);
+    assert(static_cast<size_t>(weightPtr) + filters + kernelCount <= weights.size());
+
+    // Copies the next `count` floats into a fresh heap buffer and advances weightPtr.
+    auto takeWeights = [&weights, &weightPtr](int count) {
+        float* buffer = new float[count];
+        for (int i = 0; i < count; ++i)
+        {
+            buffer[i] = weights[weightPtr++];
+        }
+        return nvinfer1::Weights{nvinfer1::DataType::kFLOAT, buffer, count};
+    };
+    const nvinfer1::Weights convBias = takeWeights(filters);
+    trtWeights.push_back(convBias);
+    const nvinfer1::Weights convWt = takeWeights(kernelCount);
+    trtWeights.push_back(convWt);
+
+    nvinfer1::IConvolutionLayer* conv = network->addConvolution(
+        *input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias);
+    assert(conv != nullptr);
+    const std::string convLayerName = "conv_" + std::to_string(layerIdx);
+    conv->setName(convLayerName.c_str());
+    conv->setStride(nvinfer1::DimsHW{stride, stride});
+    conv->setPadding(nvinfer1::DimsHW{pad, pad});
+
+    return conv;
+}
+
+// Adds a bias-free convolution, a per-channel scale layer implementing batch norm, and
+// the "Mish_TRT" plugin for a "convolutional" block with batch_normalize=1 and
+// activation=mish.
+// Starting at `weightPtr`, `weights` is consumed in this order: `filters` BN biases,
+// BN weights, BN running means, BN running variances, then
+// filters * inputChannels * size * size convolution weights; `weightPtr` is advanced
+// past everything that was read.
+// Every heap buffer handed to TensorRT is also appended to `trtWeights`.
+// Returns the layer created for the Mish plugin; `layerIdx` only feeds the names.
+nvinfer1::ILayer* net_conv_bn_mish(int layerIdx,
+    std::map<std::string, std::string>& block,
+    std::vector<float>& weights,
+    std::vector<nvinfer1::Weights>& trtWeights,
+    int& weightPtr,
+    int& inputChannels,
+    nvinfer1::ITensor* input,
+    nvinfer1::INetworkDefinition* network)
+{
+    assert(block.at("type") == "convolutional");
+    assert(block.find("batch_normalize") != block.end());
+    assert(block.at("batch_normalize") == "1");
+    assert(block.at("activation") == "mish");
+    assert(block.find("filters") != block.end());
+    assert(block.find("pad") != block.end());
+    assert(block.find("size") != block.end());
+    assert(block.find("stride") != block.end());
+
+    const int filters = std::stoi(block.at("filters"));
+    const int padding = std::stoi(block.at("pad"));
+    const int kernelSize = std::stoi(block.at("size"));
+    const int stride = std::stoi(block.at("stride"));
+    const int pad = padding ? (kernelSize - 1) / 2 : 0;
+
+    // Guard against reading past the end of `weights` (e.g. a truncated weights file).
+    assert(weightPtr >= 0 && filters > 0 && inputChannels > 0 && kernelSize > 0);
+    assert(static_cast<size_t>(weightPtr) + 4u * filters
+           + static_cast<size_t>(filters) * inputChannels * kernelSize * kernelSize <= weights.size());
+    // load BN biases
+    std::vector<float> bnBiases;
+    for (int i = 0; i < filters; ++i)
+    {
+        bnBiases.push_back(weights[weightPtr]);
+        weightPtr++;
+    }
+    // load BN weights
+    std::vector<float> bnWeights;
+    for (int i = 0; i < filters; ++i)
+    {
+        bnWeights.push_back(weights[weightPtr]);
+        weightPtr++;
+    }
+    // load BN running_mean
+    std::vector<float> bnRunningMean;
+    for (int i = 0; i < filters; ++i)
+    {
+        bnRunningMean.push_back(weights[weightPtr]);
+        weightPtr++;
+    }
+    // load BN running_var
+    std::vector<float> bnRunningVar;
+    for (int i = 0; i < filters; ++i)
+    {
+        // 1e-05 for numerical stability
+        bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5f));
+        weightPtr++;
+    }
+    // load Conv layer weights (GKCRS)
+    int size = filters * inputChannels * kernelSize * kernelSize;
+    nvinfer1::Weights convWt{ nvinfer1::DataType::kFLOAT, nullptr, size };
+    float* val = new float[size];
+    for (int i = 0; i < size; ++i)
+    {
+        val[i] = weights[weightPtr];
+        weightPtr++;
+    }
+    convWt.values = val;
+    trtWeights.push_back(convWt);
+    nvinfer1::Weights convBias{ nvinfer1::DataType::kFLOAT, nullptr, 0 };
+    trtWeights.push_back(convBias);
+    nvinfer1::IConvolutionLayer* conv = network->addConvolution(
+        *input, filters, nvinfer1::DimsHW{ kernelSize, kernelSize }, convWt, convBias);
+    assert(conv != nullptr);
+    std::string convLayerName = "conv_" + std::to_string(layerIdx);
+    conv->setName(convLayerName.c_str());
+    conv->setStride(nvinfer1::DimsHW{ stride, stride });
+    conv->setPadding(nvinfer1::DimsHW{ pad, pad });
+
+    /***** BATCHNORM LAYER *****/
+    /***************************/
+    size = filters;
+    // create the weights
+    nvinfer1::Weights shift{ nvinfer1::DataType::kFLOAT, nullptr, size };
+    nvinfer1::Weights scale{ nvinfer1::DataType::kFLOAT, nullptr, size };
+    nvinfer1::Weights power{ nvinfer1::DataType::kFLOAT, nullptr, size };
+    float* shiftWt = new float[size];
+    for (int i = 0; i < size; ++i)
+    {
+        shiftWt[i]
+            = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
+    }
+    shift.values = shiftWt;
+    float* scaleWt = new float[size];
+    for (int i = 0; i < size; ++i)
+    {
+        scaleWt[i] = bnWeights.at(i) / bnRunningVar[i];
+    }
+    scale.values = scaleWt;
+    float* powerWt = new float[size];
+    for (int i = 0; i < size; ++i)
+    {
+        powerWt[i] = 1.0;
+    }
+    power.values = powerWt;
+    trtWeights.push_back(shift);
+    trtWeights.push_back(scale);
+    trtWeights.push_back(power);
+    // Add the batch norm layers
+    nvinfer1::IScaleLayer* bn = network->addScale(
+        *conv->getOutput(0), nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
+    assert(bn != nullptr);
+    std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx);
+    bn->setName(bnLayerName.c_str());
+    /***** ACTIVATION LAYER *****/
+    /****************************/
+    auto creator = getPluginRegistry()->getPluginCreator("Mish_TRT", "1");
+    assert(creator != nullptr && "Mish_TRT plugin is not registered");
+    const nvinfer1::PluginFieldCollection* pluginData = creator->getFieldNames();
+    nvinfer1::IPluginV2 *pluginObj = creator->createPlugin(("mish" + std::to_string(layerIdx)).c_str(), pluginData);
+    assert(pluginObj != nullptr);
+    nvinfer1::ITensor* inputTensors[] = { bn->getOutput(0) };
+    auto mish = network->addPluginV2(&inputTensors[0], 1, *pluginObj);
+    assert(mish != nullptr);
+    return mish;
+}
+
+// Returns the first extent (d[0]) of `t`, which must be a 3-dimensional tensor.
+int getNumChannels(nvinfer1::ITensor* t)
+{
+    const nvinfer1::Dims dims = t->getDimensions();
+    assert(dims.nbDims == 3);
+    return dims.d[0];
+}
+
+// Splits `s_` on `delimiter_` and converts each trimmed piece with std::stoi (which throws on bad input).
+std::vector<int> split_layer_index(const std::string &s_,const std::string &delimiter_)
+{
+    std::vector<int> parsed;
+    size_t begin = 0;
+    size_t hit = s_.find(delimiter_, begin);
+    while (hit != std::string::npos)
+    {
+        parsed.push_back(std::stoi(trim(s_.substr(begin, hit - begin))));
+        begin = hit + delimiter_.length();
+        hit = s_.find(delimiter_, begin);
+    }
+    parsed.push_back(std::stoi(trim(s_.substr(begin))));
+    return parsed;
+}
+
+// Prints one left-aligned table row: index(6) name(15) input(20) output(20) weightPtr(6).
+void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput,
+                    std::string layerOutput, std::string weightPtr)
+{
+    std::cout << std::left << std::setw(6) << layerIndex << std::setw(15) << layerName
+              << std::setw(20) << layerInput << std::setw(20) << layerOutput
+              << std::setw(6) << weightPtr << std::endl;
+}
+
 uint64_t get3DTensorVolume(nvinfer1::Dims inputDims)
 {
     assert(inputDims.nbDims == 3);
@@ -216,3 +515,236 @@
     }
     return out;
 }
+
+// Adds an upsample layer for an "upsample" block, built as a per-channel (grouped) deconvolution
+// with an all-ones kernel; kernel size and stride both equal block["stride"]. `weights` is unused.
+nvinfer1::ILayer* netAddUpsample(int layerIdx, std::map<std::string, std::string>& block,
+                                 std::vector<float>& weights,
+                                 std::vector<nvinfer1::Weights>& trtWeights, int& inputChannels,
+                                 nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network)
+{
+    assert(block.at("type") == "upsample");
+    assert(block.find("stride") != block.end());
+    nvinfer1::Dims inpDims = input->getDimensions();
+    assert(inpDims.nbDims == 3);
+    const int n_scale = std::stoi(block.at("stride"));
+    const int c1 = inpDims.d[0];
+    const int kernelCount = c1 * n_scale * n_scale;
+    float* deval = new float[kernelCount];
+    for (int i = 0; i < kernelCount; i++) deval[i] = 1.0f;
+    nvinfer1::Weights wts{ nvinfer1::DataType::kFLOAT, deval, kernelCount };
+    nvinfer1::Weights bias{ nvinfer1::DataType::kFLOAT, nullptr, 0 };
+    // Record the buffer in trtWeights, as the other layer builders do, so it is not leaked.
+    trtWeights.push_back(wts);
+    nvinfer1::IDeconvolutionLayer* upsample = network->addDeconvolutionNd(
+        *input, c1, nvinfer1::DimsHW{ n_scale, n_scale }, wts, bias);
+    assert(upsample != nullptr);
+    upsample->setStrideNd(nvinfer1::DimsHW{ n_scale, n_scale });
+    upsample->setNbGroups(c1);
+    return upsample;
+}
+
+// Adds the "CHUNK_TRT" plugin (version "1.0"), created as "chunk<n_layer_index_>", on `input_`.
+nvinfer1::ILayer * layer_split(const int n_layer_index_,
+    nvinfer1::ITensor *input_, nvinfer1::INetworkDefinition* network)
+{
+    auto creator = getPluginRegistry()->getPluginCreator("CHUNK_TRT", "1.0");
+    assert(creator != nullptr && "CHUNK_TRT plugin is not registered");
+    nvinfer1::IPluginV2 *pluginObj = creator->createPlugin(("chunk" + std::to_string(n_layer_index_)).c_str(), creator->getFieldNames());
+    assert(pluginObj != nullptr);
+    return network->addPluginV2(&input_, 1, *pluginObj);
+}
+
+// Adds a bias-free convolution, a per-channel scale layer implementing batch norm, and a
+// leaky-ReLU activation (alpha 0.1) for a "convolutional" block with batch_normalize=1
+// and activation=leaky.
+//
+// Starting at `weightPtr`, `weights` is consumed in this order: `filters` BN biases,
+// BN weights, BN running means, BN running variances, then
+// filters * inputChannels * size * size convolution weights; `weightPtr` is advanced
+// past everything that was read.
+// Every heap buffer handed to TensorRT is also appended to `trtWeights`.
+// Layers are named "conv_<layerIdx>", "batch_norm_<layerIdx>" and "leaky_<layerIdx>".
+//
+// Returns the activation layer.
+nvinfer1::ILayer* netAddConvBNLeaky(int layerIdx,
+                                    std::map<std::string, std::string>& block,
+                                    std::vector<float>& weights,
+                                    std::vector<nvinfer1::Weights>& trtWeights,
+                                    int& weightPtr,
+                                    int& inputChannels,
+                                    nvinfer1::ITensor* input,
+                                    nvinfer1::INetworkDefinition* network)
+{
+    assert(block.at("type") == "convolutional");
+    assert(block.find("batch_normalize") != block.end());
+    assert(block.at("batch_normalize") == "1");
+    assert(block.at("activation") == "leaky");
+    assert(block.find("filters") != block.end());
+    assert(block.find("pad") != block.end());
+    assert(block.find("size") != block.end());
+    assert(block.find("stride") != block.end());
+
+    int filters = std::stoi(block.at("filters"));
+    int padding = std::stoi(block.at("pad"));
+    int kernelSize = std::stoi(block.at("size"));
+    int stride = std::stoi(block.at("stride"));
+    int pad;
+    if (padding)
+        pad = (kernelSize - 1) / 2;
+    else
+        pad = 0;
+
+    // Guard against reading past the end of `weights` (e.g. a truncated weights file).
+    assert(weightPtr >= 0 && filters > 0 && inputChannels > 0 && kernelSize > 0);
+    assert(static_cast<size_t>(weightPtr) + 4u * filters
+           + static_cast<size_t>(filters) * inputChannels * kernelSize * kernelSize <= weights.size());
+
+    /***** CONVOLUTION LAYER *****/
+    /*****************************/
+    // batch norm weights are before the conv layer
+    // load BN biases (bn_biases)
+    std::vector<float> bnBiases;
+    for (int i = 0; i < filters; ++i)
+    {
+        bnBiases.push_back(weights[weightPtr]);
+        weightPtr++;
+    }
+    // load BN weights
+    std::vector<float> bnWeights;
+    for (int i = 0; i < filters; ++i)
+    {
+        bnWeights.push_back(weights[weightPtr]);
+        weightPtr++;
+    }
+    // load BN running_mean
+    std::vector<float> bnRunningMean;
+    for (int i = 0; i < filters; ++i)
+    {
+        bnRunningMean.push_back(weights[weightPtr]);
+        weightPtr++;
+    }
+    // load BN running_var
+    std::vector<float> bnRunningVar;
+    for (int i = 0; i < filters; ++i)
+    {
+        // 1e-05 for numerical stability
+        bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5f));
+        weightPtr++;
+    }
+    // load Conv layer weights (GKCRS)
+    int size = filters * inputChannels * kernelSize * kernelSize;
+    nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size};
+    float* val = new float[size];
+    for (int i = 0; i < size; ++i)
+    {
+        val[i] = weights[weightPtr];
+        weightPtr++;
+    }
+    convWt.values = val;
+    trtWeights.push_back(convWt);
+    nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, 0};
+    trtWeights.push_back(convBias);
+    nvinfer1::IConvolutionLayer* conv = network->addConvolution(
+        *input,
+        filters,
+        nvinfer1::DimsHW{kernelSize, kernelSize},
+        convWt,
+        convBias);
+    assert(conv != nullptr);
+    std::string convLayerName = "conv_" + std::to_string(layerIdx);
+    conv->setName(convLayerName.c_str());
+    conv->setStride(nvinfer1::DimsHW{stride, stride});
+    conv->setPadding(nvinfer1::DimsHW{pad, pad});
+
+    /***** BATCHNORM LAYER *****/
+    /***************************/
+    size = filters;
+    // create the weights
+    nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, nullptr, size};
+    nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, nullptr, size};
+    nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size};
+    float* shiftWt = new float[size];
+    for (int i = 0; i < size; ++i)
+    {
+        shiftWt[i]
+            = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
+    }
+    shift.values = shiftWt;
+    float* scaleWt = new float[size];
+    for (int i = 0; i < size; ++i)
+    {
+        scaleWt[i] = bnWeights.at(i) / bnRunningVar[i];
+    }
+    scale.values = scaleWt;
+    float* powerWt = new float[size];
+    for (int i = 0; i < size; ++i)
+    {
+        powerWt[i] = 1.0;
+    }
+    power.values = powerWt;
+    trtWeights.push_back(shift);
+    trtWeights.push_back(scale);
+    trtWeights.push_back(power);
+    // Add the batch norm layers
+    nvinfer1::IScaleLayer* bn = network->addScale(
+        *conv->getOutput(0), nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
+    assert(bn != nullptr);
+    std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx);
+    bn->setName(bnLayerName.c_str());
+    /***** ACTIVATION LAYER *****/
+    /****************************/
+    auto leaky = network->addActivation(*bn->getOutput(0),nvinfer1::ActivationType::kLEAKY_RELU);
+    // Validate the layer before configuring it.
+    assert(leaky != nullptr);
+    leaky->setAlpha(0.1f);
+    std::string leakyLayerName = "leaky_" + std::to_string(layerIdx);
+    leaky->setName(leakyLayerName.c_str());
+
+    return leaky;
+}
+
+
+// Reads `filename` line by line and returns every non-blank line, trimmed.
+// Each line is trimmed before the emptiness test, so whitespace-only lines
+// (including a lone '\r' from CRLF files) do not produce empty entries.
+std::vector<std::string> loadListFromTextFile(const std::string filename)
+{
+    assert(fileExists(filename));
+    std::vector<std::string> list;
+
+    std::ifstream f(filename);
+    if (!f)
+    {
+        std::cout << "failed to open " << filename;
+        assert(0);
+    }
+
+    std::string line;
+    while (std::getline(f, line))
+    {
+        std::string entry = trim(line);
+        if (!entry.empty())
+            list.push_back(entry);
+    }
+    return list;
+}
+// Loads the list in `filename`; an entry that does not exist as written is retried with
+// `prefix` prepended, and a warning goes to stderr when the prefixed path is missing too.
+std::vector<std::string> loadImageList(const std::string filename, const std::string prefix)
+{
+    std::vector<std::string> fileList = loadListFromTextFile(filename);
+    for (std::string& entry : fileList)
+    {
+        if (fileExists(entry, false)) continue;
+        const std::string candidate = prefix + entry;
+        if (!fileExists(candidate, false))
+        {
+            std::cerr << "WARNING: couldn't find: " << candidate
+                      << " while loading: " << filename << std::endl;
+            continue;
+        }
+        entry = candidate;
+    }
+    return fileList;
+}
diff --git a/src/detecter_tools/trt_utils.h b/src/detecter_tools/trt_utils.h
index 189a60b..a166a6f 100644
--- a/src/detecter_tools/trt_utils.h
+++ b/src/detecter_tools/trt_utils.h
@@ -67,6 +67,34 @@
         }
     }
 };
+nvinfer1::ILayer* netAddMaxpool(int layerIdx, std::map<std::string, std::string>& block,
+                                nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network);
+nvinfer1::ILayer* netAddConvLinear(int layerIdx, std::map<std::string, std::string>& block,
+                                   std::vector<float>& weights,
+                                   std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr,
+                                   int& inputChannels, nvinfer1::ITensor* input,
+                                   nvinfer1::INetworkDefinition* network);
+
+nvinfer1::ILayer* net_conv_bn_mish(int layerIdx,
+    std::map<std::string, std::string>& block,
+    std::vector<float>& weights,
+    std::vector<nvinfer1::Weights>& trtWeights,
+    int& weightPtr,
+    int& inputChannels,
+    nvinfer1::ITensor* input,
+    nvinfer1::INetworkDefinition* network);
+
+nvinfer1::ILayer* netAddConvBNLeaky(int layerIdx, std::map<std::string, std::string>& block,
+                                    std::vector<float>& weights,
+                                    std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr,
+                                    int& inputChannels, nvinfer1::ITensor* input,
+                                    nvinfer1::INetworkDefinition* network);
+std::string dimsToString(const nvinfer1::Dims d);
+std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& networkType);
+int getNumChannels(nvinfer1::ITensor* t);
+std::string trim(std::string s);
+std::string triml(std::string s, const char* t);
+std::string trimr(std::string s, const char* t);
 float clamp(const float val, const float minVal, const float maxVal);
 // Common helper functions
 cv::Mat blobFromDsImages(const std::vector<DsImage>& inputImages, const int& inputH,
@@ -77,9 +105,53 @@
 nvinfer1::ICudaEngine* loadTRTEngine(const std::string planFilePath, PluginFactory* pluginFactory,
                                      Logger& logger);
 uint64_t get3DTensorVolume(nvinfer1::Dims inputDims);
-
+std::vector<std::string> loadImageList(const std::string filename, const std::string prefix);
 std::vector<BBoxInfo> nmsAllClasses(const float nmsThresh, std::vector<BBoxInfo>& binfo,
                                     const uint32_t numClasses, const std::string &model_type);
-
+void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput,
+                    std::string layerOutput, std::string weightPtr);
 std::vector<BBoxInfo> nonMaximumSuppression(const float nmsThresh, std::vector<BBoxInfo> binfo);
+std::vector<int> split_layer_index(const std::string &s_,const std::string &delimiter_);
+nvinfer1::ILayer * layer_split(const int n_layer_index_,
+    nvinfer1::ITensor *input_,
+    nvinfer1::INetworkDefinition* network);
+nvinfer1::ILayer* netAddUpsample(int layerIdx, std::map<std::string, std::string>& block,
+                                 std::vector<float>& weights,
+                                 std::vector<nvinfer1::Weights>& trtWeights, int& inputChannels,
+                                 nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network);
+std::vector<std::string> loadListFromTextFile(const std::string filename);
+// Output-size formula for pooling layers. Layers registered through
+// addSamePaddingLayer() get (in + 2 * pad) / stride per axis; every other
+// layer gets (in - kernel) / stride + 1.
+class YoloTinyMaxpoolPaddingFormula : public nvinfer1::IOutputDimensionsFormula
+{
+public:
+    // Registers the layer named `input` as a same-padding layer.
+    void addSamePaddingLayer(std::string input) { m_SamePaddingLayers.insert(input); }
+
+private:
+    std::set<std::string> m_SamePaddingLayers;
+
+    // Computes the output {d[0], d[1]} extents; `dilation` is ignored.
+    nvinfer1::DimsHW compute(nvinfer1::DimsHW inputDims, nvinfer1::DimsHW kernelSize,
+                             nvinfer1::DimsHW stride, nvinfer1::DimsHW padding,
+                             nvinfer1::DimsHW dilation, const char* layerName) const override
+    {
+        // Kernel, stride and padding must be square (checked by assert only).
+        assert(kernelSize.d[0] == kernelSize.d[1]);
+        assert(stride.d[0] == stride.d[1]);
+        assert(padding.d[0] == padding.d[1]);
+
+        const bool samePadding = m_SamePaddingLayers.count(layerName) != 0;
+        int extent[2];
+        for (int axis = 0; axis < 2; ++axis)
+        {
+            extent[axis] = samePadding
+                ? (inputDims.d[axis] + 2 * padding.d[axis]) / stride.d[axis]
+                : (inputDims.d[axis] - kernelSize.d[axis]) / stride.d[axis] + 1;
+        }
+        return nvinfer1::DimsHW{extent[0], extent[1]};
+    }
+};
+
 #endif
diff --git a/src/h_interface.cpp b/src/h_interface.cpp
index 98e2595..3b3a751 100644
--- a/src/h_interface.cpp
+++ b/src/h_interface.cpp
@@ -8,6 +8,8 @@
 using namespace std;
 
 string m_staticStruct::model_path = "path";
+string m_staticStruct::model_cfg = "0"; // initial value of the struct's static variable
+string m_staticStruct::model_wts = "0"; // initial value of the struct's static variable
 int m_staticStruct::type = 1;
 bool m_staticStruct::isTrack= true;
 int m_staticStruct::max_cam_num = 0;
@@ -115,6 +117,8 @@
     if(reader.parse(in,root))
     {
         std::string model_path = root["param"]["model_path"].asString();
+        std::string model_cfg= root["param"]["model_cfg"].asString();
+        std::string model_wts = root["param"]["model_wts"].asString();
         int type = root["param"]["type"].asInt();
         bool isTrack = root["param"]["isTrack"].asBool();
         int max_cam_num = root["param"]["max_cam_num"].asInt();
@@ -122,6 +126,8 @@
         int mv_velocity = root["param"]["mv_velocity"].asFloat();
         int fall_rate = root["param"]["fall_rate"].asFloat();
         m_staticStruct::model_path  = model_path;
+        m_staticStruct::model_cfg  = model_cfg;
+        m_staticStruct::model_wts  = model_wts;
         m_staticStruct::type  = type;
         m_staticStruct::isTrack = isTrack;
         m_staticStruct::max_cam_num  = max_cam_num;

--
Gitblit v1.8.0