From 9b1532d86c2cf48a63017f3460897d8d14b98b60 Mon Sep 17 00:00:00 2001
From: Scheaven <xuepengqiang>
Date: 星期三, 08 九月 2021 17:45:25 +0800
Subject: [PATCH] coding
---
config.json | 2
src/config.h | 2
src/detecter_tools/trt_utils.h | 76 +++
src/detecter_tools/calibrator.h | 62 ++
src/detecter_tools/detector.cpp | 13
src/detecter_tools/trt_utils.cpp | 580 +++++++++++++++++++++++-
src/h_interface.cpp | 6
src/detecter_tools/calibrator.cpp | 114 ++++
src/detecter_tools/model.cpp | 524 +++++++++++++++++++++
src/detecter_tools/model.h | 13
CMakeLists.txt | 4
11 files changed, 1,355 insertions(+), 41 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 362ad2c..72caba6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,8 +3,8 @@
enable_language(CUDA)
set(CMAKE_CXX_COMPILIER "/usr/bin/g++")
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -O3 -DS_DEBUG -Wno-write-strings")
-set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -DS_DEBUG -Wl,-rpath -Wl,$ORIGIN")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -O3 -Wno-write-strings")
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-rpath -Wl,$ORIGIN")
set(CUDA_Path /usr/local/cuda)
find_package(OpenCV REQUIRED PATHS "/data/disk2/opt/01_opencv/opencv4.5.2")
diff --git a/config.json b/config.json
index 13fc379..28410e1 100644
--- a/config.json
+++ b/config.json
@@ -3,6 +3,8 @@
"runtime": "/opt/vasystem/libs/Detect:/usr/local/cuda-11.1/lib64:", // 项目所用到的环境
"param": {
"model_path": "/data/disk1/project/model_dump/02_yolo/baseDetect-kFLOAT-batch1.engine", // para里边自己算法可能用到的参数
+ "model_cfg": "/data/disk1/project/model_dump/02_yolo/cfg/baseDetector.cfg", // para里边自己算法可能用到的参数
+ "model_wts": "/data/disk1/project/model_dump/02_yolo/baseDetector.weights", // para里边自己算法可能用到的参数
//"model_path": "/data/disk1/project/model_dump/02_yolo/baseDetetor_small.bin", // para里边自己算法可能用到的参数
"type":1,
"max_cam_num": 8,
diff --git a/src/config.h b/src/config.h
index e7f6f80..9673e66 100644
--- a/src/config.h
+++ b/src/config.h
@@ -51,6 +51,8 @@
typedef struct m_staticStruct
{
static std::string model_path;
+ static std::string model_cfg;
+ static std::string model_wts;
static int type;
static bool isTrack;
static int max_cam_num;
diff --git a/src/detecter_tools/calibrator.cpp b/src/detecter_tools/calibrator.cpp
new file mode 100644
index 0000000..0a9bf2f
--- /dev/null
+++ b/src/detecter_tools/calibrator.cpp
@@ -0,0 +1,114 @@
+/**
+MIT License
+
+Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*
+*/
+
+#include "calibrator.h"
+#include <algorithm>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <iterator>
+#include <random>
+
+/**
+ * Prepares an INT8 calibrator.
+ *
+ * When no calibration table exists at calibTableFilePath, the calibration image list is
+ * loaded, truncated to a whole number of batches and shuffled. In every case a device
+ * buffer of batchSize * inputSize floats is allocated for getBatch().
+ *
+ * @param batchSize          number of images handed to TensorRT per getBatch() call
+ * @param calibImages        forwarded to loadImageList()
+ * @param calibImagesPath    forwarded to loadImageList()
+ * @param calibTableFilePath calibration cache file that is read and written
+ * @param inputSize          number of floats in one network input
+ * @param inputH             network input height
+ * @param inputW             network input width
+ * @param inputBlobName      expected name of the network input binding
+ * @param s_net_type_        network type string forwarded to DsImage
+ */
+Int8EntropyCalibrator::Int8EntropyCalibrator(const uint32_t& batchSize, const std::string& calibImages,
+                                             const std::string& calibImagesPath,
+                                             const std::string& calibTableFilePath,
+                                             const uint64_t& inputSize, const uint32_t& inputH,
+                                             const uint32_t& inputW, const std::string& inputBlobName,
+                                             const std::string &s_net_type_) :
+    // Listed in the order the members are declared in calibrator.h, which is the order
+    // they are actually initialized in.
+    m_BatchSize(batchSize),
+    m_InputH(inputH),
+    m_InputW(inputW),
+    m_InputSize(inputSize),
+    m_InputCount(batchSize * inputSize),
+    m_InputBlobName(inputBlobName),
+    _s_net_type(s_net_type_),
+    m_CalibTableFilePath(calibTableFilePath),
+    m_ImageIndex(0)
+{
+    if (!fileExists(m_CalibTableFilePath, false))
+    {
+        m_ImageList = loadImageList(calibImages, calibImagesPath);
+        // Drop the trailing images that do not fill a complete batch.
+        m_ImageList.resize(static_cast<int>(m_ImageList.size() / m_BatchSize) * m_BatchSize);
+        // std::random_shuffle is deprecated in C++14 and removed in C++17. The generator is
+        // still seeded from rand(), so the order keeps following the process-wide srand() state.
+        std::mt19937 rng(static_cast<std::mt19937::result_type>(rand()));
+        std::shuffle(m_ImageList.begin(), m_ImageList.end(), rng);
+    }
+
+    NV_CUDA_CHECK(cudaMalloc(&m_DeviceInput, m_InputCount * sizeof(float)));
+}
+
+/** Frees the device input buffer held in m_DeviceInput. */
+Int8EntropyCalibrator::~Int8EntropyCalibrator()
+{
+    NV_CUDA_CHECK(cudaFree(m_DeviceInput));
+}
+
+/**
+ * Copies the next batch of calibration images to the device and exposes it to TensorRT.
+ *
+ * @param bindings   output array; bindings[0] receives the device input buffer
+ * @param names      binding names; names[0] must equal the configured input blob name
+ * @param nbBindings number of bindings (not used)
+ * @return true when a batch was provided, false when fewer than one full batch is left
+ */
+bool Int8EntropyCalibrator::getBatch(void* bindings[], const char* names[], int nbBindings)
+{
+    // The image list holds a whole number of batches, so the final batch ends exactly at
+    // size(). Comparing with `>` (not `>=`) keeps that last batch; with `>=` it was dropped,
+    // and a list holding exactly one batch produced no calibration data at all.
+    if (m_ImageIndex + m_BatchSize > m_ImageList.size()) return false;
+
+    // Load next batch
+    std::vector<DsImage> dsImages(m_BatchSize);
+    for (uint32_t j = m_ImageIndex; j < m_ImageIndex + m_BatchSize; ++j)
+    {
+        dsImages.at(j - m_ImageIndex) = DsImage(m_ImageList.at(j), _s_net_type, m_InputH, m_InputW);
+    }
+    m_ImageIndex += m_BatchSize;
+
+    cv::Mat trtInput = blobFromDsImages(dsImages, m_InputH, m_InputW);
+
+    NV_CUDA_CHECK(cudaMemcpy(m_DeviceInput, trtInput.ptr<float>(0), m_InputCount * sizeof(float),
+                             cudaMemcpyHostToDevice));
+    assert(!strcmp(names[0], m_InputBlobName.c_str()));
+    bindings[0] = m_DeviceInput;
+    return true;
+}
+
+/**
+ * Loads the calibration table file into m_CalibrationCache.
+ *
+ * @param length set to the number of cached bytes (0 when nothing was read)
+ * @return pointer to the cached bytes, or nullptr when the cache is empty
+ */
+const void* Int8EntropyCalibrator::readCalibrationCache(size_t& length)
+{
+    m_CalibrationCache.clear();
+    assert(!m_CalibTableFilePath.empty());
+
+    std::ifstream tableFile(m_CalibTableFilePath, std::ios::binary | std::ios::in);
+    // Whitespace bytes belong to the table too, so they must not be skipped.
+    tableFile >> std::noskipws;
+    if (m_ReadCache && tableFile.good())
+    {
+        m_CalibrationCache.assign(std::istream_iterator<char>(tableFile),
+                                  std::istream_iterator<char>());
+    }
+
+    length = m_CalibrationCache.size();
+    if (length == 0)
+    {
+        std::cout << "New calibration table will be created to build the engine" << std::endl;
+        return nullptr;
+    }
+
+    std::cout << "Using cached calibration table to build the engine" << std::endl;
+    return m_CalibrationCache.data();
+}
+
+/**
+ * Writes the calibration table to m_CalibTableFilePath.
+ *
+ * @param cache  start of the table bytes
+ * @param length number of bytes to write
+ */
+void Int8EntropyCalibrator::writeCalibrationCache(const void* cache, size_t length)
+{
+    assert(!m_CalibTableFilePath.empty());
+    const char* const tableBytes = static_cast<const char*>(cache);
+    std::ofstream tableFile(m_CalibTableFilePath, std::ios::binary);
+    tableFile.write(tableBytes, length);
+    tableFile.close();
+}
diff --git a/src/detecter_tools/calibrator.h b/src/detecter_tools/calibrator.h
new file mode 100644
index 0000000..4eb44a7
--- /dev/null
+++ b/src/detecter_tools/calibrator.h
@@ -0,0 +1,62 @@
+/**
+MIT License
+
+Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*
+*/
+#ifndef _CALIBRATOR_H_
+#define _CALIBRATOR_H_
+
+#include "NvInfer.h"
+#include "ds_image.h"
+#include "trt_utils.h"
+
+/**
+ * INT8 entropy calibrator that feeds batches of calibration images to TensorRT and
+ * reads/writes the calibration table file.
+ */
+class Int8EntropyCalibrator : public nvinfer1::IInt8EntropyCalibrator2
+{
+public:
+    /** See calibrator.cpp for the meaning of each argument. */
+    Int8EntropyCalibrator(const uint32_t& batchSize, const std::string& calibImages,
+                          const std::string& calibImagesPath, const std::string& calibTableFilePath,
+                          const uint64_t& inputSize, const uint32_t& inputH, const uint32_t& inputW,
+                          const std::string& inputBlobName,const std::string &s_net_type_);
+    virtual ~Int8EntropyCalibrator();
+
+    /** Number of images delivered per getBatch() call. */
+    int getBatchSize() const override { return m_BatchSize; }
+    /** Uploads the next batch; returns false when no full batch is left. */
+    bool getBatch(void* bindings[], const char* names[], int nbBindings) override;
+    /** Returns the cached calibration table, or nullptr when none could be read. */
+    const void* readCalibrationCache(size_t& length) override;
+    /** Stores the calibration table at m_CalibTableFilePath. */
+    void writeCalibrationCache(const void* cache, size_t length) override;
+
+private:
+    const uint32_t m_BatchSize;
+    const uint32_t m_InputH;
+    const uint32_t m_InputW;
+    const uint64_t m_InputSize;             // floats in one network input
+    const uint64_t m_InputCount;            // batchSize * inputSize
+    const std::string m_InputBlobName;
+    const std::string _s_net_type;
+    // No default initializer: building a std::string from nullptr is undefined behaviour.
+    // The constructor always sets this member.
+    const std::string m_CalibTableFilePath;
+    uint32_t m_ImageIndex;                  // index of the first image of the next batch
+    bool m_ReadCache{true};                 // when false, readCalibrationCache() reads nothing
+    void* m_DeviceInput{nullptr};           // device buffer of m_InputCount floats
+    std::vector<std::string> m_ImageList;
+    std::vector<char> m_CalibrationCache;
+};
+
+#endif
diff --git a/src/detecter_tools/detector.cpp b/src/detecter_tools/detector.cpp
index 78e136f..be5f86a 100644
--- a/src/detecter_tools/detector.cpp
+++ b/src/detecter_tools/detector.cpp
@@ -31,10 +31,10 @@
vec_ds_images.emplace_back(img, _vec_net_type[_config.net_type], _p_net->getInputH(), _p_net->getInputW());
}
cv::Mat trtInput = blobFromDsImages(vec_ds_images, _p_net->getInputH(),_p_net->getInputW());
- timer.out("pre");
+ timer.out("eve pre detect ");
- _p_net->doInference(trtInput.data, vec_ds_images.size());
timer.reset();
+ _p_net->doInference(trtInput.data, vec_ds_images.size());
for (uint32_t i = 0; i < vec_ds_images.size(); ++i)
{
auto curImage = vec_ds_images.at(i);
@@ -65,9 +65,8 @@
}
vec_batch_result[i] = vec_result;
}
- timer.out("post");
+ timer.out("eve pre detect post");
DEBUG("--detect over--" );
-
}
void Detector::set_gpu_id(const int id)
@@ -95,9 +94,9 @@
void Detector::build_net()
{
- if(_config.net_type == SMALL)
- _p_net = std::unique_ptr<Detecter>{new Detecter(_info,_infer_param,2)};
- else{
+ if(_config.net_type == COMMON)
_p_net = std::unique_ptr<Detecter>{new Detecter(_info,_infer_param,1)};
+ else{
+ _p_net = std::unique_ptr<Detecter>{new Detecter(_info,_infer_param,2)};
}
}
diff --git a/src/detecter_tools/model.cpp b/src/detecter_tools/model.cpp
index c548561..0bccfb3 100644
--- a/src/detecter_tools/model.cpp
+++ b/src/detecter_tools/model.cpp
@@ -11,6 +11,7 @@
Detecter::Detecter( const NetworkInfo& networkInfo, const InferParams& inferParams, int type) :
+ m_NetworkType(networkInfo.networkType),
m_InputBlobName(networkInfo.inputBlobName),
m_InputH(416),
m_InputW(416),
@@ -26,10 +27,17 @@
m_Context(nullptr),
m_InputBindingIndex(-1),
m_CudaStream(nullptr),
- m_PluginFactory(new PluginFactory)
+ m_PluginFactory(new PluginFactory),
+ m_TinyMaxpoolPaddingFormula(new YoloTinyMaxpoolPaddingFormula)
{
- setOutput(type);
m_EnginePath = m_staticStruct::model_path;
+ if(!fileExists(m_EnginePath))
+ {
+ m_configBlocks = parseConfigFile(m_staticStruct::model_cfg);
+ parseConfigBlocks();
+ createYOLOEngine();
+ }
+ setOutput(type);
DEBUG((boost::format("m_EnginePath:%s")%m_EnginePath).str());
assert(m_PluginFactory != nullptr);
m_Engine = loadTRTEngine(m_EnginePath, m_PluginFactory, m_Logger);
@@ -67,7 +75,482 @@
m_PluginFactory = nullptr;
}
- // m_TinyMaxpoolPaddingFormula.reset();
+ m_TinyMaxpoolPaddingFormula.reset();
+}
+
+/**
+ * Reads a darknet-style cfg file into a list of blocks.
+ *
+ * Every "[name]" line starts a new block whose "type" entry is name; each following
+ * "key=value" line adds an entry to the current block. Blank lines and lines whose first
+ * non-blank character is '#' are ignored.
+ *
+ * @param cfgFilePath path of the cfg file; it must exist and be readable
+ * @return the blocks in file order (the last collected block is always appended)
+ */
+std::vector<std::map<std::string, std::string>> Detecter::parseConfigFile(const std::string cfgFilePath)
+{
+    std::cout << "::::::::::" << cfgFilePath <<std::endl;
+    assert(fileExists(cfgFilePath));
+    std::ifstream file(cfgFilePath);
+    assert(file.good());
+    std::string line;
+    std::vector<std::map<std::string, std::string>> blocks;
+    std::map<std::string, std::string> block;
+
+    while (getline(file, line))
+    {
+        // Classify the line by its first non-blank character before touching front():
+        // a whitespace-only line becomes empty after trim(), and front() on an empty
+        // string is undefined behaviour. This also skips indented comments.
+        const size_t firstChar = line.find_first_not_of(" \t\r\n\v\f");
+        if (firstChar == std::string::npos) continue;
+        if (line[firstChar] == '#') continue;
+        line = trim(line);
+        if (line.empty()) continue;
+
+        if (line.front() == '[')
+        {
+            // A new section begins: flush the block collected so far.
+            if (!block.empty())
+            {
+                blocks.push_back(block);
+                block.clear();
+            }
+            std::string key = "type";
+            std::string value = trim(line.substr(1, line.size() - 2));
+            block.insert(std::pair<std::string, std::string>(key, value));
+        }
+        else
+        {
+            // NOTE(review): a line without '=' is stored with the whole line as both key
+            // and value - confirm whether such lines should be rejected instead.
+            size_t cpos = line.find('=');
+            std::string key = trim(line.substr(0, cpos));
+            std::string value = trim(line.substr(cpos + 1));
+            block.insert(std::pair<std::string, std::string>(key, value));
+        }
+    }
+    blocks.push_back(block);
+    return blocks;
+}
+
+/**
+ * Extracts the network input geometry and the description of every output head from
+ * m_configBlocks.
+ *
+ * The "net" block sets m_InputH, m_InputW, m_InputC, m_BatchSize and m_InputSize. Each
+ * "region"/"yolo" block appends one TensorInfo to m_OutputTensors and increments
+ * _n_yolo_ind. m_ClassNames is filled with "0".."numClasses-1" when it is still empty.
+ */
+void Detecter::parseConfigBlocks()
+{
+    // Splits a comma separated list into trimmed tokens. A trailing comma is tolerated;
+    // an empty token in the middle is returned as an empty string.
+    const auto splitCsv = [](std::string csv)
+    {
+        std::vector<std::string> tokens;
+        while (!csv.empty())
+        {
+            const size_t comma = csv.find_first_of(',');
+            if (comma == std::string::npos)
+            {
+                tokens.push_back(trim(csv));
+                break;
+            }
+            tokens.push_back(trim(csv.substr(0, comma)));
+            csv.erase(0, comma + 1);
+        }
+        return tokens;
+    };
+
+    for (const auto& block : m_configBlocks)
+    {
+        const std::string& type = block.at("type");
+        if (type == "net")
+        {
+            assert((block.find("height") != block.end())
+                   && "Missing 'height' param in network cfg");
+            assert((block.find("width") != block.end()) && "Missing 'width' param in network cfg");
+            assert((block.find("channels") != block.end())
+                   && "Missing 'channels' param in network cfg");
+            assert((block.find("batch") != block.end())
+                   && "Missing 'batch' param in network cfg");
+
+            m_InputH = std::stoul(trim(block.at("height")));
+            m_InputW = std::stoul(trim(block.at("width")));
+            m_InputC = std::stoul(trim(block.at("channels")));
+            m_BatchSize = std::stoi(trim(block.at("batch")));
+            // assert(m_InputW == m_InputH);
+            m_InputSize = m_InputC * m_InputH * m_InputW;
+        }
+        else if ((type == "region") || (type == "yolo"))
+        {
+            assert((block.find("num") != block.end())
+                   && "Missing 'num' param in region/yolo layer");
+            assert((block.find("classes") != block.end())
+                   && "Missing 'classes' param in region/yolo layer");
+            assert((block.find("anchors") != block.end())
+                   && "Missing 'anchors' param in region/yolo layer");
+
+            TensorInfo outputTensor;
+            for (const std::string& token : splitCsv(block.at("anchors")))
+            {
+                float anchor = std::stof(token);
+                outputTensor.anchors.push_back(anchor);
+            }
+
+            assert((block.find("mask") != block.end())
+                   && "Missing 'mask' param in region/yolo layer");
+
+            for (const std::string& token : splitCsv(block.at("mask")))
+            {
+                uint32_t mask = std::stoul(token);
+                outputTensor.masks.push_back(mask);
+            }
+
+            // One box per mask entry; fall back to 'num' when the mask list is empty.
+            outputTensor.numBBoxes = outputTensor.masks.size() > 0
+                ? outputTensor.masks.size()
+                : std::stoul(trim(block.at("num")));
+            outputTensor.numClasses = std::stoul(block.at("classes"));
+            if (m_ClassNames.empty())
+            {
+                for (uint32_t i=0;i< outputTensor.numClasses;++i)
+                {
+                    m_ClassNames.push_back(std::to_string(i));
+                }
+            }
+            outputTensor.blobName = "yolo_" + std::to_string(_n_yolo_ind);
+            // Grid is (input / 32) * 2^(2 - head index): the first head gets the finest
+            // grid. The earlier assignments using 2^(head index) were immediately
+            // overwritten by these and have been removed.
+            outputTensor.gridSize = (m_InputH / 32) * pow(2, 2-_n_yolo_ind);
+            outputTensor.grid_h = (m_InputH / 32) * pow(2, 2-_n_yolo_ind);
+            outputTensor.grid_w = (m_InputW / 32) * pow(2, 2-_n_yolo_ind);
+            outputTensor.stride = m_InputH / outputTensor.gridSize;
+            outputTensor.stride_h = m_InputH / outputTensor.grid_h;
+            outputTensor.stride_w = m_InputW / outputTensor.grid_w;
+            outputTensor.volume = outputTensor.grid_h* outputTensor.grid_w
+                *(outputTensor.numBBoxes*(5 + outputTensor.numClasses));
+            m_OutputTensors.push_back(outputTensor);
+            _n_yolo_ind++;
+        }
+    }
+}
+
+/**
+ * Builds a TensorRT engine from m_configBlocks and the weights file named by
+ * m_staticStruct::model_wts, then serializes it to m_EnginePath.
+ *
+ * Returns immediately when a file already exists at m_EnginePath. Unsupported layer
+ * types, unsupported precision and left-over weights stop the build via assert(0).
+ *
+ * @param dataType   requested precision; kINT8 sets the INT8 builder flag and requires a
+ *                   calibrator, kHALF sets the FP16 flag, anything else sets no flag
+ * @param calibrator calibrator used for kINT8, may be nullptr otherwise
+ */
+void Detecter::createYOLOEngine(const nvinfer1::DataType dataType, Int8EntropyCalibrator* calibrator)
+{
+    if (fileExists(m_EnginePath))return;
+    std::vector<float> weights = loadWeights(m_staticStruct::model_wts, m_NetworkType);
+    std::vector<nvinfer1::Weights> trtWeights;
+    int weightPtr = 0;
+    int channels = m_InputC;
+    m_Builder = nvinfer1::createInferBuilder(m_Logger);
+    nvinfer1::IBuilderConfig* config = m_Builder->createBuilderConfig();
+    m_Network = m_Builder->createNetworkV2(0U);
+    if ((dataType == nvinfer1::DataType::kINT8 && !m_Builder->platformHasFastInt8())
+        || (dataType == nvinfer1::DataType::kHALF && !m_Builder->platformHasFastFp16()))
+    {
+        std::cout << "Platform doesn't support this precision." << std::endl;
+        assert(0);
+    }
+
+    nvinfer1::ITensor* data = m_Network->addInput(
+        m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT,
+        nvinfer1::DimsCHW{static_cast<int>(m_InputC), static_cast<int>(m_InputH),
+                          static_cast<int>(m_InputW)});
+    assert(data != nullptr);
+    // Add elementwise layer to normalize pixel values 0-1
+    nvinfer1::Dims divDims{
+        3,
+        {static_cast<int>(m_InputC), static_cast<int>(m_InputH), static_cast<int>(m_InputW)},
+        {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL,
+         nvinfer1::DimensionType::kSPATIAL}};
+    nvinfer1::Weights divWeights{nvinfer1::DataType::kFLOAT, nullptr,
+                                 static_cast<int64_t>(m_InputSize)};
+    // The divisor buffer is registered in trtWeights, which is handed to
+    // destroyNetworkUtils() on every exit path below.
+    float* divWt = new float[m_InputSize];
+    for (uint32_t w = 0; w < m_InputSize; ++w) divWt[w] = 255.0;
+    divWeights.values = divWt;
+    trtWeights.push_back(divWeights);
+    nvinfer1::IConstantLayer* constDivide = m_Network->addConstant(divDims, divWeights);
+    assert(constDivide != nullptr);
+    nvinfer1::IElementWiseLayer* elementDivide = m_Network->addElementWise(
+        *data, *constDivide->getOutput(0), nvinfer1::ElementWiseOperation::kDIV);
+    assert(elementDivide != nullptr);
+
+    nvinfer1::ITensor* previous = elementDivide->getOutput(0);
+    // One entry is appended per processed layer block; the "net" block appends nothing.
+    std::vector<nvinfer1::ITensor*> tensorOutputs;
+    uint32_t outputTensorCount = 0;
+
+    // build the network using the network API
+    for (uint32_t i = 0; i < m_configBlocks.size(); ++i)
+    {
+        // check if num. of channels is correct
+        assert(getNumChannels(previous) == channels);
+        std::string layerIndex = "(" + std::to_string(i) + ")";
+
+        if (m_configBlocks.at(i).at("type") == "net")
+        {
+            printLayerInfo("", "layer", " inp_size", " out_size", "weightPtr");
+        }
+        else if (m_configBlocks.at(i).at("type") == "convolutional")
+        {
+            std::string inputVol = dimsToString(previous->getDimensions());
+            nvinfer1::ILayer* out;
+            std::string layerType;
+            //check activation
+            std::string activation = "";
+            if (m_configBlocks.at(i).find("activation") != m_configBlocks.at(i).end())
+            {
+                activation = m_configBlocks[i]["activation"];
+            }
+            // check if batch_norm enabled
+            if ((m_configBlocks.at(i).find("batch_normalize") != m_configBlocks.at(i).end()) &&
+                ("leaky" == activation))
+            {
+                out = netAddConvBNLeaky(i, m_configBlocks.at(i), weights, trtWeights, weightPtr,
+                                        channels, previous, m_Network);
+                layerType = "conv-bn-leaky";
+            }
+            else if ((m_configBlocks.at(i).find("batch_normalize") != m_configBlocks.at(i).end()) &&
+                     ("mish" == activation))
+            {
+                out = net_conv_bn_mish(i, m_configBlocks.at(i), weights, trtWeights, weightPtr,
+                                       channels, previous, m_Network);
+                layerType = "conv-bn-mish";
+            }
+            else// if("linear" == activation)
+            {
+                out = netAddConvLinear(i, m_configBlocks.at(i), weights, trtWeights, weightPtr,
+                                       channels, previous, m_Network);
+                layerType = "conv-linear";
+            }
+            previous = out->getOutput(0);
+            assert(previous != nullptr);
+            channels = getNumChannels(previous);
+            std::string outputVol = dimsToString(previous->getDimensions());
+            tensorOutputs.push_back(out->getOutput(0));
+            printLayerInfo(layerIndex, layerType, inputVol, outputVol, std::to_string(weightPtr));
+        }
+        else if (m_configBlocks.at(i).at("type") == "shortcut")
+        {
+            assert(m_configBlocks.at(i).at("activation") == "linear");
+            assert(m_configBlocks.at(i).find("from") != m_configBlocks.at(i).end());
+            int from = stoi(m_configBlocks.at(i).at("from"));
+
+            std::string inputVol = dimsToString(previous->getDimensions());
+            // check if indexes are correct
+            // NOTE(review): the "- 1"/"- 2" offsets presumably rely on block 0 being the
+            // [net] block, so that tensorOutputs[k] is the output of block k + 1 - confirm.
+            assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size()));
+            assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size()));
+            assert(i + from - 1 < i - 2);
+            nvinfer1::IElementWiseLayer* ew
+                = m_Network->addElementWise(*tensorOutputs[i - 2], *tensorOutputs[i + from - 1],
+                                            nvinfer1::ElementWiseOperation::kSUM);
+            assert(ew != nullptr);
+            std::string ewLayerName = "shortcut_" + std::to_string(i);
+            ew->setName(ewLayerName.c_str());
+            previous = ew->getOutput(0);
+            assert(previous != nullptr);
+            std::string outputVol = dimsToString(previous->getDimensions());
+            tensorOutputs.push_back(ew->getOutput(0));
+            printLayerInfo(layerIndex, "skip", inputVol, outputVol, " -");
+        }
+        else if (m_configBlocks.at(i).at("type") == "yolo")
+        {
+            nvinfer1::Dims prevTensorDims = previous->getDimensions();
+            // assert(prevTensorDims.d[1] == prevTensorDims.d[2]);
+            // Overwrite the grid/stride values of this head with the dimensions of the
+            // tensor that actually feeds it.
+            TensorInfo& curYoloTensor = m_OutputTensors.at(outputTensorCount);
+            curYoloTensor.gridSize = prevTensorDims.d[1];
+            curYoloTensor.grid_h = prevTensorDims.d[1];
+            curYoloTensor.grid_w = prevTensorDims.d[2];
+            curYoloTensor.stride = m_InputW / curYoloTensor.gridSize;
+            curYoloTensor.stride_h = m_InputH / curYoloTensor.grid_h;
+            curYoloTensor.stride_w = m_InputW / curYoloTensor.grid_w;
+            m_OutputTensors.at(outputTensorCount).volume = curYoloTensor.grid_h
+                * curYoloTensor.grid_w
+                * (curYoloTensor.numBBoxes * (5 + curYoloTensor.numClasses));
+            std::string layerName = "yolo_" + std::to_string(outputTensorCount);
+            curYoloTensor.blobName = layerName;
+            nvinfer1::IPlugin* yoloPlugin
+                = new SLayerV3(m_OutputTensors.at(outputTensorCount).numBBoxes,
+                               m_OutputTensors.at(outputTensorCount).numClasses,
+                               m_OutputTensors.at(outputTensorCount).grid_h,
+                               m_OutputTensors.at(outputTensorCount).grid_w);
+            assert(yoloPlugin != nullptr);
+            nvinfer1::IPluginLayer* yolo = m_Network->addPlugin(&previous, 1, *yoloPlugin);
+            assert(yolo != nullptr);
+            yolo->setName(layerName.c_str());
+            std::string inputVol = dimsToString(previous->getDimensions());
+            previous = yolo->getOutput(0);
+            assert(previous != nullptr);
+            previous->setName(layerName.c_str());
+            std::string outputVol = dimsToString(previous->getDimensions());
+            m_Network->markOutput(*previous);
+            channels = getNumChannels(previous);
+            tensorOutputs.push_back(yolo->getOutput(0));
+            printLayerInfo(layerIndex, "yolo", inputVol, outputVol, std::to_string(weightPtr));
+            ++outputTensorCount;
+        }
+        else if (m_configBlocks.at(i).at("type") == "route")
+        {
+            size_t found = m_configBlocks.at(i).at("layers").find(",");
+            if (found != std::string::npos)//concate multi layers
+            {
+                std::vector<int> vec_index = split_layer_index(m_configBlocks.at(i).at("layers"), ",");
+                // Negative indices are relative to the layers produced so far.
+                for (auto &ind_layer:vec_index)
+                {
+                    if (ind_layer < 0)
+                    {
+                        ind_layer = static_cast<int>(tensorOutputs.size()) + ind_layer;
+                    }
+                    assert(ind_layer < static_cast<int>(tensorOutputs.size()) && ind_layer >= 0);
+                }
+                // A vector replaces the former malloc'd pointer array, which was never freed.
+                std::vector<nvinfer1::ITensor*> concatInputs;
+                concatInputs.reserve(vec_index.size());
+                for (size_t ind = 0; ind < vec_index.size(); ++ind)
+                {
+                    concatInputs.push_back(tensorOutputs[vec_index[ind]]);
+                }
+                nvinfer1::IConcatenationLayer* concat
+                    = m_Network->addConcatenation(concatInputs.data(), static_cast<int>(concatInputs.size()));
+                assert(concat != nullptr);
+                std::string concatLayerName = "route_" + std::to_string(i - 1);
+                concat->setName(concatLayerName.c_str());
+                // concatenate along the channel dimension
+                concat->setAxis(0);
+                previous = concat->getOutput(0);
+                assert(previous != nullptr);
+                std::string outputVol = dimsToString(previous->getDimensions());
+                int nums = 0;
+                for (auto &indx:vec_index)
+                {
+                    nums += getNumChannels(tensorOutputs[indx]);
+                }
+                channels = nums;
+                tensorOutputs.push_back(concat->getOutput(0));
+                printLayerInfo(layerIndex, "route", " -", outputVol,std::to_string(weightPtr));
+            }
+            else //single layer
+            {
+                int idx = std::stoi(trim(m_configBlocks.at(i).at("layers")));
+                if (idx < 0)
+                {
+                    idx = static_cast<int>(tensorOutputs.size()) + idx;
+                }
+                assert(idx < static_cast<int>(tensorOutputs.size()) && idx >= 0);
+
+                //route
+                if (m_configBlocks.at(i).find("groups") == m_configBlocks.at(i).end())
+                {
+                    previous = tensorOutputs[idx];
+                    assert(previous != nullptr);
+                    std::string outputVol = dimsToString(previous->getDimensions());
+                    // set the output volume depth
+                    channels = getNumChannels(tensorOutputs[idx]);
+                    tensorOutputs.push_back(tensorOutputs[idx]);
+                    printLayerInfo(layerIndex, "route", " -", outputVol, std::to_string(weightPtr));
+
+                }
+                //yolov4-tiny route split layer
+                else
+                {
+                    if (m_configBlocks.at(i).find("group_id") == m_configBlocks.at(i).end())
+                    {
+                        assert(0);
+                    }
+                    int chunk_idx = std::stoi(trim(m_configBlocks.at(i).at("group_id")));
+                    nvinfer1::ILayer* out = layer_split(i, tensorOutputs[idx], m_Network);
+                    std::string inputVol = dimsToString(previous->getDimensions());
+                    previous = out->getOutput(chunk_idx);
+                    assert(previous != nullptr);
+                    channels = getNumChannels(previous);
+                    std::string outputVol = dimsToString(previous->getDimensions());
+                    tensorOutputs.push_back(out->getOutput(chunk_idx));
+                    printLayerInfo(layerIndex,"chunk", inputVol, outputVol, std::to_string(weightPtr));
+                }
+            }
+        }
+        else if (m_configBlocks.at(i).at("type") == "upsample")
+        {
+            std::string inputVol = dimsToString(previous->getDimensions());
+            nvinfer1::ILayer* out = netAddUpsample(i - 1, m_configBlocks[i], weights, trtWeights,
+                                                   channels, previous, m_Network);
+            previous = out->getOutput(0);
+            std::string outputVol = dimsToString(previous->getDimensions());
+            tensorOutputs.push_back(out->getOutput(0));
+            printLayerInfo(layerIndex, "upsample", inputVol, outputVol, " -");
+        }
+        else if (m_configBlocks.at(i).at("type") == "maxpool")
+        {
+            // Add same padding layers
+            if (m_configBlocks.at(i).at("size") == "2" && m_configBlocks.at(i).at("stride") == "1")
+            {
+                m_TinyMaxpoolPaddingFormula->addSamePaddingLayer("maxpool_" + std::to_string(i));
+            }
+            std::string inputVol = dimsToString(previous->getDimensions());
+            nvinfer1::ILayer* out = netAddMaxpool(i, m_configBlocks.at(i), previous, m_Network);
+            previous = out->getOutput(0);
+            assert(previous != nullptr);
+            std::string outputVol = dimsToString(previous->getDimensions());
+            tensorOutputs.push_back(out->getOutput(0));
+            printLayerInfo(layerIndex, "maxpool", inputVol, outputVol, std::to_string(weightPtr));
+        }
+        else
+        {
+            std::cout << "Unsupported layer type --> \"" << m_configBlocks.at(i).at("type") << "\""
+                      << std::endl;
+            assert(0);
+        }
+    }
+
+    // Every weight in the file must have been consumed by the layers above.
+    if (static_cast<int>(weights.size()) != weightPtr)
+    {
+        std::cout << "Number of unused weights left : " << static_cast<int>(weights.size()) - weightPtr << std::endl;
+        assert(0);
+    }
+
+    // std::cout << "Output blob names :" << std::endl;
+    // for (auto& tensor : m_OutputTensors) std::cout << tensor.blobName << std::endl;
+
+    // Create and cache the engine if not already present
+    if (fileExists(m_EnginePath))
+    {
+        std::cout << "Using previously generated plan file located at " << m_EnginePath
+                  << std::endl;
+        destroyNetworkUtils(trtWeights);
+        return;
+    }
+
+    /*std::cout << "Unable to find cached TensorRT engine for network : " << m_NetworkType
+              << " precision : " << m_Precision << " and batch size :" << m_BatchSize << std::endl;*/
+
+    m_Builder->setMaxBatchSize(m_BatchSize);
+    //m_Builder->setMaxWorkspaceSize(1 << 20);
+
+    config->setMaxWorkspaceSize(1 << 20);
+    if (dataType == nvinfer1::DataType::kINT8)
+    {
+        assert((calibrator != nullptr) && "Invalid calibrator for INT8 precision");
+        // m_Builder->setInt8Mode(true);
+        config->setFlag(nvinfer1::BuilderFlag::kINT8);
+        // m_Builder->setInt8Calibrator(calibrator);
+        config->setInt8Calibrator(calibrator);
+        // config->setTacticSources(1U << static_cast<uint32_t>(TacticSource::kCUBLAS) | 1U << static_cast<uint32_t>(TacticSource::kCUBLAS_LT));
+    }
+    else if (dataType == nvinfer1::DataType::kHALF)
+    {
+        config->setFlag(nvinfer1::BuilderFlag::kFP16);
+        // m_Builder->setHalf2Mode(true);
+    }
+
+    m_Builder->allowGPUFallback(true);
+    int nbLayers = m_Network->getNbLayers();
+    int layersOnDLA = 0;
+    // std::cout << "Total number of layers: " << nbLayers << std::endl;
+    for (int i = 0; i < nbLayers; i++)
+    {
+        nvinfer1::ILayer* curLayer = m_Network->getLayer(i);
+        if (m_DeviceType == "kDLA" && m_Builder->canRunOnDLA(curLayer))
+        {
+            m_Builder->setDeviceType(curLayer, nvinfer1::DeviceType::kDLA);
+            layersOnDLA++;
+            std::cout << "Set layer " << curLayer->getName() << " to run on DLA" << std::endl;
+        }
+    }
+    // std::cout << "Total number of layers on DLA: " << layersOnDLA << std::endl;
+
+    // Build the engine
+    std::cout << "Building the TensorRT Engine..." << std::endl;
+    m_Engine = m_Builder->buildEngineWithConfig(*m_Network,*config);
+    assert(m_Engine != nullptr);
+    std::cout << "Building complete!" << std::endl;
+
+    // Serialize the engine
+    writePlanFileToDisk();
+
+    // destroy
+    destroyNetworkUtils(trtWeights);
}
void Detecter::doInference(const unsigned char* input, const uint32_t batchSize)
@@ -268,10 +751,10 @@
void Detecter::setOutput(int type)
{
m_OutputTensors.clear();
+ printf("0-0-0-0-0-0------------------%d",type);
if(type==2)
for (int i = 0; i < 2; ++i)
{
-
TensorInfo outputTensor;
outputTensor.numClasses = CLASS_BUM;
outputTensor.blobName = "yolo_" + std::to_string(i);
@@ -323,7 +806,17 @@
{
TensorInfo outputTensor;
outputTensor.numClasses = CLASS_BUM;
- outputTensor.blobName = "yolo_" + std::to_string(i);
+ outputTensor.blobName = "yolo_" + to_string(i);
+ // if (i==0)
+ // {
+ // outputTensor.blobName = "139_convolutional_reshape_2";
+ // }else if (i==1)
+ // {
+ // outputTensor.blobName = "150_convolutional_reshape_2";
+ // }else if (i==2)
+ // {
+ // outputTensor.blobName = "161_convolutional_reshape_2";
+ // }
outputTensor.gridSize = (m_InputH / 32) * pow(2, 2-i);
outputTensor.grid_h = (m_InputH / 32) * pow(2, 2-i);
outputTensor.grid_w = (m_InputW / 32) * pow(2, 2-i);
@@ -380,3 +873,24 @@
m_OutputTensors.push_back(outputTensor);
}
}
+
+/**
+ * Serializes m_Engine into m_ModelStream and writes the bytes to m_EnginePath.
+ *
+ * The engine must already be built and m_EnginePath must not be empty. A failure to
+ * open or write the file is reported and stops via assert(0) instead of being reported
+ * as success.
+ */
+void Detecter::writePlanFileToDisk()
+{
+    std::cout << "Serializing the TensorRT Engine..." << std::endl;
+    assert(m_Engine && "Invalid TensorRT Engine");
+    m_ModelStream = m_Engine->serialize();
+    assert(m_ModelStream && "Unable to serialize engine");
+    assert(!m_EnginePath.empty() && "Enginepath is empty");
+
+    // write data to output file
+    // The bytes go straight to the file; staging them in a stringstream first only
+    // duplicated the whole serialized engine in memory.
+    std::ofstream outFile;
+    outFile.open(m_EnginePath, std::ios::binary | std::ios::out);
+    outFile.write(static_cast<const char*>(m_ModelStream->data()), m_ModelStream->size());
+    outFile.close();
+    if (!outFile)
+    {
+        std::cout << "Unable to write plan file to location : " << m_EnginePath << std::endl;
+        assert(0);
+        return;
+    }
+
+    std::cout << "Serialized plan file cached at location : " << m_EnginePath << std::endl;
+}
+
diff --git a/src/detecter_tools/model.h b/src/detecter_tools/model.h
index eb3adff..e993f44 100644
--- a/src/detecter_tools/model.h
+++ b/src/detecter_tools/model.h
@@ -3,7 +3,7 @@
#include "plugin_factory.h"
#include "trt_utils.h"
-
+#include "calibrator.h"
#include "NvInfer.h"
#include "NvInferPlugin.h"
#include "NvInferRuntimeCommon.h"
@@ -16,6 +16,7 @@
#include "../utils/time_util.h"
#include "../config.h"
#include "opencv2/opencv.hpp"
+#include <numeric>
struct NetworkInfo
{
@@ -78,6 +79,7 @@
const std::string m_DeviceType;
const std::string m_InputBlobName;
std::vector<TensorInfo> m_OutputTensors;
+ std::vector<std::map<std::string, std::string>> m_configBlocks;
uint32_t m_InputH;
uint32_t m_InputW;
uint32_t m_InputC;
@@ -172,12 +174,21 @@
void setOutput(int type);
private:
Logger m_Logger;
+ void createYOLOEngine(const nvinfer1::DataType dataType = nvinfer1::DataType::kFLOAT,
+ Int8EntropyCalibrator* calibrator = nullptr);
+ void writePlanFileToDisk();
+ std::vector<std::map<std::string, std::string>> parseConfigFile(const std::string cfgFilePath);
+ void parseConfigBlocks();
void allocateBuffers();
bool verifyEngine();
void destroyNetworkUtils(std::vector<nvinfer1::Weights>& trtWeights);
+protected:
+ const std::string m_NetworkType;
+ std::unique_ptr<YoloTinyMaxpoolPaddingFormula> m_TinyMaxpoolPaddingFormula;
private:
Timer _timer;
+ int _n_yolo_ind = 0;
};
#endif
diff --git a/src/detecter_tools/trt_utils.cpp b/src/detecter_tools/trt_utils.cpp
index 75d5d6a..ebf2864 100644
--- a/src/detecter_tools/trt_utils.cpp
+++ b/src/detecter_tools/trt_utils.cpp
@@ -21,34 +21,34 @@
cv::Scalar(0.0, 0.0, 0.0),true);
}
-// static void leftTrim(std::string& s)
-// {
-// s.erase(s.begin(), find_if(s.begin(), s.end(), [](int ch) { return !isspace(ch); }));
-// }
+// Strips leading whitespace in place; unsigned char keeps isspace() defined for bytes >= 0x80.
+static void leftTrim(std::string& s) {
+    s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](unsigned char ch) { return !isspace(ch); }));
+}
-// static void rightTrim(std::string& s)
-// {
-// s.erase(find_if(s.rbegin(), s.rend(), [](int ch) { return !isspace(ch); }).base(), s.end());
-// }
+// Strips trailing whitespace in place; unsigned char keeps isspace() defined for bytes >= 0x80.
+static void rightTrim(std::string& s) {
+    s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !isspace(ch); }).base(), s.end());
+}
-// std::string trim(std::string s)
-// {
-// leftTrim(s);
-// rightTrim(s);
-// return s;
-// }
+// Returns a copy of s with whitespace removed from both ends.
+std::string trim(std::string s) {
+    rightTrim(s);
+    leftTrim(s);
+    return s;
+}
-// std::string triml(std::string s,const char* t)
-// {
-// s.erase(0, s.find_first_not_of(t));
-// return s;
-// }
+// Returns s without its leading run of characters drawn from the set t.
+std::string triml(std::string s, const char* t) {
+    const std::string::size_type firstKept = s.find_first_not_of(t);
+    return s.erase(0, firstKept);  // npos (nothing to keep) erases the whole string
+}
-// std::string trimr(std::string s, const char* t)
-// {
-// s.erase(s.find_last_not_of(t) + 1);
-// return s;
-// }
+// Returns s without its trailing run of characters drawn from the set t.
+std::string trimr(std::string s, const char* t) {
+    const std::string::size_type lastKept = s.find_last_not_of(t);
+    return s.erase(lastKept + 1);  // npos + 1 wraps to 0, which clears an all-t string
+}
float clamp(const float val, const float minVal, const float maxVal)
{
@@ -115,6 +115,305 @@
// << " xmax:" << b.box.x2 << " ymax:" << b.box.y2 << std::endl;
// }
//
+
+/// Reads a binary weights file and returns every float stored after its header.
+/// The byte at offset 4 selects the header length: 1 -> 16 bytes, 2 -> 20 bytes.
+/// networkType is accepted for interface compatibility but is not consulted.
+std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& networkType)
+{
+    assert(fileExists(weightsFilePath));
+    std::cout << "Loading pre-trained weights..." << std::endl;
+    std::ifstream file(weightsFilePath, std::ios_base::binary);
+    assert(file.good());
+
+    file.ignore(4);
+    char versionByte = 0;
+    file.read(&versionByte, 1);
+    switch (static_cast<unsigned char>(versionByte))
+    {
+    case 1:
+        file.ignore(11);
+        break;
+    case 2:
+        file.ignore(15);
+        break;
+    default:
+        std::cout << "Invalid network type" << std::endl;
+        assert(0);
+    }
+
+    // Read into a real float so no heap scratch buffer or aliasing cast is needed.
+    std::vector<float> weights;
+    float value = 0.0f;
+    while (file.read(reinterpret_cast<char*>(&value), sizeof(value)))
+    {
+        weights.push_back(value);
+    }
+    // A trailing partial float, or no payload at all, means the file is truncated.
+    assert(file.gcount() == 0 && !weights.empty());
+    std::cout << "Loading complete!" << std::endl;
+
+    return weights;
+}
+
+// Formats the extents of d as width-4 fields joined by " x", e.g. "   3 x 416 x 416".
+std::string dimsToString(const nvinfer1::Dims d)
+{
+    assert(d.nbDims >= 1);
+    std::stringstream text;
+    const int last = d.nbDims - 1;
+    for (int axis = 0; axis < last; ++axis) {
+        text << std::setw(4) << d.d[axis] << " x";
+    }
+    text << std::setw(4) << d.d[last];
+    return text.str();
+}
+
+// Appends a max-pooling layer named "maxpool_<layerIdx>" built from a cfg "maxpool" block.
+// Uses a square "size" window, "stride" in both directions and (size - 1) / 2 padding.
+nvinfer1::ILayer* netAddMaxpool(int layerIdx, std::map<std::string, std::string>& block,
+                                nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network)
+{
+    assert(block.at("type") == "maxpool");
+    assert(block.find("size") != block.end());
+    assert(block.find("stride") != block.end());
+
+    const int window = std::stoi(block.at("size"));
+    const int step = std::stoi(block.at("stride"));
+    const int border = (window - 1) / 2;
+    const std::string layerName = "maxpool_" + std::to_string(layerIdx);
+    nvinfer1::IPoolingLayer* pooling = network->addPoolingNd(
+        *input, nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{window, window});
+    assert(pooling);
+    pooling->setPaddingNd(nvinfer1::DimsHW{border, border});
+    pooling->setStrideNd(nvinfer1::DimsHW{step, step});
+    pooling->setName(layerName.c_str());
+    return pooling;
+}
+
+// Appends a bias-carrying convolution ("conv_<layerIdx>") for a cfg block with
+// activation=linear and no batch_normalize key. Consumes `filters` biases followed by
+// filters * inputChannels * size * size kernel values from `weights`, advancing weightPtr.
+// Both buffers are recorded in trtWeights (bias first, then kernel).
+// NOTE(review): presumably destroyNetworkUtils() later releases those buffers — confirm.
+nvinfer1::ILayer* netAddConvLinear(int layerIdx, std::map<std::string, std::string>& block,
+                                   std::vector<float>& weights,
+                                   std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr,
+                                   int& inputChannels, nvinfer1::ITensor* input,
+                                   nvinfer1::INetworkDefinition* network)
+{
+    assert(block.at("type") == "convolutional");
+    assert(block.find("batch_normalize") == block.end());
+    assert(block.at("activation") == "linear");
+    assert(block.find("filters") != block.end());
+    assert(block.find("pad") != block.end());
+    assert(block.find("size") != block.end());
+    assert(block.find("stride") != block.end());
+
+    const int filters = std::stoi(block.at("filters"));
+    const int padding = std::stoi(block.at("pad"));
+    const int kernelSize = std::stoi(block.at("size"));
+    const int stride = std::stoi(block.at("stride"));
+    const int pad = padding ? (kernelSize - 1) / 2 : 0;
+    const int size = filters * inputChannels * kernelSize * kernelSize;
+
+    // Refuse to read past the end of the weights blob (truncated or mismatched file).
+    assert(weightPtr >= 0 && filters >= 0 && size >= 0);
+    assert(static_cast<long long>(weightPtr) + filters + size
+           <= static_cast<long long>(weights.size()));
+
+    // load the convolution layer bias
+    float* biasVal = new float[filters];
+    for (int i = 0; i < filters; ++i)
+        biasVal[i] = weights[weightPtr++];
+    nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, biasVal, filters};
+    trtWeights.push_back(convBias);
+
+    // load the convolutional layer weights
+    float* kernelVal = new float[size];
+    for (int i = 0; i < size; ++i)
+        kernelVal[i] = weights[weightPtr++];
+    nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, kernelVal, size};
+    trtWeights.push_back(convWt);
+
+    nvinfer1::IConvolutionLayer* conv = network->addConvolution(
+        *input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias);
+    assert(conv != nullptr);
+    const std::string convLayerName = "conv_" + std::to_string(layerIdx);
+    conv->setName(convLayerName.c_str());
+    conv->setStride(nvinfer1::DimsHW{stride, stride});
+    conv->setPadding(nvinfer1::DimsHW{pad, pad});
+    return conv;
+}
+
+// Appends conv ("conv_<layerIdx>") -> batch norm ("batch_norm_<layerIdx>") -> Mish plugin
+// for a cfg block with batch_normalize=1 and activation=mish; returns the Mish layer.
+// Reads from weights, in order: filters BN biases, BN scales, running means and running
+// variances, then filters * inputChannels * size * size kernel values; weightPtr is
+// advanced past everything consumed. Every buffer handed to TensorRT is recorded in
+// trtWeights. The "Mish_TRT" version "1" plugin creator must already be registered;
+// a missing creator trips an assert rather than a null dereference.
+nvinfer1::ILayer* net_conv_bn_mish(int layerIdx,
+                                   std::map<std::string, std::string>& block,
+                                   std::vector<float>& weights,
+                                   std::vector<nvinfer1::Weights>& trtWeights,
+                                   int& weightPtr,
+                                   int& inputChannels,
+                                   nvinfer1::ITensor* input,
+                                   nvinfer1::INetworkDefinition* network)
+{
+    assert(block.at("type") == "convolutional");
+    assert(block.find("batch_normalize") != block.end());
+    assert(block.at("batch_normalize") == "1");
+    assert(block.at("activation") == "mish");
+    assert(block.find("filters") != block.end());
+    assert(block.find("pad") != block.end());
+    assert(block.find("size") != block.end());
+    assert(block.find("stride") != block.end());
+
+    const int filters = std::stoi(block.at("filters"));
+    const int padding = std::stoi(block.at("pad"));
+    const int kernelSize = std::stoi(block.at("size"));
+    const int stride = std::stoi(block.at("stride"));
+    const int pad = padding ? (kernelSize - 1) / 2 : 0;
+    int size = filters * inputChannels * kernelSize * kernelSize;
+
+    // Four BN arrays plus the kernel are consumed below; refuse to run off the blob.
+    assert(weightPtr >= 0 && filters >= 0 && size >= 0);
+    assert(static_cast<long long>(weightPtr) + 4LL * filters + size
+           <= static_cast<long long>(weights.size()));
+
+    // load BN biases
+    std::vector<float> bnBiases;
+    for (int i = 0; i < filters; ++i)
+    {
+        bnBiases.push_back(weights[weightPtr]);
+        weightPtr++;
+    }
+    // load BN weights
+    std::vector<float> bnWeights;
+    for (int i = 0; i < filters; ++i)
+    {
+        bnWeights.push_back(weights[weightPtr]);
+        weightPtr++;
+    }
+    // load BN running_mean
+    std::vector<float> bnRunningMean;
+    for (int i = 0; i < filters; ++i)
+    {
+        bnRunningMean.push_back(weights[weightPtr]);
+        weightPtr++;
+    }
+    // load BN running_var, stored as sqrt(var + eps)
+    std::vector<float> bnRunningVar;
+    for (int i = 0; i < filters; ++i)
+    {
+        // 1e-05 for numerical stability
+        bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5f));
+        weightPtr++;
+    }
+
+    // load Conv layer weights (GKCRS)
+    nvinfer1::Weights convWt{ nvinfer1::DataType::kFLOAT, nullptr, size };
+    float* val = new float[size];
+    for (int i = 0; i < size; ++i)
+    {
+        val[i] = weights[weightPtr];
+        weightPtr++;
+    }
+    convWt.values = val;
+    trtWeights.push_back(convWt);
+    // the batch-norm shift plays the role of the bias, so the convolution carries none
+    nvinfer1::Weights convBias{ nvinfer1::DataType::kFLOAT, nullptr, 0 };
+    trtWeights.push_back(convBias);
+    nvinfer1::IConvolutionLayer* conv = network->addConvolution(
+        *input, filters, nvinfer1::DimsHW{ kernelSize, kernelSize }, convWt, convBias);
+    assert(conv != nullptr);
+    std::string convLayerName = "conv_" + std::to_string(layerIdx);
+    conv->setName(convLayerName.c_str());
+    conv->setStride(nvinfer1::DimsHW{ stride, stride });
+    conv->setPadding(nvinfer1::DimsHW{ pad, pad });
+
+    /***** BATCHNORM LAYER *****/
+    /***************************/
+    // Batch norm is expressed as one per-channel scale layer: shift, scale and power.
+    size = filters;
+    nvinfer1::Weights shift{ nvinfer1::DataType::kFLOAT, nullptr, size };
+    nvinfer1::Weights scale{ nvinfer1::DataType::kFLOAT, nullptr, size };
+    nvinfer1::Weights power{ nvinfer1::DataType::kFLOAT, nullptr, size };
+    float* shiftWt = new float[size];
+    float* scaleWt = new float[size];
+    float* powerWt = new float[size];
+    // bnRunningVar already holds sqrt(var + eps), so a plain division normalizes.
+    for (int i = 0; i < size; ++i)
+    {
+        scaleWt[i] = bnWeights.at(i) / bnRunningVar.at(i);
+        shiftWt[i]
+            = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
+        powerWt[i] = 1.0;
+    }
+    shift.values = shiftWt;
+    scale.values = scaleWt;
+    power.values = powerWt;
+    trtWeights.push_back(shift);
+    trtWeights.push_back(scale);
+    trtWeights.push_back(power);
+
+    // Add the batch norm layers
+    nvinfer1::IScaleLayer* bn = network->addScale(
+        *conv->getOutput(0), nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
+    assert(bn != nullptr);
+    std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx);
+    bn->setName(bnLayerName.c_str());
+
+    /***** ACTIVATION LAYER *****/
+    /****************************/
+    auto creator = getPluginRegistry()->getPluginCreator("Mish_TRT", "1");
+    // Validate every pointer in the plugin chain before it is dereferenced.
+    assert(creator != nullptr && "Mish_TRT plugin creator is not registered");
+    const nvinfer1::PluginFieldCollection* pluginData = creator->getFieldNames();
+    nvinfer1::IPluginV2* pluginObj
+        = creator->createPlugin(("mish" + std::to_string(layerIdx)).c_str(), pluginData);
+    assert(pluginObj != nullptr);
+    nvinfer1::ITensor* inputTensors[] = { bn->getOutput(0) };
+    auto mish = network->addPluginV2(&inputTensors[0], 1, *pluginObj);
+    assert(mish != nullptr);
+    return mish;
+}
+
+// Returns the leading extent of a 3-D tensor (asserts that t has exactly three dimensions).
+int getNumChannels(nvinfer1::ITensor* t)
+{
+    const nvinfer1::Dims shape = t->getDimensions();
+    assert(shape.nbDims == 3);
+    return shape.d[0];
+}
+
+// Splits s_ on delimiter_ and parses each whitespace-trimmed piece with std::stoi,
+// e.g. "-1, 8" with "," yields {-1, 8}. The text after the last delimiter is always parsed.
+std::vector<int> split_layer_index(const std::string &s_,const std::string &delimiter_)
+{
+    std::vector<int> parsed;
+    std::string rest = s_;
+    for (size_t cut = rest.find(delimiter_); cut != std::string::npos;
+         cut = rest.find(delimiter_))
+    {
+        parsed.push_back(std::stoi(trim(rest.substr(0, cut))));
+        rest.erase(0, cut + delimiter_.length());
+    }
+    parsed.push_back(std::stoi(trim(rest)));
+    return parsed;
+}
+
+// Prints one left-aligned table row: index (6), name (15), input (20), output (20), weightPtr (6).
+void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput,
+                    std::string layerOutput, std::string weightPtr)
+{
+    std::cout << std::left << std::setw(6) << layerIndex << std::setw(15) << layerName
+              << std::setw(20) << layerInput << std::setw(20) << layerOutput
+              << std::setw(6) << weightPtr << std::endl;
+}
+
uint64_t get3DTensorVolume(nvinfer1::Dims inputDims)
{
assert(inputDims.nbDims == 3);
@@ -216,3 +515,236 @@
}
return out;
}
+
+// Upsampling for a cfg "upsample" block, built as a grouped deconvolution with one
+// all-ones stride x stride kernel per channel. `weights` and `inputChannels` are unused.
+// The kernel buffer is recorded in trtWeights, as the other layer builders do.
+// NOTE(review): assumes trtWeights entries are released after the engine is built — confirm.
+nvinfer1::ILayer* netAddUpsample(int layerIdx, std::map<std::string, std::string>& block,
+                                 std::vector<float>& weights,
+                                 std::vector<nvinfer1::Weights>& trtWeights, int& inputChannels,
+                                 nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network)
+{
+    assert(block.at("type") == "upsample");
+    nvinfer1::Dims inpDims = input->getDimensions();
+    assert(inpDims.nbDims == 3);
+    const int n_scale = std::stoi(block.at("stride"));
+    const int c1 = inpDims.d[0];
+    const int kernelCount = c1 * n_scale * n_scale;
+    float* deval = new float[kernelCount];
+    for (int i = 0; i < kernelCount; i++) deval[i] = 1.0;
+    nvinfer1::Weights wts{ nvinfer1::DataType::kFLOAT, deval, kernelCount };
+    nvinfer1::Weights bias{ nvinfer1::DataType::kFLOAT, nullptr, 0 };
+    trtWeights.push_back(wts);  // keeps the buffer reachable instead of leaking it
+    nvinfer1::IDeconvolutionLayer* upsample = network->addDeconvolutionNd(
+        *input, c1, nvinfer1::DimsHW{ n_scale, n_scale }, wts, bias);
+    assert(upsample != nullptr);
+    upsample->setStrideNd(nvinfer1::DimsHW{ n_scale, n_scale });
+    upsample->setNbGroups(c1);
+    return upsample;
+}
+
+// Adds the "CHUNK_TRT" (version "1.0") plugin as "chunk<n_layer_index_>" on input_; asserts it exists.
+nvinfer1::ILayer * layer_split(const int n_layer_index_, nvinfer1::ITensor *input_,
+                               nvinfer1::INetworkDefinition* network)
+{
+    auto creator = getPluginRegistry()->getPluginCreator("CHUNK_TRT", "1.0");
+    assert(creator != nullptr && "CHUNK_TRT plugin creator is not registered");
+    nvinfer1::IPluginV2* pluginObj = creator->createPlugin(("chunk" + std::to_string(n_layer_index_)).c_str(), creator->getFieldNames());
+    assert(pluginObj != nullptr);
+    return network->addPluginV2(&input_, 1, *pluginObj);
+}
+
+// Appends conv ("conv_<layerIdx>") -> batch norm ("batch_norm_<layerIdx>") -> leaky ReLU
+// ("leaky_<layerIdx>", alpha 0.1) for a cfg block with batch_normalize=1 and
+// activation=leaky; returns the activation layer.
+// Reads from weights, in order: filters BN biases, BN scales, running means and running
+// variances, then filters * inputChannels * size * size kernel values; weightPtr is
+// advanced past everything consumed, and the blob must be long enough to supply them.
+// pad=0 in the block disables padding; any other value pads by (size - 1) / 2.
+// Every buffer handed to TensorRT is recorded in trtWeights.
+// NOTE(review): presumably destroyNetworkUtils() releases those buffers — confirm.
+nvinfer1::ILayer* netAddConvBNLeaky(int layerIdx,
+                                    std::map<std::string, std::string>& block,
+                                    std::vector<float>& weights,
+                                    std::vector<nvinfer1::Weights>& trtWeights,
+                                    int& weightPtr,
+                                    int& inputChannels,
+                                    nvinfer1::ITensor* input,
+                                    nvinfer1::INetworkDefinition* network)
+{
+    assert(block.at("type") == "convolutional");
+    assert(block.find("batch_normalize") != block.end());
+    assert(block.at("batch_normalize") == "1");
+    assert(block.at("activation") == "leaky");
+    assert(block.find("filters") != block.end());
+    assert(block.find("pad") != block.end());
+    assert(block.find("size") != block.end());
+    assert(block.find("stride") != block.end());
+
+    const int filters = std::stoi(block.at("filters"));
+    const int padding = std::stoi(block.at("pad"));
+    const int kernelSize = std::stoi(block.at("size"));
+    const int stride = std::stoi(block.at("stride"));
+    const int pad = padding ? (kernelSize - 1) / 2 : 0;
+    int size = filters * inputChannels * kernelSize * kernelSize;
+
+    // Four BN arrays plus the kernel are consumed below; refuse to run off the blob.
+    assert(weightPtr >= 0 && filters >= 0 && size >= 0);
+    assert(static_cast<long long>(weightPtr) + 4LL * filters + size
+           <= static_cast<long long>(weights.size()));
+
+    /***** CONVOLUTION LAYER *****/
+    /*****************************/
+    // batch norm weights are before the conv layer
+    // load BN biases (bn_biases)
+    std::vector<float> bnBiases;
+    for (int i = 0; i < filters; ++i)
+    {
+        bnBiases.push_back(weights[weightPtr]);
+        weightPtr++;
+    }
+    // load BN weights
+    std::vector<float> bnWeights;
+    for (int i = 0; i < filters; ++i)
+    {
+        bnWeights.push_back(weights[weightPtr]);
+        weightPtr++;
+    }
+    // load BN running_mean
+    std::vector<float> bnRunningMean;
+    for (int i = 0; i < filters; ++i)
+    {
+        bnRunningMean.push_back(weights[weightPtr]);
+        weightPtr++;
+    }
+    // load BN running_var, stored as sqrt(var + eps)
+    std::vector<float> bnRunningVar;
+    for (int i = 0; i < filters; ++i)
+    {
+        // 1e-05 for numerical stability
+        bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5f));
+        weightPtr++;
+    }
+
+    // load Conv layer weights (GKCRS)
+    nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size};
+    float* val = new float[size];
+    for (int i = 0; i < size; ++i)
+    {
+        val[i] = weights[weightPtr];
+        weightPtr++;
+    }
+    convWt.values = val;
+    trtWeights.push_back(convWt);
+    // the batch-norm shift plays the role of the bias, so the convolution carries none
+    nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, 0};
+    trtWeights.push_back(convBias);
+    nvinfer1::IConvolutionLayer* conv = network->addConvolution(
+        *input,
+        filters,
+        nvinfer1::DimsHW{kernelSize, kernelSize},
+        convWt,
+        convBias);
+    assert(conv != nullptr);
+    std::string convLayerName = "conv_" + std::to_string(layerIdx);
+    conv->setName(convLayerName.c_str());
+    conv->setStride(nvinfer1::DimsHW{stride, stride});
+    conv->setPadding(nvinfer1::DimsHW{pad, pad});
+
+    /***** BATCHNORM LAYER *****/
+    /***************************/
+    size = filters;
+    // create the weights
+    nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, nullptr, size};
+    nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, nullptr, size};
+    nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size};
+    // shift = bias - mean * weight / sqrt(var + eps)
+    float* shiftWt = new float[size];
+    for (int i = 0; i < size; ++i)
+    {
+        shiftWt[i]
+            = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
+    }
+    shift.values = shiftWt;
+    // scale = weight / sqrt(var + eps); the square root was taken while loading
+    float* scaleWt = new float[size];
+    for (int i = 0; i < size; ++i)
+    {
+        scaleWt[i] = bnWeights.at(i) / bnRunningVar[i];
+    }
+    scale.values = scaleWt;
+    // every power entry is 1.0
+    float* powerWt = new float[size];
+    for (int i = 0; i < size; ++i)
+    {
+        powerWt[i] = 1.0;
+    }
+    power.values = powerWt;
+    trtWeights.push_back(shift);
+    trtWeights.push_back(scale);
+    trtWeights.push_back(power);
+
+    // Add the batch norm layers
+    nvinfer1::IScaleLayer* bn = network->addScale(
+        *conv->getOutput(0), nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
+    assert(bn != nullptr);
+    std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx);
+    bn->setName(bnLayerName.c_str());
+
+    /***** ACTIVATION LAYER *****/
+    /****************************/
+    auto leaky = network->addActivation(*bn->getOutput(0),nvinfer1::ActivationType::kLEAKY_RELU);
+    // Validate the layer before configuring it.
+    assert(leaky != nullptr);
+    leaky->setAlpha(0.1f);
+    std::string leakyLayerName = "leaky_" + std::to_string(layerIdx);
+    leaky->setName(leakyLayerName.c_str());
+
+    return leaky;
+}
+
+
+// Reads `filename` line by line and returns each line with surrounding whitespace
+// removed. Lines that are empty after trimming (blank lines, or a lone '\r' left by
+// CRLF line endings) are skipped rather than returned as empty entries.
+std::vector<std::string> loadListFromTextFile(const std::string filename)
+{
+    assert(fileExists(filename));
+    std::vector<std::string> list;
+
+    std::ifstream f(filename);
+    if (!f)
+    {
+        std::cout << "failed to open " << filename << std::endl;  // flush before aborting
+        assert(0);
+    }
+
+    std::string line;
+    while (std::getline(f, line))
+    {
+        const std::string entry = trim(line);
+        if (!entry.empty())
+            list.push_back(entry);
+    }
+    return list;
+}
+// Loads the entries listed in `filename`. An entry that does not exist as written is
+// retried with `prefix` prepended; if that also fails a warning is printed and the
+// entry is left unchanged.
+std::vector<std::string> loadImageList(const std::string filename, const std::string prefix)
+{
+    std::vector<std::string> paths = loadListFromTextFile(filename);
+    for (std::string& path : paths)
+    {
+        if (fileExists(path, false))
+            continue;
+        const std::string candidate = prefix + path;
+        if (fileExists(candidate, false))
+            path = candidate;
+        else
+            std::cerr << "WARNING: couldn't find: " << candidate
+                      << " while loading: " << filename << std::endl;
+    }
+    return paths;
+}
diff --git a/src/detecter_tools/trt_utils.h b/src/detecter_tools/trt_utils.h
index 189a60b..a166a6f 100644
--- a/src/detecter_tools/trt_utils.h
+++ b/src/detecter_tools/trt_utils.h
@@ -67,6 +67,34 @@
}
}
};
+nvinfer1::ILayer* netAddMaxpool(int layerIdx, std::map<std::string, std::string>& block,
+ nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network);
+nvinfer1::ILayer* netAddConvLinear(int layerIdx, std::map<std::string, std::string>& block,
+ std::vector<float>& weights,
+ std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr,
+ int& inputChannels, nvinfer1::ITensor* input,
+ nvinfer1::INetworkDefinition* network);
+
+nvinfer1::ILayer* net_conv_bn_mish(int layerIdx,
+ std::map<std::string, std::string>& block,
+ std::vector<float>& weights,
+ std::vector<nvinfer1::Weights>& trtWeights,
+ int& weightPtr,
+ int& inputChannels,
+ nvinfer1::ITensor* input,
+ nvinfer1::INetworkDefinition* network);
+
+nvinfer1::ILayer* netAddConvBNLeaky(int layerIdx, std::map<std::string, std::string>& block,
+ std::vector<float>& weights,
+ std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr,
+ int& inputChannels, nvinfer1::ITensor* input,
+ nvinfer1::INetworkDefinition* network);
+std::string dimsToString(const nvinfer1::Dims d);
+std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& networkType);
+int getNumChannels(nvinfer1::ITensor* t);
+std::string trim(std::string s);
+std::string triml(std::string s, const char* t);
+std::string trimr(std::string s, const char* t);
float clamp(const float val, const float minVal, const float maxVal);
// Common helper functions
cv::Mat blobFromDsImages(const std::vector<DsImage>& inputImages, const int& inputH,
@@ -77,9 +105,53 @@
nvinfer1::ICudaEngine* loadTRTEngine(const std::string planFilePath, PluginFactory* pluginFactory,
Logger& logger);
uint64_t get3DTensorVolume(nvinfer1::Dims inputDims);
-
+std::vector<std::string> loadImageList(const std::string filename, const std::string prefix);
std::vector<BBoxInfo> nmsAllClasses(const float nmsThresh, std::vector<BBoxInfo>& binfo,
const uint32_t numClasses, const std::string &model_type);
-
+void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput,
+ std::string layerOutput, std::string weightPtr);
std::vector<BBoxInfo> nonMaximumSuppression(const float nmsThresh, std::vector<BBoxInfo> binfo);
+std::vector<int> split_layer_index(const std::string &s_,const std::string &delimiter_);
+nvinfer1::ILayer * layer_split(const int n_layer_index_,
+ nvinfer1::ITensor *input_,
+ nvinfer1::INetworkDefinition* network);
+nvinfer1::ILayer* netAddUpsample(int layerIdx, std::map<std::string, std::string>& block,
+ std::vector<float>& weights,
+ std::vector<nvinfer1::Weights>& trtWeights, int& inputChannels,
+ nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network);
+std::vector<std::string> loadListFromTextFile(const std::string filename);
+// Output-size rule for layers: names registered through addSamePaddingLayer() get
+// (in + 2 * pad) / stride; every other layer gets (in - kernel) / stride + 1.
+class YoloTinyMaxpoolPaddingFormula : public nvinfer1::IOutputDimensionsFormula
+{
+private:
+    // Names of the layers that use the "same" padding rule.
+    std::set<std::string> m_SamePaddingLayers;
+
+    // Computes the (height, width) produced for layerName; kernel, stride and padding
+    // must each be square. `dilation` is ignored.
+    nvinfer1::DimsHW compute(nvinfer1::DimsHW inputDims, nvinfer1::DimsHW kernelSize,
+                             nvinfer1::DimsHW stride, nvinfer1::DimsHW padding,
+                             nvinfer1::DimsHW dilation, const char* layerName) const override
+    {
+        assert(kernelSize.d[0] == kernelSize.d[1]);
+        assert(stride.d[0] == stride.d[1]);
+        assert(padding.d[0] == padding.d[1]);
+
+        const bool samePadding = m_SamePaddingLayers.count(layerName) != 0;
+        int extent[2];
+        for (int axis = 0; axis < 2; ++axis)
+        {
+            extent[axis] = samePadding
+                ? (inputDims.d[axis] + 2 * padding.d[axis]) / stride.d[axis]
+                : (inputDims.d[axis] - kernelSize.d[axis]) / stride.d[axis] + 1;
+        }
+        return nvinfer1::DimsHW{extent[0], extent[1]};
+    }
+
+public:
+    // Marks the layer called `input` as using the "same" padding rule.
+    void addSamePaddingLayer(std::string input) { m_SamePaddingLayers.insert(input); }
+};
+
#endif
diff --git a/src/h_interface.cpp b/src/h_interface.cpp
index 98e2595..3b3a751 100644
--- a/src/h_interface.cpp
+++ b/src/h_interface.cpp
@@ -8,6 +8,8 @@
using namespace std;
string m_staticStruct::model_path = "path";
+string m_staticStruct::model_cfg = "0"; // initial value of the struct's static member
+string m_staticStruct::model_wts = "0"; // initial value of the struct's static member
int m_staticStruct::type = 1;
bool m_staticStruct::isTrack= true;
int m_staticStruct::max_cam_num = 0;
@@ -115,6 +117,8 @@
if(reader.parse(in,root))
{
std::string model_path = root["param"]["model_path"].asString();
+ std::string model_cfg= root["param"]["model_cfg"].asString();
+ std::string model_wts = root["param"]["model_wts"].asString();
int type = root["param"]["type"].asInt();
bool isTrack = root["param"]["isTrack"].asBool();
int max_cam_num = root["param"]["max_cam_num"].asInt();
@@ -122,6 +126,8 @@
int mv_velocity = root["param"]["mv_velocity"].asFloat();
int fall_rate = root["param"]["fall_rate"].asFloat();
m_staticStruct::model_path = model_path;
+ m_staticStruct::model_cfg = model_cfg;
+ m_staticStruct::model_wts = model_wts;
m_staticStruct::type = type;
m_staticStruct::isTrack = isTrack;
m_staticStruct::max_cam_num = max_cam_num;
--
Gitblit v1.8.0