Changed files:
config.json
readme.txt
src/config.h
src/detecter_tools/calibrator.cpp
src/detecter_tools/calibrator.h
src/detecter_tools/detector.cpp
src/detecter_tools/model.cpp
src/detecter_tools/model.h
src/detecter_tools/trt_utils.cpp
src/detecter_tools/trt_utils.h
src/h_interface.cpp
config.json
@@ -3,6 +3,9 @@
    "runtime": "/opt/vasystem/libs/Detect:/usr/local/cuda-11.1/lib64:",
    "param": {
        "model_path": "/opt/vasystem/bin/models/baseDetector/baseDetector.bin",
        "model_cfg": "/opt/vasystem/bin/models/baseDetector/baseDetector.cfg",          // parameters under "param" that the algorithm itself may use
        "model_wts": "/opt/vasystem/bin/models/baseDetector/baseDetector.weights",      // parameters under "param" that the algorithm itself may use
        //"model_path": "/data/disk1/project/model_dump/02_yolo/baseDetetor_small.bin", // parameters under "param" that the algorithm itself may use
        "type":1,
        "max_cam_num": 8,
        "wander_time": 5,
readme.txt
New file @@ -0,0 +1,4 @@
config.json is the configuration file.
model_path is the path of the converted TRT model.
If there is no converted model file, model_cfg (the cfg file) and model_wts (the weights file) must be configured; they are not needed if model_path already exists.
If the "type" field is 1, this is the normal version; if it is 2, it is the tiny version.
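This load-or-build rule is what the new Detecter constructor in model.cpp implements. Below is a minimal, illustrative sketch of that decision, using the paths from config.json above; the commented-out calls stand in for the repository's own helpers and are not reproduced here:

// Illustrative sketch (not part of this patch): the load-or-build rule described
// in readme.txt, as implemented by the new Detecter constructor in model.cpp.
#include <fstream>
#include <iostream>
#include <string>

static bool engineExists(const std::string& path)
{
    std::ifstream f(path, std::ios::binary);   // a plain existence/readability check
    return f.good();
}

int main()
{
    const std::string model_path = "/opt/vasystem/bin/models/baseDetector/baseDetector.bin";
    const std::string model_cfg  = "/opt/vasystem/bin/models/baseDetector/baseDetector.cfg";
    const std::string model_wts  = "/opt/vasystem/bin/models/baseDetector/baseDetector.weights";

    if (engineExists(model_path))
    {
        std::cout << "Using cached TensorRT engine: " << model_path << std::endl;
        // loadTRTEngine(model_path, ...) is called directly in this case.
    }
    else
    {
        std::cout << "Building engine from " << model_cfg << " and " << model_wts << std::endl;
        // parseConfigFile(model_cfg); parseConfigBlocks(); createYOLOEngine();
        // then writePlanFileToDisk() caches the engine at model_path for the next run.
    }
    return 0;
}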
src/config.h
@@ -51,6 +51,8 @@ typedef struct m_staticStruct
{
    static std::string model_path;
    static std::string model_cfg;
    static std::string model_wts;
    static int type;
    static bool isTrack;
    static int max_cam_num;
src/detecter_tools/calibrator.cpp
New file @@ -0,0 +1,114 @@ /** MIT License Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * */ #include "calibrator.h" #include <fstream> #include <iostream> #include <iterator> Int8EntropyCalibrator::Int8EntropyCalibrator(const uint32_t& batchSize, const std::string& calibImages, const std::string& calibImagesPath, const std::string& calibTableFilePath, const uint64_t& inputSize, const uint32_t& inputH, const uint32_t& inputW, const std::string& inputBlobName, const std::string &s_net_type_) : m_BatchSize(batchSize), m_InputH(inputH), m_InputW(inputW), m_InputSize(inputSize), m_InputCount(batchSize * inputSize), m_InputBlobName(inputBlobName), m_CalibTableFilePath(calibTableFilePath), m_ImageIndex(0), _s_net_type(s_net_type_) { if (!fileExists(m_CalibTableFilePath, false)) { m_ImageList = loadImageList(calibImages, calibImagesPath); m_ImageList.resize(static_cast<int>(m_ImageList.size() / m_BatchSize) * m_BatchSize); std::random_shuffle(m_ImageList.begin(), m_ImageList.end(), [](int i) { return rand() % i; }); } NV_CUDA_CHECK(cudaMalloc(&m_DeviceInput, m_InputCount * sizeof(float))); } Int8EntropyCalibrator::~Int8EntropyCalibrator() { NV_CUDA_CHECK(cudaFree(m_DeviceInput)); } bool Int8EntropyCalibrator::getBatch(void* bindings[], const char* names[], int nbBindings) { if (m_ImageIndex + m_BatchSize >= m_ImageList.size()) return false; // Load next batch std::vector<DsImage> dsImages(m_BatchSize); for (uint32_t j = m_ImageIndex; j < m_ImageIndex + m_BatchSize; ++j) { dsImages.at(j - m_ImageIndex) = DsImage(m_ImageList.at(j), _s_net_type, m_InputH, m_InputW); } m_ImageIndex += m_BatchSize; cv::Mat trtInput = blobFromDsImages(dsImages, m_InputH, m_InputW); NV_CUDA_CHECK(cudaMemcpy(m_DeviceInput, trtInput.ptr<float>(0), m_InputCount * sizeof(float), cudaMemcpyHostToDevice)); assert(!strcmp(names[0], m_InputBlobName.c_str())); bindings[0] = m_DeviceInput; return true; } const void* Int8EntropyCalibrator::readCalibrationCache(size_t& length) { void* output; m_CalibrationCache.clear(); assert(!m_CalibTableFilePath.empty()); std::ifstream input(m_CalibTableFilePath, std::ios::binary | std::ios::in); input >> std::noskipws; if (m_ReadCache && input.good()) std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(), std::back_inserter(m_CalibrationCache)); length = m_CalibrationCache.size(); if (length) { std::cout << "Using cached calibration table to build the engine" << std::endl; output = &m_CalibrationCache[0]; } else { std::cout << "New calibration table will be 
created to build the engine" << std::endl; output = nullptr; } return output; } void Int8EntropyCalibrator::writeCalibrationCache(const void* cache, size_t length) { assert(!m_CalibTableFilePath.empty()); std::ofstream output(m_CalibTableFilePath, std::ios::binary); output.write(reinterpret_cast<const char*>(cache), length); output.close(); } src/detecter_tools/calibrator.h
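calibrator.h below declares the IInt8EntropyCalibrator2 implementation that createYOLOEngine() in model.cpp accepts. As a rough, self-contained sketch of how such a calibrator is typically wired into a TensorRT 7-style build (only the nvinfer1 calls are real API; the function itself is illustrative and not part of this patch):

// Hypothetical wiring of an IInt8EntropyCalibrator2 into an engine build
// (TensorRT 7-era IBuilderConfig API, matching the createYOLOEngine() changes in model.cpp).
#include "NvInfer.h"

nvinfer1::ICudaEngine* buildInt8Engine(nvinfer1::IBuilder& builder,
                                       nvinfer1::INetworkDefinition& network,
                                       nvinfer1::IInt8EntropyCalibrator2* calibrator)
{
    nvinfer1::IBuilderConfig* config = builder.createBuilderConfig();
    config->setMaxWorkspaceSize(1 << 20);                  // same workspace budget as model.cpp
    if (builder.platformHasFastInt8() && calibrator != nullptr)
    {
        config->setFlag(nvinfer1::BuilderFlag::kINT8);     // enable INT8 kernels
        config->setInt8Calibrator(calibrator);             // TensorRT pulls batches via getBatch()
    }
    nvinfer1::ICudaEngine* engine = builder.buildEngineWithConfig(network, *config);
    config->destroy();                                     // TRT 7 still uses destroy()
    return engine;                                         // caller owns the engine
}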
New file @@ -0,0 +1,62 @@ /** MIT License Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * */ #ifndef _CALIBRATOR_H_ #define _CALIBRATOR_H_ #include "NvInfer.h" #include "ds_image.h" #include "trt_utils.h" class Int8EntropyCalibrator : public nvinfer1::IInt8EntropyCalibrator2 { public: Int8EntropyCalibrator(const uint32_t& batchSize, const std::string& calibImages, const std::string& calibImagesPath, const std::string& calibTableFilePath, const uint64_t& inputSize, const uint32_t& inputH, const uint32_t& inputW, const std::string& inputBlobName,const std::string &s_net_type_); virtual ~Int8EntropyCalibrator(); int getBatchSize() const override { return m_BatchSize; } bool getBatch(void* bindings[], const char* names[], int nbBindings) override; const void* readCalibrationCache(size_t& length) override; void writeCalibrationCache(const void* cache, size_t length) override; private: const uint32_t m_BatchSize; const uint32_t m_InputH; const uint32_t m_InputW; const uint64_t m_InputSize; const uint64_t m_InputCount; const std::string m_InputBlobName; const std::string _s_net_type; const std::string m_CalibTableFilePath{nullptr}; uint32_t m_ImageIndex; bool m_ReadCache{true}; void* m_DeviceInput{nullptr}; std::vector<std::string> m_ImageList; std::vector<char> m_CalibrationCache; }; #endif src/detecter_tools/detector.cpp
@@ -31,10 +31,10 @@
        vec_ds_images.emplace_back(img, _vec_net_type[_config.net_type], _p_net->getInputH(), _p_net->getInputW());
    }
    cv::Mat trtInput = blobFromDsImages(vec_ds_images, _p_net->getInputH(),_p_net->getInputW());
    timer.out("pre");
    timer.out("eve pre detect ");
    _p_net->doInference(trtInput.data, vec_ds_images.size());
    timer.reset();
    _p_net->doInference(trtInput.data, vec_ds_images.size());
    for (uint32_t i = 0; i < vec_ds_images.size(); ++i)
    {
        auto curImage = vec_ds_images.at(i);
@@ -65,9 +65,8 @@
        }
        vec_batch_result[i] = vec_result;
    }
    timer.out("post");
    timer.out("eve pre detect post");
    DEBUG("--detect over--" );
}
void Detector::set_gpu_id(const int id)
@@ -95,9 +94,9 @@
void Detector::build_net()
{
    if(_config.net_type == SMALL)
        _p_net = std::unique_ptr<Detecter>{new Detecter(_info,_infer_param,2)};
    else{
    if(_config.net_type == COMMON)
        _p_net = std::unique_ptr<Detecter>{new Detecter(_info,_infer_param,1)};
    else{
        _p_net = std::unique_ptr<Detecter>{new Detecter(_info,_infer_param,2)};
    }
}
src/detecter_tools/model.cpp
@@ -11,6 +11,7 @@ Detecter::Detecter( const NetworkInfo& networkInfo, const InferParams& inferParams, int type) : m_NetworkType(networkInfo.networkType), m_InputBlobName(networkInfo.inputBlobName), m_InputH(416), m_InputW(416), @@ -26,10 +27,17 @@ m_Context(nullptr), m_InputBindingIndex(-1), m_CudaStream(nullptr), m_PluginFactory(new PluginFactory) m_PluginFactory(new PluginFactory), m_TinyMaxpoolPaddingFormula(new YoloTinyMaxpoolPaddingFormula) { setOutput(type); m_EnginePath = m_staticStruct::model_path; if(!fileExists(m_EnginePath)) { m_configBlocks = parseConfigFile(m_staticStruct::model_cfg); parseConfigBlocks(); createYOLOEngine(); } setOutput(type); DEBUG((boost::format("m_EnginePath:%s")%m_EnginePath).str()); assert(m_PluginFactory != nullptr); m_Engine = loadTRTEngine(m_EnginePath, m_PluginFactory, m_Logger); @@ -67,7 +75,482 @@ m_PluginFactory = nullptr; } // m_TinyMaxpoolPaddingFormula.reset(); m_TinyMaxpoolPaddingFormula.reset(); } std::vector<std::map<std::string, std::string>> Detecter::parseConfigFile(const std::string cfgFilePath) { std::cout << "::::::::::" << cfgFilePath <<std::endl; assert(fileExists(cfgFilePath)); std::ifstream file(cfgFilePath); assert(file.good()); std::string line; std::vector<std::map<std::string, std::string>> blocks; std::map<std::string, std::string> block; while (getline(file, line)) { if (line.empty()) continue; if (line.front() == '#') continue; line = trim(line); if (line.front() == '[') { if (!block.empty()) { blocks.push_back(block); block.clear(); } std::string key = "type"; std::string value = trim(line.substr(1, line.size() - 2)); block.insert(std::pair<std::string, std::string>(key, value)); } else { size_t cpos = line.find('='); std::string key = trim(line.substr(0, cpos)); std::string value = trim(line.substr(cpos + 1)); block.insert(std::pair<std::string, std::string>(key, value)); } } blocks.push_back(block); return blocks; } void Detecter::parseConfigBlocks() { for (auto block : m_configBlocks) { if (block.at("type") == "net") { assert((block.find("height") != block.end()) && "Missing 'height' param in network cfg"); assert((block.find("width") != block.end()) && "Missing 'width' param in network cfg"); assert((block.find("channels") != block.end()) && "Missing 'channels' param in network cfg"); assert((block.find("batch") != block.end()) && "Missing 'batch' param in network cfg"); m_InputH = std::stoul(trim(block.at("height"))); m_InputW = std::stoul(trim(block.at("width"))); m_InputC = std::stoul(trim(block.at("channels"))); m_BatchSize = std::stoi(trim(block.at("batch"))); // assert(m_InputW == m_InputH); m_InputSize = m_InputC * m_InputH * m_InputW; } else if ((block.at("type") == "region") || (block.at("type") == "yolo")) { assert((block.find("num") != block.end()) && std::string("Missing 'num' param in " + block.at("type") + " layer").c_str()); assert((block.find("classes") != block.end()) && std::string("Missing 'classes' param in " + block.at("type") + " layer") .c_str()); assert((block.find("anchors") != block.end()) && std::string("Missing 'anchors' param in " + block.at("type") + " layer") .c_str()); TensorInfo outputTensor; std::string anchorString = block.at("anchors"); while (!anchorString.empty()) { size_t npos = anchorString.find_first_of(','); if (npos != std::string::npos) { float anchor = std::stof(trim(anchorString.substr(0, npos))); outputTensor.anchors.push_back(anchor); anchorString.erase(0, npos + 1); } else { float anchor = std::stof(trim(anchorString)); outputTensor.anchors.push_back(anchor); break; } } 
assert((block.find("mask") != block.end()) && std::string("Missing 'mask' param in " + block.at("type") + " layer") .c_str()); std::string maskString = block.at("mask"); while (!maskString.empty()) { size_t npos = maskString.find_first_of(','); if (npos != std::string::npos) { uint32_t mask = std::stoul(trim(maskString.substr(0, npos))); outputTensor.masks.push_back(mask); maskString.erase(0, npos + 1); } else { uint32_t mask = std::stoul(trim(maskString)); outputTensor.masks.push_back(mask); break; } } outputTensor.numBBoxes = outputTensor.masks.size() > 0 ? outputTensor.masks.size() : std::stoul(trim(block.at("num"))); outputTensor.numClasses = std::stoul(block.at("classes")); if (m_ClassNames.empty()) { for (uint32_t i=0;i< outputTensor.numClasses;++i) { m_ClassNames.push_back(std::to_string(i)); } } outputTensor.blobName = "yolo_" + std::to_string(_n_yolo_ind); outputTensor.gridSize = (m_InputH / 32) * pow(2, _n_yolo_ind); outputTensor.grid_h = (m_InputH / 32) * pow(2, _n_yolo_ind); outputTensor.grid_w = (m_InputW / 32) * pow(2, _n_yolo_ind); outputTensor.gridSize = (m_InputH / 32) * pow(2, 2-_n_yolo_ind); outputTensor.grid_h = (m_InputH / 32) * pow(2, 2-_n_yolo_ind); outputTensor.grid_w = (m_InputW / 32) * pow(2, 2-_n_yolo_ind); outputTensor.stride = m_InputH / outputTensor.gridSize; outputTensor.stride_h = m_InputH / outputTensor.grid_h; outputTensor.stride_w = m_InputW / outputTensor.grid_w; outputTensor.volume = outputTensor.grid_h* outputTensor.grid_w *(outputTensor.numBBoxes*(5 + outputTensor.numClasses)); m_OutputTensors.push_back(outputTensor); _n_yolo_ind++; } } } void Detecter::createYOLOEngine(const nvinfer1::DataType dataType, Int8EntropyCalibrator* calibrator) { if (fileExists(m_EnginePath))return; std::vector<float> weights = loadWeights(m_staticStruct::model_wts, m_NetworkType); std::vector<nvinfer1::Weights> trtWeights; int weightPtr = 0; int channels = m_InputC; m_Builder = nvinfer1::createInferBuilder(m_Logger); nvinfer1::IBuilderConfig* config = m_Builder->createBuilderConfig(); m_Network = m_Builder->createNetworkV2(0U); if ((dataType == nvinfer1::DataType::kINT8 && !m_Builder->platformHasFastInt8()) || (dataType == nvinfer1::DataType::kHALF && !m_Builder->platformHasFastFp16())) { std::cout << "Platform doesn't support this precision." 
<< std::endl; assert(0); } nvinfer1::ITensor* data = m_Network->addInput( m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT, nvinfer1::DimsCHW{static_cast<int>(m_InputC), static_cast<int>(m_InputH), static_cast<int>(m_InputW)}); assert(data != nullptr); // Add elementwise layer to normalize pixel values 0-1 nvinfer1::Dims divDims{ 3, {static_cast<int>(m_InputC), static_cast<int>(m_InputH), static_cast<int>(m_InputW)}, {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL, nvinfer1::DimensionType::kSPATIAL}}; nvinfer1::Weights divWeights{nvinfer1::DataType::kFLOAT, nullptr, static_cast<int64_t>(m_InputSize)}; float* divWt = new float[m_InputSize]; for (uint32_t w = 0; w < m_InputSize; ++w) divWt[w] = 255.0; divWeights.values = divWt; trtWeights.push_back(divWeights); nvinfer1::IConstantLayer* constDivide = m_Network->addConstant(divDims, divWeights); assert(constDivide != nullptr); nvinfer1::IElementWiseLayer* elementDivide = m_Network->addElementWise( *data, *constDivide->getOutput(0), nvinfer1::ElementWiseOperation::kDIV); assert(elementDivide != nullptr); nvinfer1::ITensor* previous = elementDivide->getOutput(0); std::vector<nvinfer1::ITensor*> tensorOutputs; uint32_t outputTensorCount = 0; // build the network using the network API for (uint32_t i = 0; i < m_configBlocks.size(); ++i) { // check if num. of channels is correct assert(getNumChannels(previous) == channels); std::string layerIndex = "(" + std::to_string(i) + ")"; if (m_configBlocks.at(i).at("type") == "net") { printLayerInfo("", "layer", " inp_size", " out_size", "weightPtr"); } else if (m_configBlocks.at(i).at("type") == "convolutional") { std::string inputVol = dimsToString(previous->getDimensions()); nvinfer1::ILayer* out; std::string layerType; //check activation std::string activation = ""; if (m_configBlocks.at(i).find("activation") != m_configBlocks.at(i).end()) { activation = m_configBlocks[i]["activation"]; } // check if batch_norm enabled if ((m_configBlocks.at(i).find("batch_normalize") != m_configBlocks.at(i).end()) && ("leaky" == activation)) { out = netAddConvBNLeaky(i, m_configBlocks.at(i), weights, trtWeights, weightPtr, channels, previous, m_Network); layerType = "conv-bn-leaky"; } else if ((m_configBlocks.at(i).find("batch_normalize") != m_configBlocks.at(i).end()) && ("mish" == activation)) { out = net_conv_bn_mish(i, m_configBlocks.at(i), weights, trtWeights, weightPtr, channels, previous, m_Network); layerType = "conv-bn-mish"; } else// if("linear" == activation) { out = netAddConvLinear(i, m_configBlocks.at(i), weights, trtWeights, weightPtr, channels, previous, m_Network); layerType = "conv-linear"; } previous = out->getOutput(0); assert(previous != nullptr); channels = getNumChannels(previous); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(out->getOutput(0)); printLayerInfo(layerIndex, layerType, inputVol, outputVol, std::to_string(weightPtr)); } else if (m_configBlocks.at(i).at("type") == "shortcut") { assert(m_configBlocks.at(i).at("activation") == "linear"); assert(m_configBlocks.at(i).find("from") != m_configBlocks.at(i).end()); int from = stoi(m_configBlocks.at(i).at("from")); std::string inputVol = dimsToString(previous->getDimensions()); // check if indexes are correct assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size())); assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size())); assert(i + from - 1 < i - 2); nvinfer1::IElementWiseLayer* ew = m_Network->addElementWise(*tensorOutputs[i - 2], *tensorOutputs[i + from - 1], 
nvinfer1::ElementWiseOperation::kSUM); assert(ew != nullptr); std::string ewLayerName = "shortcut_" + std::to_string(i); ew->setName(ewLayerName.c_str()); previous = ew->getOutput(0); assert(previous != nullptr); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(ew->getOutput(0)); printLayerInfo(layerIndex, "skip", inputVol, outputVol, " -"); } else if (m_configBlocks.at(i).at("type") == "yolo") { nvinfer1::Dims prevTensorDims = previous->getDimensions(); // assert(prevTensorDims.d[1] == prevTensorDims.d[2]); TensorInfo& curYoloTensor = m_OutputTensors.at(outputTensorCount); curYoloTensor.gridSize = prevTensorDims.d[1]; curYoloTensor.grid_h = prevTensorDims.d[1]; curYoloTensor.grid_w = prevTensorDims.d[2]; curYoloTensor.stride = m_InputW / curYoloTensor.gridSize; curYoloTensor.stride_h = m_InputH / curYoloTensor.grid_h; curYoloTensor.stride_w = m_InputW / curYoloTensor.grid_w; m_OutputTensors.at(outputTensorCount).volume = curYoloTensor.grid_h * curYoloTensor.grid_w * (curYoloTensor.numBBoxes * (5 + curYoloTensor.numClasses)); std::string layerName = "yolo_" + std::to_string(outputTensorCount); curYoloTensor.blobName = layerName; nvinfer1::IPlugin* yoloPlugin = new SLayerV3(m_OutputTensors.at(outputTensorCount).numBBoxes, m_OutputTensors.at(outputTensorCount).numClasses, m_OutputTensors.at(outputTensorCount).grid_h, m_OutputTensors.at(outputTensorCount).grid_w); assert(yoloPlugin != nullptr); nvinfer1::IPluginLayer* yolo = m_Network->addPlugin(&previous, 1, *yoloPlugin); assert(yolo != nullptr); yolo->setName(layerName.c_str()); std::string inputVol = dimsToString(previous->getDimensions()); previous = yolo->getOutput(0); assert(previous != nullptr); previous->setName(layerName.c_str()); std::string outputVol = dimsToString(previous->getDimensions()); m_Network->markOutput(*previous); channels = getNumChannels(previous); tensorOutputs.push_back(yolo->getOutput(0)); printLayerInfo(layerIndex, "yolo", inputVol, outputVol, std::to_string(weightPtr)); ++outputTensorCount; } else if (m_configBlocks.at(i).at("type") == "route") { size_t found = m_configBlocks.at(i).at("layers").find(","); if (found != std::string::npos)//concate multi layers { std::vector<int> vec_index = split_layer_index(m_configBlocks.at(i).at("layers"), ","); for (auto &ind_layer:vec_index) { if (ind_layer < 0) { ind_layer = static_cast<int>(tensorOutputs.size()) + ind_layer; } assert(ind_layer < static_cast<int>(tensorOutputs.size()) && ind_layer >= 0); } nvinfer1::ITensor** concatInputs = reinterpret_cast<nvinfer1::ITensor**>(malloc(sizeof(nvinfer1::ITensor*) * vec_index.size())); for (size_t ind = 0; ind < vec_index.size(); ++ind) { concatInputs[ind] = tensorOutputs[vec_index[ind]]; } nvinfer1::IConcatenationLayer* concat = m_Network->addConcatenation(concatInputs, static_cast<int>(vec_index.size())); assert(concat != nullptr); std::string concatLayerName = "route_" + std::to_string(i - 1); concat->setName(concatLayerName.c_str()); // concatenate along the channel dimension concat->setAxis(0); previous = concat->getOutput(0); assert(previous != nullptr); nvinfer1::Dims debug = previous->getDimensions(); std::string outputVol = dimsToString(previous->getDimensions()); int nums = 0; for (auto &indx:vec_index) { nums += getNumChannels(tensorOutputs[indx]); } channels = nums; tensorOutputs.push_back(concat->getOutput(0)); printLayerInfo(layerIndex, "route", " -", outputVol,std::to_string(weightPtr)); } else //single layer { int idx = std::stoi(trim(m_configBlocks.at(i).at("layers"))); if 
(idx < 0) { idx = static_cast<int>(tensorOutputs.size()) + idx; } assert(idx < static_cast<int>(tensorOutputs.size()) && idx >= 0); //route if (m_configBlocks.at(i).find("groups") == m_configBlocks.at(i).end()) { previous = tensorOutputs[idx]; assert(previous != nullptr); std::string outputVol = dimsToString(previous->getDimensions()); // set the output volume depth channels = getNumChannels(tensorOutputs[idx]); tensorOutputs.push_back(tensorOutputs[idx]); printLayerInfo(layerIndex, "route", " -", outputVol, std::to_string(weightPtr)); } //yolov4-tiny route split layer else { if (m_configBlocks.at(i).find("group_id") == m_configBlocks.at(i).end()) { assert(0); } int chunk_idx = std::stoi(trim(m_configBlocks.at(i).at("group_id"))); nvinfer1::ILayer* out = layer_split(i, tensorOutputs[idx], m_Network); std::string inputVol = dimsToString(previous->getDimensions()); previous = out->getOutput(chunk_idx); assert(previous != nullptr); channels = getNumChannels(previous); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(out->getOutput(chunk_idx)); printLayerInfo(layerIndex,"chunk", inputVol, outputVol, std::to_string(weightPtr)); } } } else if (m_configBlocks.at(i).at("type") == "upsample") { std::string inputVol = dimsToString(previous->getDimensions()); nvinfer1::ILayer* out = netAddUpsample(i - 1, m_configBlocks[i], weights, trtWeights, channels, previous, m_Network); previous = out->getOutput(0); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(out->getOutput(0)); printLayerInfo(layerIndex, "upsample", inputVol, outputVol, " -"); } else if (m_configBlocks.at(i).at("type") == "maxpool") { // Add same padding layers if (m_configBlocks.at(i).at("size") == "2" && m_configBlocks.at(i).at("stride") == "1") { m_TinyMaxpoolPaddingFormula->addSamePaddingLayer("maxpool_" + std::to_string(i)); } std::string inputVol = dimsToString(previous->getDimensions()); nvinfer1::ILayer* out = netAddMaxpool(i, m_configBlocks.at(i), previous, m_Network); previous = out->getOutput(0); assert(previous != nullptr); std::string outputVol = dimsToString(previous->getDimensions()); tensorOutputs.push_back(out->getOutput(0)); printLayerInfo(layerIndex, "maxpool", inputVol, outputVol, std::to_string(weightPtr)); } else { std::cout << "Unsupported layer type --> \"" << m_configBlocks.at(i).at("type") << "\"" << std::endl; assert(0); } } if (static_cast<int>(weights.size()) != weightPtr) { std::cout << "Number of unused weights left : " << static_cast<int>(weights.size()) - weightPtr << std::endl; assert(0); } // std::cout << "Output blob names :" << std::endl; // for (auto& tensor : m_OutputTensors) std::cout << tensor.blobName << std::endl; // Create and cache the engine if not already present if (fileExists(m_EnginePath)) { std::cout << "Using previously generated plan file located at " << m_EnginePath << std::endl; destroyNetworkUtils(trtWeights); return; } /*std::cout << "Unable to find cached TensorRT engine for network : " << m_NetworkType << " precision : " << m_Precision << " and batch size :" << m_BatchSize << std::endl;*/ m_Builder->setMaxBatchSize(m_BatchSize); //m_Builder->setMaxWorkspaceSize(1 << 20); config->setMaxWorkspaceSize(1 << 20); if (dataType == nvinfer1::DataType::kINT8) { assert((calibrator != nullptr) && "Invalid calibrator for INT8 precision"); // m_Builder->setInt8Mode(true); config->setFlag(nvinfer1::BuilderFlag::kINT8); // m_Builder->setInt8Calibrator(calibrator); config->setInt8Calibrator(calibrator); // 
config->setTacticSources(1U << static_cast<uint32_t>(TacticSource::kCUBLAS) | 1U << static_cast<uint32_t>(TacticSource::kCUBLAS_LT)); } else if (dataType == nvinfer1::DataType::kHALF) { config->setFlag(nvinfer1::BuilderFlag::kFP16); // m_Builder->setHalf2Mode(true); } m_Builder->allowGPUFallback(true); int nbLayers = m_Network->getNbLayers(); int layersOnDLA = 0; // std::cout << "Total number of layers: " << nbLayers << std::endl; for (int i = 0; i < nbLayers; i++) { nvinfer1::ILayer* curLayer = m_Network->getLayer(i); if (m_DeviceType == "kDLA" && m_Builder->canRunOnDLA(curLayer)) { m_Builder->setDeviceType(curLayer, nvinfer1::DeviceType::kDLA); layersOnDLA++; std::cout << "Set layer " << curLayer->getName() << " to run on DLA" << std::endl; } } // std::cout << "Total number of layers on DLA: " << layersOnDLA << std::endl; // Build the engine std::cout << "Building the TensorRT Engine..." << std::endl; m_Engine = m_Builder->buildEngineWithConfig(*m_Network,*config); assert(m_Engine != nullptr); std::cout << "Building complete!" << std::endl; // Serialize the engine writePlanFileToDisk(); // destroy destroyNetworkUtils(trtWeights); } void Detecter::doInference(const unsigned char* input, const uint32_t batchSize) @@ -268,10 +751,10 @@ void Detecter::setOutput(int type) { m_OutputTensors.clear(); printf("0-0-0-0-0-0------------------%d",type); if(type==2) for (int i = 0; i < 2; ++i) { TensorInfo outputTensor; outputTensor.numClasses = CLASS_BUM; outputTensor.blobName = "yolo_" + std::to_string(i); @@ -323,7 +806,17 @@ { TensorInfo outputTensor; outputTensor.numClasses = CLASS_BUM; outputTensor.blobName = "yolo_" + std::to_string(i); outputTensor.blobName = "yolo_" + to_string(i); // if (i==0) // { // outputTensor.blobName = "139_convolutional_reshape_2"; // }else if (i==1) // { // outputTensor.blobName = "150_convolutional_reshape_2"; // }else if (i==2) // { // outputTensor.blobName = "161_convolutional_reshape_2"; // } outputTensor.gridSize = (m_InputH / 32) * pow(2, 2-i); outputTensor.grid_h = (m_InputH / 32) * pow(2, 2-i); outputTensor.grid_w = (m_InputW / 32) * pow(2, 2-i); @@ -380,3 +873,24 @@ m_OutputTensors.push_back(outputTensor); } } void Detecter::writePlanFileToDisk() { std::cout << "Serializing the TensorRT Engine..." << std::endl; assert(m_Engine && "Invalid TensorRT Engine"); m_ModelStream = m_Engine->serialize(); assert(m_ModelStream && "Unable to serialize engine"); assert(!m_EnginePath.empty() && "Enginepath is empty"); // write data to output file std::stringstream gieModelStream; gieModelStream.seekg(0, gieModelStream.beg); gieModelStream.write(static_cast<const char*>(m_ModelStream->data()), m_ModelStream->size()); std::ofstream outFile; outFile.open(m_EnginePath, std::ios::binary | std::ios::out); outFile << gieModelStream.rdbuf(); outFile.close(); std::cout << "Serialized plan file cached at location : " << m_EnginePath << std::endl; } src/detecter_tools/model.h
@@ -3,7 +3,7 @@ #include "plugin_factory.h" #include "trt_utils.h" #include "calibrator.h" #include "NvInfer.h" #include "NvInferPlugin.h" #include "NvInferRuntimeCommon.h" @@ -16,6 +16,7 @@ #include "../utils/time_util.h" #include "../config.h" #include "opencv2/opencv.hpp" #include <numeric> struct NetworkInfo { @@ -78,6 +79,7 @@ const std::string m_DeviceType; const std::string m_InputBlobName; std::vector<TensorInfo> m_OutputTensors; std::vector<std::map<std::string, std::string>> m_configBlocks; uint32_t m_InputH; uint32_t m_InputW; uint32_t m_InputC; @@ -172,12 +174,21 @@ void setOutput(int type); private: Logger m_Logger; void createYOLOEngine(const nvinfer1::DataType dataType = nvinfer1::DataType::kFLOAT, Int8EntropyCalibrator* calibrator = nullptr); void writePlanFileToDisk(); std::vector<std::map<std::string, std::string>> parseConfigFile(const std::string cfgFilePath); void parseConfigBlocks(); void allocateBuffers(); bool verifyEngine(); void destroyNetworkUtils(std::vector<nvinfer1::Weights>& trtWeights); protected: const std::string m_NetworkType; std::unique_ptr<YoloTinyMaxpoolPaddingFormula> m_TinyMaxpoolPaddingFormula; private: Timer _timer; int _n_yolo_ind = 0; }; #endif src/detecter_tools/trt_utils.cpp
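The helpers added to trt_utils.cpp below (netAddConvBNLeaky, net_conv_bn_mish) fold each darknet convolution's batch-norm parameters into a per-channel TensorRT IScaleLayer. An illustrative, standalone sketch of that folding, with made-up numbers (not part of this patch):

// Standalone sketch of the batch-norm folding done in netAddConvBNLeaky /
// net_conv_bn_mish below: y = scale * x + shift, applied per channel.
#include <cmath>
#include <cstdio>
#include <vector>

int main()
{
    // Per-channel BN parameters in the order they are stored in darknet .weights files.
    std::vector<float> bnBias  = {0.10f, -0.20f};
    std::vector<float> bnGamma = {1.50f,  0.80f};
    std::vector<float> bnMean  = {0.05f,  0.30f};
    std::vector<float> bnVar   = {0.90f,  1.10f};

    for (size_t c = 0; c < bnBias.size(); ++c)
    {
        float stddev = std::sqrt(bnVar[c] + 1.0e-5f);               // same epsilon as the loader
        float scale  = bnGamma[c] / stddev;                         // IScaleLayer "scale"
        float shift  = bnBias[c] - bnMean[c] * bnGamma[c] / stddev; // IScaleLayer "shift"
        std::printf("channel %zu: scale=%f shift=%f\n", c, scale, shift);
    }
    return 0;
}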
@@ -21,34 +21,34 @@ cv::Scalar(0.0, 0.0, 0.0),true); } // static void leftTrim(std::string& s) // { // s.erase(s.begin(), find_if(s.begin(), s.end(), [](int ch) { return !isspace(ch); })); // } static void leftTrim(std::string& s) { s.erase(s.begin(), find_if(s.begin(), s.end(), [](int ch) { return !isspace(ch); })); } // static void rightTrim(std::string& s) // { // s.erase(find_if(s.rbegin(), s.rend(), [](int ch) { return !isspace(ch); }).base(), s.end()); // } static void rightTrim(std::string& s) { s.erase(find_if(s.rbegin(), s.rend(), [](int ch) { return !isspace(ch); }).base(), s.end()); } // std::string trim(std::string s) // { // leftTrim(s); // rightTrim(s); // return s; // } std::string trim(std::string s) { leftTrim(s); rightTrim(s); return s; } // std::string triml(std::string s,const char* t) // { // s.erase(0, s.find_first_not_of(t)); // return s; // } std::string triml(std::string s,const char* t) { s.erase(0, s.find_first_not_of(t)); return s; } // std::string trimr(std::string s, const char* t) // { // s.erase(s.find_last_not_of(t) + 1); // return s; // } std::string trimr(std::string s, const char* t) { s.erase(s.find_last_not_of(t) + 1); return s; } float clamp(const float val, const float minVal, const float maxVal) { @@ -115,6 +115,305 @@ // << " xmax:" << b.box.x2 << " ymax:" << b.box.y2 << std::endl; // } // std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& networkType) { assert(fileExists(weightsFilePath)); std::cout << "Loading pre-trained weights..." << std::endl; std::ifstream file(weightsFilePath, std::ios_base::binary); assert(file.good()); std::string line; file.ignore(4); char buf[2]; file.read(buf, 1); if ((int)(unsigned char)buf[0] == 1) { file.ignore(11); } else if ((int)(unsigned char)buf[0] == 2) { file.ignore(15); } else { std::cout << "Invalid network type" << std::endl; assert(0); } std::vector<float> weights; char* floatWeight = new char[4]; while (!file.eof()) { file.read(floatWeight, 4); assert(file.gcount() == 4); weights.push_back(*reinterpret_cast<float*>(floatWeight)); if (file.peek() == std::istream::traits_type::eof()) break; } std::cout << "Loading complete!" 
<< std::endl; delete[] floatWeight; // std::cout << "Total Number of weights read : " << weights.size() << std::endl; return weights; } std::string dimsToString(const nvinfer1::Dims d) { std::stringstream s; assert(d.nbDims >= 1); for (int i = 0; i < d.nbDims - 1; ++i) { s << std::setw(4) << d.d[i] << " x"; } s << std::setw(4) << d.d[d.nbDims - 1]; return s.str(); } nvinfer1::ILayer* netAddMaxpool(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network) { assert(block.at("type") == "maxpool"); assert(block.find("size") != block.end()); assert(block.find("stride") != block.end()); int size = std::stoi(block.at("size")); int stride = std::stoi(block.at("stride")); nvinfer1::IPoolingLayer* pool = network->addPoolingNd(*input, nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{size, size}); assert(pool); std::string maxpoolLayerName = "maxpool_" + std::to_string(layerIdx); int pad = (size - 1) / 2; pool->setPaddingNd(nvinfer1::DimsHW{pad,pad}); pool->setStrideNd(nvinfer1::DimsHW{stride, stride}); pool->setName(maxpoolLayerName.c_str()); return pool; } nvinfer1::ILayer* netAddConvLinear(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights, std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network) { assert(block.at("type") == "convolutional"); assert(block.find("batch_normalize") == block.end()); assert(block.at("activation") == "linear"); assert(block.find("filters") != block.end()); assert(block.find("pad") != block.end()); assert(block.find("size") != block.end()); assert(block.find("stride") != block.end()); int filters = std::stoi(block.at("filters")); int padding = std::stoi(block.at("pad")); int kernelSize = std::stoi(block.at("size")); int stride = std::stoi(block.at("stride")); int pad; if (padding) pad = (kernelSize - 1) / 2; else pad = 0; // load the convolution layer bias nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, filters}; float* val = new float[filters]; for (int i = 0; i < filters; ++i) { val[i] = weights[weightPtr]; weightPtr++; } convBias.values = val; trtWeights.push_back(convBias); // load the convolutional layer weights int size = filters * inputChannels * kernelSize * kernelSize; nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size}; val = new float[size]; for (int i = 0; i < size; ++i) { val[i] = weights[weightPtr]; weightPtr++; } convWt.values = val; trtWeights.push_back(convWt); nvinfer1::IConvolutionLayer* conv = network->addConvolution( *input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias); assert(conv != nullptr); std::string convLayerName = "conv_" + std::to_string(layerIdx); conv->setName(convLayerName.c_str()); conv->setStride(nvinfer1::DimsHW{stride, stride}); conv->setPadding(nvinfer1::DimsHW{pad, pad}); return conv; } nvinfer1::ILayer* net_conv_bn_mish(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights, std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network) { assert(block.at("type") == "convolutional"); assert(block.find("batch_normalize") != block.end()); assert(block.at("batch_normalize") == "1"); assert(block.at("activation") == "mish"); assert(block.find("filters") != block.end()); assert(block.find("pad") != block.end()); assert(block.find("size") != block.end()); assert(block.find("stride") != 
block.end()); bool batchNormalize, bias; if (block.find("batch_normalize") != block.end()) { batchNormalize = (block.at("batch_normalize") == "1"); bias = false; } else { batchNormalize = false; bias = true; } // all conv_bn_leaky layers assume bias is false assert(batchNormalize == true && bias == false); int filters = std::stoi(block.at("filters")); int padding = std::stoi(block.at("pad")); int kernelSize = std::stoi(block.at("size")); int stride = std::stoi(block.at("stride")); int pad; if (padding) pad = (kernelSize - 1) / 2; else pad = 0; std::vector<float> bnBiases; for (int i = 0; i < filters; ++i) { bnBiases.push_back(weights[weightPtr]); weightPtr++; } // load BN weights std::vector<float> bnWeights; for (int i = 0; i < filters; ++i) { bnWeights.push_back(weights[weightPtr]); weightPtr++; } // load BN running_mean std::vector<float> bnRunningMean; for (int i = 0; i < filters; ++i) { bnRunningMean.push_back(weights[weightPtr]); weightPtr++; } // load BN running_var std::vector<float> bnRunningVar; for (int i = 0; i < filters; ++i) { // 1e-05 for numerical stability bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5f)); weightPtr++; } // load Conv layer weights (GKCRS) int size = filters * inputChannels * kernelSize * kernelSize; nvinfer1::Weights convWt{ nvinfer1::DataType::kFLOAT, nullptr, size }; float* val = new float[size]; for (int i = 0; i < size; ++i) { val[i] = weights[weightPtr]; weightPtr++; } convWt.values = val; trtWeights.push_back(convWt); nvinfer1::Weights convBias{ nvinfer1::DataType::kFLOAT, nullptr, 0 }; trtWeights.push_back(convBias); nvinfer1::IConvolutionLayer* conv = network->addConvolution( *input, filters, nvinfer1::DimsHW{ kernelSize, kernelSize }, convWt, convBias); assert(conv != nullptr); std::string convLayerName = "conv_" + std::to_string(layerIdx); conv->setName(convLayerName.c_str()); conv->setStride(nvinfer1::DimsHW{ stride, stride }); conv->setPadding(nvinfer1::DimsHW{ pad, pad }); /***** BATCHNORM LAYER *****/ /***************************/ size = filters; // create the weights nvinfer1::Weights shift{ nvinfer1::DataType::kFLOAT, nullptr, size }; nvinfer1::Weights scale{ nvinfer1::DataType::kFLOAT, nullptr, size }; nvinfer1::Weights power{ nvinfer1::DataType::kFLOAT, nullptr, size }; float* shiftWt = new float[size]; for (int i = 0; i < size; ++i) { shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i)); } shift.values = shiftWt; float* scaleWt = new float[size]; for (int i = 0; i < size; ++i) { scaleWt[i] = bnWeights.at(i) / bnRunningVar[i]; } scale.values = scaleWt; float* powerWt = new float[size]; for (int i = 0; i < size; ++i) { powerWt[i] = 1.0; } power.values = powerWt; trtWeights.push_back(shift); trtWeights.push_back(scale); trtWeights.push_back(power); // Add the batch norm layers nvinfer1::IScaleLayer* bn = network->addScale( *conv->getOutput(0), nvinfer1::ScaleMode::kCHANNEL, shift, scale, power); assert(bn != nullptr); std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx); bn->setName(bnLayerName.c_str()); /***** ACTIVATION LAYER *****/ /****************************/ auto creator = getPluginRegistry()->getPluginCreator("Mish_TRT", "1"); const nvinfer1::PluginFieldCollection* pluginData = creator->getFieldNames(); nvinfer1::IPluginV2 *pluginObj = creator->createPlugin(("mish" + std::to_string(layerIdx)).c_str(), pluginData); nvinfer1::ITensor* inputTensors[] = { bn->getOutput(0) }; auto mish = network->addPluginV2(&inputTensors[0], 1, *pluginObj); return mish; } int 
getNumChannels(nvinfer1::ITensor* t) { nvinfer1::Dims d = t->getDimensions(); assert(d.nbDims == 3); return d.d[0]; } std::vector<int> split_layer_index(const std::string &s_,const std::string &delimiter_) { std::vector<int> index; std::string s = s_; size_t pos = 0; std::string token; while ((pos = s.find(delimiter_)) != std::string::npos) { token = s.substr(0, pos); index.push_back(std::stoi(trim(token))); s.erase(0, pos + delimiter_.length()); } index.push_back(std::stoi(trim(s))); return index; } void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput, std::string weightPtr) { std::cout << std::setw(6) << std::left << layerIndex << std::setw(15) << std::left << layerName; std::cout << std::setw(20) << std::left << layerInput << std::setw(20) << std::left << layerOutput; std::cout << std::setw(6) << std::left << weightPtr << std::endl; } uint64_t get3DTensorVolume(nvinfer1::Dims inputDims) { assert(inputDims.nbDims == 3); @@ -216,3 +515,236 @@ } return out; } nvinfer1::ILayer* netAddUpsample(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights, std::vector<nvinfer1::Weights>& trtWeights, int& inputChannels, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network) { assert(block.at("type") == "upsample"); nvinfer1::Dims inpDims = input->getDimensions(); assert(inpDims.nbDims == 3); // assert(inpDims.d[1] == inpDims.d[2]); int n_scale = std::stoi(block.at("stride")); int c1 = inpDims.d[0]; float *deval = new float[c1*n_scale*n_scale]; for (int i = 0; i < c1*n_scale*n_scale; i++) { deval[i] = 1.0; } nvinfer1::Weights wts{ DataType::kFLOAT, deval, c1*n_scale*n_scale }; nvinfer1::Weights bias{ DataType::kFLOAT, nullptr, 0 }; IDeconvolutionLayer* upsample = network->addDeconvolutionNd(*input, c1, DimsHW{ n_scale, n_scale }, wts, bias); upsample->setStrideNd(DimsHW{ n_scale, n_scale }); upsample->setNbGroups(c1); return upsample; #if 0 #endif } nvinfer1::ILayer * layer_split(const int n_layer_index_, nvinfer1::ITensor *input_, nvinfer1::INetworkDefinition* network) { auto creator = getPluginRegistry()->getPluginCreator("CHUNK_TRT", "1.0"); const nvinfer1::PluginFieldCollection* pluginData = creator->getFieldNames(); nvinfer1::IPluginV2 *pluginObj = creator->createPlugin(("chunk" + std::to_string(n_layer_index_)).c_str(), pluginData); auto chunk = network->addPluginV2(&input_, 1, *pluginObj); return chunk; } nvinfer1::ILayer* netAddConvBNLeaky(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights, std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network) { assert(block.at("type") == "convolutional"); assert(block.find("batch_normalize") != block.end()); assert(block.at("batch_normalize") == "1"); assert(block.at("activation") == "leaky"); assert(block.find("filters") != block.end()); assert(block.find("pad") != block.end()); assert(block.find("size") != block.end()); assert(block.find("stride") != block.end()); bool batchNormalize, bias; if (block.find("batch_normalize") != block.end()) { batchNormalize = (block.at("batch_normalize") == "1"); bias = false; } else { batchNormalize = false; bias = true; } // all conv_bn_leaky layers assume bias is false assert(batchNormalize == true && bias == false); int filters = std::stoi(block.at("filters")); int padding = std::stoi(block.at("pad")); int kernelSize = std::stoi(block.at("size")); int stride = std::stoi(block.at("stride")); int 
pad; if (padding) pad = (kernelSize - 1) / 2; else pad = 0; /***** CONVOLUTION LAYER *****/ /*****************************/ // batch norm weights are before the conv layer // load BN biases (bn_biases) std::vector<float> bnBiases; for (int i = 0; i < filters; ++i) { bnBiases.push_back(weights[weightPtr]); weightPtr++; } // load BN weights std::vector<float> bnWeights; for (int i = 0; i < filters; ++i) { bnWeights.push_back(weights[weightPtr]); weightPtr++; } // load BN running_mean std::vector<float> bnRunningMean; for (int i = 0; i < filters; ++i) { bnRunningMean.push_back(weights[weightPtr]); weightPtr++; } // load BN running_var std::vector<float> bnRunningVar; for (int i = 0; i < filters; ++i) { // 1e-05 for numerical stability bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5f)); weightPtr++; } // load Conv layer weights (GKCRS) int size = filters * inputChannels * kernelSize * kernelSize; nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size}; float* val = new float[size]; for (int i = 0; i < size; ++i) { val[i] = weights[weightPtr]; weightPtr++; } convWt.values = val; trtWeights.push_back(convWt); nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, 0}; trtWeights.push_back(convBias); nvinfer1::IConvolutionLayer* conv = network->addConvolution( *input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias); assert(conv != nullptr); std::string convLayerName = "conv_" + std::to_string(layerIdx); conv->setName(convLayerName.c_str()); conv->setStride(nvinfer1::DimsHW{stride, stride}); conv->setPadding(nvinfer1::DimsHW{pad, pad}); /***** BATCHNORM LAYER *****/ /***************************/ size = filters; // create the weights nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, nullptr, size}; nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, nullptr, size}; nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size}; float* shiftWt = new float[size]; for (int i = 0; i < size; ++i) { shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i)); } shift.values = shiftWt; float* scaleWt = new float[size]; for (int i = 0; i < size; ++i) { scaleWt[i] = bnWeights.at(i) / bnRunningVar[i]; } scale.values = scaleWt; float* powerWt = new float[size]; for (int i = 0; i < size; ++i) { powerWt[i] = 1.0; } power.values = powerWt; trtWeights.push_back(shift); trtWeights.push_back(scale); trtWeights.push_back(power); // Add the batch norm layers nvinfer1::IScaleLayer* bn = network->addScale( *conv->getOutput(0), nvinfer1::ScaleMode::kCHANNEL, shift, scale, power); assert(bn != nullptr); std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx); bn->setName(bnLayerName.c_str()); /***** ACTIVATION LAYER *****/ /****************************/ auto leaky = network->addActivation(*bn->getOutput(0),nvinfer1::ActivationType::kLEAKY_RELU); leaky->setAlpha(0.1f); /*nvinfer1::IPlugin* leakyRELU = nvinfer1::plugin::createPReLUPlugin(0.1); assert(leakyRELU != nullptr); nvinfer1::ITensor* bnOutput = bn->getOutput(0); nvinfer1::IPluginLayer* leaky = network->addPlugin(&bnOutput, 1, *leakyRELU);*/ assert(leaky != nullptr); std::string leakyLayerName = "leaky_" + std::to_string(layerIdx); leaky->setName(leakyLayerName.c_str()); return leaky; } std::vector<std::string> loadListFromTextFile(const std::string filename) { assert(fileExists(filename)); std::vector<std::string> list; std::ifstream f(filename); if (!f) { std::cout << "failed to open " << filename; assert(0); } std::string line; while (std::getline(f, line)) { if 
(line.empty()) continue; else list.push_back(trim(line)); } return list; } std::vector<std::string> loadImageList(const std::string filename, const std::string prefix) { std::vector<std::string> fileList = loadListFromTextFile(filename); for (auto& file : fileList) { if (fileExists(file, false)) continue; else { std::string prefixed = prefix + file; if (fileExists(prefixed, false)) file = prefixed; else std::cerr << "WARNING: couldn't find: " << prefixed << " while loading: " << filename << std::endl; } } return fileList; } src/detecter_tools/trt_utils.h
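trt_utils.h below also adds YoloTinyMaxpoolPaddingFormula, which preserves the feature-map size for the size-2/stride-1 maxpool layers registered through addSamePaddingLayer(). As a worked example: netAddMaxpool sets pad = (2 - 1) / 2 = 0, so for a 13x13 input at stride 1 the same-padding branch returns (13 + 2*0) / 1 = 13 (size kept), whereas the valid-padding branch would give (13 - 2) / 1 + 1 = 12.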
@@ -67,6 +67,34 @@ } } }; nvinfer1::ILayer* netAddMaxpool(int layerIdx, std::map<std::string, std::string>& block, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network); nvinfer1::ILayer* netAddConvLinear(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights, std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network); nvinfer1::ILayer* net_conv_bn_mish(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights, std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network); nvinfer1::ILayer* netAddConvBNLeaky(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights, std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network); std::string dimsToString(const nvinfer1::Dims d); std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& networkType); int getNumChannels(nvinfer1::ITensor* t); std::string trim(std::string s); std::string triml(std::string s, const char* t); std::string trimr(std::string s, const char* t); float clamp(const float val, const float minVal, const float maxVal); // Common helper functions cv::Mat blobFromDsImages(const std::vector<DsImage>& inputImages, const int& inputH, @@ -77,9 +105,53 @@ nvinfer1::ICudaEngine* loadTRTEngine(const std::string planFilePath, PluginFactory* pluginFactory, Logger& logger); uint64_t get3DTensorVolume(nvinfer1::Dims inputDims); std::vector<std::string> loadImageList(const std::string filename, const std::string prefix); std::vector<BBoxInfo> nmsAllClasses(const float nmsThresh, std::vector<BBoxInfo>& binfo, const uint32_t numClasses, const std::string &model_type); void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput, std::string layerOutput, std::string weightPtr); std::vector<BBoxInfo> nonMaximumSuppression(const float nmsThresh, std::vector<BBoxInfo> binfo); std::vector<int> split_layer_index(const std::string &s_,const std::string &delimiter_); nvinfer1::ILayer * layer_split(const int n_layer_index_, nvinfer1::ITensor *input_, nvinfer1::INetworkDefinition* network); nvinfer1::ILayer* netAddUpsample(int layerIdx, std::map<std::string, std::string>& block, std::vector<float>& weights, std::vector<nvinfer1::Weights>& trtWeights, int& inputChannels, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network); std::vector<std::string> loadListFromTextFile(const std::string filename); class YoloTinyMaxpoolPaddingFormula : public nvinfer1::IOutputDimensionsFormula { private: std::set<std::string> m_SamePaddingLayers; nvinfer1::DimsHW compute(nvinfer1::DimsHW inputDims, nvinfer1::DimsHW kernelSize, nvinfer1::DimsHW stride, nvinfer1::DimsHW padding, nvinfer1::DimsHW dilation, const char* layerName) const override { // assert(inputDims.d[0] == inputDims.d[1]); assert(kernelSize.d[0] == kernelSize.d[1]); assert(stride.d[0] == stride.d[1]); assert(padding.d[0] == padding.d[1]); int output_h, output_w; // Only layer maxpool_12 makes use of same padding if (m_SamePaddingLayers.find(layerName) != m_SamePaddingLayers.end()) { output_h = (inputDims.d[0] + 2 * padding.d[0]) / stride.d[0]; output_w = (inputDims.d[1] + 2 * padding.d[1]) / stride.d[1]; } // Valid Padding else { output_h = (inputDims.d[0] - kernelSize.d[0]) / stride.d[0] + 1; 
output_w = (inputDims.d[1] - kernelSize.d[1]) / stride.d[1] + 1; } return nvinfer1::DimsHW{output_h, output_w}; } public: void addSamePaddingLayer(std::string input) { m_SamePaddingLayers.insert(input); } }; #endif src/h_interface.cpp
@@ -8,6 +8,8 @@
using namespace std;
string m_staticStruct::model_path = "path";
string m_staticStruct::model_cfg = "0";   // initialize the struct's static members
string m_staticStruct::model_wts = "0";   // initialize the struct's static members
int m_staticStruct::type = 1;
bool m_staticStruct::isTrack= true;
int m_staticStruct::max_cam_num = 0;
@@ -116,6 +118,8 @@
    if(reader.parse(in,root))
    {
        std::string model_path = root["param"]["model_path"].asString();
        std::string model_cfg= root["param"]["model_cfg"].asString();
        std::string model_wts = root["param"]["model_wts"].asString();
        int type = root["param"]["type"].asInt();
        bool isTrack = root["param"]["isTrack"].asBool();
        int max_cam_num = root["param"]["max_cam_num"].asInt();
@@ -123,6 +127,8 @@
        int mv_velocity = root["param"]["mv_velocity"].asFloat();
        int fall_rate = root["param"]["fall_rate"].asFloat();
        m_staticStruct::model_path = model_path;
        m_staticStruct::model_cfg = model_cfg;
        m_staticStruct::model_wts = model_wts;
        m_staticStruct::type = type;
        m_staticStruct::isTrack = isTrack;
        m_staticStruct::max_cam_num = max_cam_num;
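The hunks above route the two new keys from config.json through JsonCpp into m_staticStruct. A condensed, self-contained sketch of that flow; the Settings struct and the hard-coded path are placeholders rather than the project's real names:

// Hypothetical, condensed sketch of the config flow added above: read config.json
// with JsonCpp and copy the new fields into a process-wide settings struct.
#include <json/json.h>
#include <fstream>
#include <iostream>
#include <string>

struct Settings {                 // stand-in for m_staticStruct
    static std::string model_path;
    static std::string model_cfg;
    static std::string model_wts;
    static int type;
};
std::string Settings::model_path;
std::string Settings::model_cfg;
std::string Settings::model_wts;
int Settings::type = 1;

int main()
{
    std::ifstream in("config.json");   // path is an assumption
    Json::Value root;
    Json::Reader reader;               // same (older) JsonCpp API as used above
    if (!reader.parse(in, root)) {
        std::cerr << "failed to parse config.json" << std::endl;
        return 1;
    }
    Settings::model_path = root["param"]["model_path"].asString();
    Settings::model_cfg  = root["param"]["model_cfg"].asString();
    Settings::model_wts  = root["param"]["model_wts"].asString();
    Settings::type       = root["param"]["type"].asInt();
    std::cout << "engine: " << Settings::model_path << ", type: " << Settings::type << std::endl;
    return 0;
}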