From ec3cf462234c96bf9d6c648db3e8fc1d781b2fe7 Mon Sep 17 00:00:00 2001 From: Scheaven <xuepengqiang> Date: 星期三, 08 九月 2021 18:02:06 +0800 Subject: [PATCH] update --- config.json | 3 src/config.h | 2 src/detecter_tools/trt_utils.h | 76 +++ src/detecter_tools/calibrator.h | 62 ++ src/detecter_tools/detector.cpp | 13 src/detecter_tools/trt_utils.cpp | 580 +++++++++++++++++++++++- src/h_interface.cpp | 6 src/detecter_tools/calibrator.cpp | 114 ++++ readme.txt | 4 src/detecter_tools/model.cpp | 524 +++++++++++++++++++++ src/detecter_tools/model.h | 13 11 files changed, 1,358 insertions(+), 39 deletions(-) diff --git a/config.json b/config.json index 8d2d1ff..407a517 100644 --- a/config.json +++ b/config.json @@ -3,6 +3,9 @@ "runtime": "/opt/vasystem/libs/Detect:/usr/local/cuda-11.1/lib64:", "param": { "model_path": "/opt/vasystem/bin/models/baseDetector/baseDetector.bin", + "model_cfg": "/opt/vasystem/bin/models/baseDetector/baseDetector.cfg", // para里边自己算法可能用到的参数 + "model_wts": "/opt/vasystem/bin/models/baseDetector/baseDetector.weights", // para里边自己算法可能用到的参数 + //"model_path": "/data/disk1/project/model_dump/02_yolo/baseDetetor_small.bin", // para里边自己算法可能用到的参数 "type":1, "max_cam_num": 8, "wander_time": 5, diff --git a/readme.txt b/readme.txt new file mode 100644 index 0000000..0f0a17d --- /dev/null +++ b/readme.txt @@ -0,0 +1,4 @@ +config.json 是配置信息的文件 +model_path 是转化好的trt模型路径 +如果没有转化好的模型文件,需要配置model_cfg:cfg文件和model_wts:weights权重文件。如果model_path存在则不需要 +"type"字段如果是1,则为正常版本,如果为2,则是tiny版本 diff --git a/src/config.h b/src/config.h index e7f6f80..9673e66 100644 --- a/src/config.h +++ b/src/config.h @@ -51,6 +51,8 @@ typedef struct m_staticStruct { static std::string model_path; + static std::string model_cfg; + static std::string model_wts; static int type; static bool isTrack; static int max_cam_num; diff --git a/src/detecter_tools/calibrator.cpp b/src/detecter_tools/calibrator.cpp new file mode 100644 index 0000000..0a9bf2f ---
/dev/null
+++ b/src/detecter_tools/calibrator.cpp
@@ -0,0 +1,114 @@
+/**
+MIT License
+
+Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*
+*/
+
+#include "calibrator.h"
+#include <fstream>
+#include <iostream>
+#include <iterator>
+// Builds the calibrator: the image list is loaded and shuffled only when no calibration table file exists yet; the device input buffer is always allocated.
+Int8EntropyCalibrator::Int8EntropyCalibrator(const uint32_t& batchSize, const std::string& calibImages,
+                                             const std::string& calibImagesPath,
+                                             const std::string& calibTableFilePath,
+                                             const uint64_t& inputSize, const uint32_t& inputH,
+                                             const uint32_t& inputW, const std::string& inputBlobName,
+                                             const std::string &s_net_type_) :
+    m_BatchSize(batchSize),
+    m_InputH(inputH),
+    m_InputW(inputW),
+    m_InputSize(inputSize),
+    m_InputCount(batchSize * inputSize),
+    m_InputBlobName(inputBlobName),
+    _s_net_type(s_net_type_),  // initializers are listed in member declaration order
+    m_CalibTableFilePath(calibTableFilePath),
+    m_ImageIndex(0)
+{
+    if (!fileExists(m_CalibTableFilePath, false))
+    {
+        m_ImageList = loadImageList(calibImages, calibImagesPath);
+        m_ImageList.resize(static_cast<int>(m_ImageList.size() / m_BatchSize) * m_BatchSize);  // keep whole batches only
+        std::random_shuffle(m_ImageList.begin(), m_ImageList.end(),
+                            [](int i) { return rand() % i; });
+    }
+
+    NV_CUDA_CHECK(cudaMalloc(&m_DeviceInput, m_InputCount * sizeof(float)));
+}
+// Releases the device input buffer allocated in the constructor.
+Int8EntropyCalibrator::~Int8EntropyCalibrator() { NV_CUDA_CHECK(cudaFree(m_DeviceInput)); }
+// Copies the next m_BatchSize preprocessed images to the device and exposes them through bindings[0]; returns false when no full batch is left.
+bool Int8EntropyCalibrator::getBatch(void* bindings[], const char* names[], int nbBindings)
+{
+    if (m_ImageIndex + m_BatchSize > m_ImageList.size()) return false;
+    // '>' rather than '>=': the final full batch [size - batch, size) is still in range and must be served.
+    // Load next batch
+    std::vector<DsImage> dsImages(m_BatchSize);
+    for (uint32_t j = m_ImageIndex; j < m_ImageIndex + m_BatchSize; ++j)
+    {
+        dsImages.at(j - m_ImageIndex) = DsImage(m_ImageList.at(j), _s_net_type, m_InputH, m_InputW);
+    }
+    m_ImageIndex += m_BatchSize;
+
+    cv::Mat trtInput = blobFromDsImages(dsImages, m_InputH, m_InputW);
+
+    NV_CUDA_CHECK(cudaMemcpy(m_DeviceInput, trtInput.ptr<float>(0), m_InputCount * sizeof(float),
+                             cudaMemcpyHostToDevice));
+    assert(!strcmp(names[0], m_InputBlobName.c_str()));
+    bindings[0] = m_DeviceInput;
+    return true;
+}
+// Reads the calibration table file into m_CalibrationCache; returns a pointer to it (and its size via length), or nullptr when nothing was read.
+const void* Int8EntropyCalibrator::readCalibrationCache(size_t& length)
+{
+    void* output;
+    m_CalibrationCache.clear();
+    assert(!m_CalibTableFilePath.empty());
+    std::ifstream input(m_CalibTableFilePath, std::ios::binary | std::ios::in);
+    input >> std::noskipws;  // keep whitespace bytes so the table is copied verbatim
+    if (m_ReadCache && input.good())
+        std::copy(std::istream_iterator<char>(input), std::istream_iterator<char>(),
+                  std::back_inserter(m_CalibrationCache));
+
+    length = m_CalibrationCache.size();
+    if (length)
+    {
+        std::cout << "Using cached calibration table to build the engine" << std::endl;
+        output = &m_CalibrationCache[0];
+    }
+
+    else
+    {
+        std::cout << "New calibration table will be created to build the engine" << std::endl;
+        output = nullptr;
+    }
+
+    return output;
+}
+// Writes the length bytes at cache to the calibration table file in binary mode.
+void Int8EntropyCalibrator::writeCalibrationCache(const void* cache, size_t length)
+{
+    assert(!m_CalibTableFilePath.empty());
+    std::ofstream output(m_CalibTableFilePath, std::ios::binary);
+    output.write(reinterpret_cast<const char*>(cache), length);
+    output.close();
+}
diff --git a/src/detecter_tools/calibrator.h b/src/detecter_tools/calibrator.h
new file mode 100644
index 0000000..4eb44a7
--- /dev/null
+++ b/src/detecter_tools/calibrator.h
@@ -0,0 +1,62 @@
+/**
+MIT License
+
+Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+*
+*/
+#ifndef _CALIBRATOR_H_
+#define _CALIBRATOR_H_
+
+#include "NvInfer.h"
+#include "ds_image.h"
+#include "trt_utils.h"
+// INT8 entropy calibrator: feeds batches of preprocessed images to TensorRT and reads/writes the calibration table file.
+class Int8EntropyCalibrator : public nvinfer1::IInt8EntropyCalibrator2
+{
+public:
+    Int8EntropyCalibrator(const uint32_t& batchSize, const std::string& calibImages,
+                          const std::string& calibImagesPath, const std::string& calibTableFilePath,
+                          const uint64_t& inputSize, const uint32_t& inputH, const uint32_t& inputW,
+                          const std::string& inputBlobName,const std::string &s_net_type_);
+    virtual ~Int8EntropyCalibrator();
+
+    int getBatchSize() const override { return m_BatchSize; }
+    bool getBatch(void* bindings[], const char* names[], int nbBindings) override;
+    const void* readCalibrationCache(size_t& length) override;
+    void writeCalibrationCache(const void* cache, size_t length) override;
+
+private:
+    const uint32_t m_BatchSize;
+    const uint32_t m_InputH;
+    const uint32_t m_InputW;
+    const uint64_t m_InputSize;
+    const uint64_t m_InputCount;  // batchSize * inputSize, set in the constructor
+    const std::string m_InputBlobName;
+    const std::string _s_net_type;
+    const std::string m_CalibTableFilePath;  // no {nullptr} default: std::string must never be built from a null pointer
+    uint32_t m_ImageIndex;  // index of the first image of the next batch
+    bool m_ReadCache{true};
+    void* m_DeviceInput{nullptr};  // device buffer allocated with cudaMalloc in the constructor
+    std::vector<std::string> m_ImageList;
+    std::vector<char> m_CalibrationCache;
+};
+
+#endif
diff --git a/src/detecter_tools/detector.cpp b/src/detecter_tools/detector.cpp
index 78e136f..be5f86a 100644
--- a/src/detecter_tools/detector.cpp
+++ b/src/detecter_tools/detector.cpp
@@ -31,10 +31,10 @@
         vec_ds_images.emplace_back(img, _vec_net_type[_config.net_type], _p_net->getInputH(), _p_net->getInputW());
     }
     cv::Mat trtInput = blobFromDsImages(vec_ds_images, _p_net->getInputH(),_p_net->getInputW());
-    timer.out("pre");
+    timer.out("eve pre
detect "); - _p_net->doInference(trtInput.data, vec_ds_images.size()); timer.reset(); + _p_net->doInference(trtInput.data, vec_ds_images.size()); for (uint32_t i = 0; i < vec_ds_images.size(); ++i) { auto curImage = vec_ds_images.at(i); @@ -65,9 +65,8 @@ } vec_batch_result[i] = vec_result; } - timer.out("post"); + timer.out("eve pre detect post"); DEBUG("--detect over--" ); - } void Detector::set_gpu_id(const int id) @@ -95,9 +94,9 @@ void Detector::build_net() { - if(_config.net_type == SMALL) - _p_net = std::unique_ptr<Detecter>{new Detecter(_info,_infer_param,2)}; - else{ + if(_config.net_type == COMMON) _p_net = std::unique_ptr<Detecter>{new Detecter(_info,_infer_param,1)}; + else{ + _p_net = std::unique_ptr<Detecter>{new Detecter(_info,_infer_param,2)}; } } diff --git a/src/detecter_tools/model.cpp b/src/detecter_tools/model.cpp index c548561..0bccfb3 100644 --- a/src/detecter_tools/model.cpp +++ b/src/detecter_tools/model.cpp @@ -11,6 +11,7 @@ Detecter::Detecter( const NetworkInfo& networkInfo, const InferParams& inferParams, int type) : + m_NetworkType(networkInfo.networkType), m_InputBlobName(networkInfo.inputBlobName), m_InputH(416), m_InputW(416), @@ -26,10 +27,17 @@ m_Context(nullptr), m_InputBindingIndex(-1), m_CudaStream(nullptr), - m_PluginFactory(new PluginFactory) + m_PluginFactory(new PluginFactory), + m_TinyMaxpoolPaddingFormula(new YoloTinyMaxpoolPaddingFormula) { - setOutput(type); m_EnginePath = m_staticStruct::model_path; + if(!fileExists(m_EnginePath)) + { + m_configBlocks = parseConfigFile(m_staticStruct::model_cfg); + parseConfigBlocks(); + createYOLOEngine(); + } + setOutput(type); DEBUG((boost::format("m_EnginePath:%s")%m_EnginePath).str()); assert(m_PluginFactory != nullptr); m_Engine = loadTRTEngine(m_EnginePath, m_PluginFactory, m_Logger); @@ -67,7 +75,482 @@ m_PluginFactory = nullptr; } - // m_TinyMaxpoolPaddingFormula.reset(); + m_TinyMaxpoolPaddingFormula.reset(); +} + +std::vector<std::map<std::string, std::string>> 
Detecter::parseConfigFile(const std::string cfgFilePath) +{ + std::cout << "::::::::::" << cfgFilePath <<std::endl; + assert(fileExists(cfgFilePath)); + std::ifstream file(cfgFilePath); + assert(file.good()); + std::string line; + std::vector<std::map<std::string, std::string>> blocks; + std::map<std::string, std::string> block; + + while (getline(file, line)) + { + if (line.empty()) continue; + if (line.front() == '#') continue; + line = trim(line); + if (line.front() == '[') + { + if (!block.empty()) + { + blocks.push_back(block); + block.clear(); + } + std::string key = "type"; + std::string value = trim(line.substr(1, line.size() - 2)); + block.insert(std::pair<std::string, std::string>(key, value)); + } + else + { + size_t cpos = line.find('='); + std::string key = trim(line.substr(0, cpos)); + std::string value = trim(line.substr(cpos + 1)); + block.insert(std::pair<std::string, std::string>(key, value)); + } + } + blocks.push_back(block); + return blocks; +} + +void Detecter::parseConfigBlocks() +{ + for (auto block : m_configBlocks) + { + if (block.at("type") == "net") + { + assert((block.find("height") != block.end()) + && "Missing 'height' param in network cfg"); + assert((block.find("width") != block.end()) && "Missing 'width' param in network cfg"); + assert((block.find("channels") != block.end()) + && "Missing 'channels' param in network cfg"); + assert((block.find("batch") != block.end()) + && "Missing 'batch' param in network cfg"); + + m_InputH = std::stoul(trim(block.at("height"))); + m_InputW = std::stoul(trim(block.at("width"))); + m_InputC = std::stoul(trim(block.at("channels"))); + m_BatchSize = std::stoi(trim(block.at("batch"))); + // assert(m_InputW == m_InputH); + m_InputSize = m_InputC * m_InputH * m_InputW; + } + else if ((block.at("type") == "region") || (block.at("type") == "yolo")) + { + assert((block.find("num") != block.end()) + && std::string("Missing 'num' param in " + block.at("type") + " layer").c_str()); + 
assert((block.find("classes") != block.end()) + && std::string("Missing 'classes' param in " + block.at("type") + " layer") + .c_str()); + assert((block.find("anchors") != block.end()) + && std::string("Missing 'anchors' param in " + block.at("type") + " layer") + .c_str()); + + TensorInfo outputTensor; + std::string anchorString = block.at("anchors"); + while (!anchorString.empty()) + { + size_t npos = anchorString.find_first_of(','); + if (npos != std::string::npos) + { + float anchor = std::stof(trim(anchorString.substr(0, npos))); + outputTensor.anchors.push_back(anchor); + anchorString.erase(0, npos + 1); + } + else + { + float anchor = std::stof(trim(anchorString)); + outputTensor.anchors.push_back(anchor); + break; + } + } + + assert((block.find("mask") != block.end()) + && std::string("Missing 'mask' param in " + block.at("type") + " layer") + .c_str()); + + std::string maskString = block.at("mask"); + while (!maskString.empty()) + { + size_t npos = maskString.find_first_of(','); + if (npos != std::string::npos) + { + uint32_t mask = std::stoul(trim(maskString.substr(0, npos))); + outputTensor.masks.push_back(mask); + maskString.erase(0, npos + 1); + } + else + { + uint32_t mask = std::stoul(trim(maskString)); + outputTensor.masks.push_back(mask); + break; + } + } + + outputTensor.numBBoxes = outputTensor.masks.size() > 0 + ? 
outputTensor.masks.size() + : std::stoul(trim(block.at("num"))); + outputTensor.numClasses = std::stoul(block.at("classes")); + if (m_ClassNames.empty()) + { + for (uint32_t i=0;i< outputTensor.numClasses;++i) + { + m_ClassNames.push_back(std::to_string(i)); + } + } + outputTensor.blobName = "yolo_" + std::to_string(_n_yolo_ind); + outputTensor.gridSize = (m_InputH / 32) * pow(2, _n_yolo_ind); + outputTensor.grid_h = (m_InputH / 32) * pow(2, _n_yolo_ind); + outputTensor.grid_w = (m_InputW / 32) * pow(2, _n_yolo_ind); + outputTensor.gridSize = (m_InputH / 32) * pow(2, 2-_n_yolo_ind); + outputTensor.grid_h = (m_InputH / 32) * pow(2, 2-_n_yolo_ind); + outputTensor.grid_w = (m_InputW / 32) * pow(2, 2-_n_yolo_ind); + outputTensor.stride = m_InputH / outputTensor.gridSize; + outputTensor.stride_h = m_InputH / outputTensor.grid_h; + outputTensor.stride_w = m_InputW / outputTensor.grid_w; + outputTensor.volume = outputTensor.grid_h* outputTensor.grid_w + *(outputTensor.numBBoxes*(5 + outputTensor.numClasses)); + m_OutputTensors.push_back(outputTensor); + _n_yolo_ind++; + } + } +} + +void Detecter::createYOLOEngine(const nvinfer1::DataType dataType, Int8EntropyCalibrator* calibrator) +{ + if (fileExists(m_EnginePath))return; + std::vector<float> weights = loadWeights(m_staticStruct::model_wts, m_NetworkType); + std::vector<nvinfer1::Weights> trtWeights; + int weightPtr = 0; + int channels = m_InputC; + m_Builder = nvinfer1::createInferBuilder(m_Logger); + nvinfer1::IBuilderConfig* config = m_Builder->createBuilderConfig(); + m_Network = m_Builder->createNetworkV2(0U); + if ((dataType == nvinfer1::DataType::kINT8 && !m_Builder->platformHasFastInt8()) + || (dataType == nvinfer1::DataType::kHALF && !m_Builder->platformHasFastFp16())) + { + std::cout << "Platform doesn't support this precision." 
<< std::endl;
+        assert(0);
+    }
+    // Declare the network input as a 3-D float tensor (C, H, W) named m_InputBlobName.
+    nvinfer1::ITensor* data = m_Network->addInput(
+        m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT,
+        nvinfer1::DimsCHW{static_cast<int>(m_InputC), static_cast<int>(m_InputH),
+                          static_cast<int>(m_InputW)});
+    assert(data != nullptr);
+    // Add a constant (all 255) and an elementwise divide so pixel values are normalized to 0-1 inside the network
+    nvinfer1::Dims divDims{
+        3,
+        {static_cast<int>(m_InputC), static_cast<int>(m_InputH), static_cast<int>(m_InputW)},
+        {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL,
+         nvinfer1::DimensionType::kSPATIAL}};
+    nvinfer1::Weights divWeights{nvinfer1::DataType::kFLOAT, nullptr,
+                                 static_cast<int64_t>(m_InputSize)};
+    float* divWt = new float[m_InputSize];  // tracked via trtWeights below; presumably freed by destroyNetworkUtils — TODO confirm
+    for (uint32_t w = 0; w < m_InputSize; ++w) divWt[w] = 255.0;
+    divWeights.values = divWt;
+    trtWeights.push_back(divWeights);
+    nvinfer1::IConstantLayer* constDivide = m_Network->addConstant(divDims, divWeights);
+    assert(constDivide != nullptr);
+    nvinfer1::IElementWiseLayer* elementDivide = m_Network->addElementWise(
+        *data, *constDivide->getOutput(0), nvinfer1::ElementWiseOperation::kDIV);
+    assert(elementDivide != nullptr);
+
+    nvinfer1::ITensor* previous = elementDivide->getOutput(0);
+    std::vector<nvinfer1::ITensor*> tensorOutputs;  // one entry per non-"net" block, so block i maps to index i - 1
+    uint32_t outputTensorCount = 0;
+
+    // build the network using the network API: one cfg block per iteration
+    for (uint32_t i = 0; i < m_configBlocks.size(); ++i)
+    {
+        // check if num. of channels is correct
+        assert(getNumChannels(previous) == channels);
+        std::string layerIndex = "(" + std::to_string(i) + ")";
+
+        if (m_configBlocks.at(i).at("type") == "net")
+        {
+            printLayerInfo("", "layer", " inp_size", " out_size", "weightPtr");
+        }
+        else if (m_configBlocks.at(i).at("type") == "convolutional")
+        {
+            std::string inputVol = dimsToString(previous->getDimensions());
+            nvinfer1::ILayer* out;
+            std::string layerType;
+            //check activation
+            std::string activation = "";
+            if (m_configBlocks.at(i).find("activation") != m_configBlocks.at(i).end())
+            {
+                activation = m_configBlocks[i]["activation"];
+            }
+            // check if batch_norm enabled
+            if ((m_configBlocks.at(i).find("batch_normalize") != m_configBlocks.at(i).end()) &&
+                ("leaky" == activation))
+            {
+                out = netAddConvBNLeaky(i, m_configBlocks.at(i), weights, trtWeights, weightPtr,
+                                        channels, previous, m_Network);
+                layerType = "conv-bn-leaky";
+            }
+            else if ((m_configBlocks.at(i).find("batch_normalize") != m_configBlocks.at(i).end()) &&
+                     ("mish" == activation))
+            {
+                out = net_conv_bn_mish(i, m_configBlocks.at(i), weights, trtWeights, weightPtr,
+                                       channels, previous, m_Network);
+                layerType = "conv-bn-mish";
+            }
+            else// if("linear" == activation)  -- every other combination falls through to the linear conv
+            {
+                out = netAddConvLinear(i, m_configBlocks.at(i), weights, trtWeights, weightPtr,
+                                       channels, previous, m_Network);
+                layerType = "conv-linear";
+            }
+            previous = out->getOutput(0);
+            assert(previous != nullptr);
+            channels = getNumChannels(previous);
+            std::string outputVol = dimsToString(previous->getDimensions());
+            tensorOutputs.push_back(out->getOutput(0));
+            printLayerInfo(layerIndex, layerType, inputVol, outputVol, std::to_string(weightPtr));
+        }
+        else if (m_configBlocks.at(i).at("type") == "shortcut")
+        {
+            assert(m_configBlocks.at(i).at("activation") == "linear");
+            assert(m_configBlocks.at(i).find("from") != m_configBlocks.at(i).end());
+            int from = stoi(m_configBlocks.at(i).at("from"));
+
+            std::string inputVol = dimsToString(previous->getDimensions());
+            // check if indexes are correct
+            assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size()));  // NOTE(review): i is uint32_t, so ">= 0" is always true
+            assert((i + from - 1 >= 0) && (i + from - 1 < tensorOutputs.size()));  // NOTE(review): unsigned arithmetic here as well
+            assert(i + from - 1 < i - 2);
+            nvinfer1::IElementWiseLayer* ew
+                = m_Network->addElementWise(*tensorOutputs[i - 2], *tensorOutputs[i + from - 1],
+                                            nvinfer1::ElementWiseOperation::kSUM);
+            assert(ew != nullptr);
+            std::string ewLayerName = "shortcut_" + std::to_string(i);
+            ew->setName(ewLayerName.c_str());
+            previous = ew->getOutput(0);
+            assert(previous != nullptr);
+            std::string outputVol = dimsToString(previous->getDimensions());
+            tensorOutputs.push_back(ew->getOutput(0));
+            printLayerInfo(layerIndex, "skip", inputVol, outputVol, " -");
+        }
+        else if (m_configBlocks.at(i).at("type") == "yolo")
+        {
+            nvinfer1::Dims prevTensorDims = previous->getDimensions();
+            // assert(prevTensorDims.d[1] == prevTensorDims.d[2]);
+            TensorInfo& curYoloTensor = m_OutputTensors.at(outputTensorCount);
+            curYoloTensor.gridSize = prevTensorDims.d[1];  // replace the cfg-derived estimate with the real tensor dims
+            curYoloTensor.grid_h = prevTensorDims.d[1];
+            curYoloTensor.grid_w = prevTensorDims.d[2];
+            curYoloTensor.stride = m_InputW / curYoloTensor.gridSize;
+            curYoloTensor.stride_h = m_InputH / curYoloTensor.grid_h;
+            curYoloTensor.stride_w = m_InputW / curYoloTensor.grid_w;
+            m_OutputTensors.at(outputTensorCount).volume = curYoloTensor.grid_h
+                * curYoloTensor.grid_w
+                * (curYoloTensor.numBBoxes * (5 + curYoloTensor.numClasses));
+            std::string layerName = "yolo_" + std::to_string(outputTensorCount);
+            curYoloTensor.blobName = layerName;
+            nvinfer1::IPlugin* yoloPlugin
+                = new SLayerV3(m_OutputTensors.at(outputTensorCount).numBBoxes,
+                               m_OutputTensors.at(outputTensorCount).numClasses,
+                               m_OutputTensors.at(outputTensorCount).grid_h,
+                               m_OutputTensors.at(outputTensorCount).grid_w);
+            assert(yoloPlugin != nullptr);
+            nvinfer1::IPluginLayer* yolo = m_Network->addPlugin(&previous, 1, *yoloPlugin);
+            assert(yolo != nullptr);
+            yolo->setName(layerName.c_str());
+            std::string inputVol = dimsToString(previous->getDimensions());
+            previous = yolo->getOutput(0);
+            assert(previous != nullptr);
+            previous->setName(layerName.c_str());
+            std::string outputVol = dimsToString(previous->getDimensions());
+            m_Network->markOutput(*previous);  // each yolo layer is a network output
+            channels = getNumChannels(previous);
+            tensorOutputs.push_back(yolo->getOutput(0));
+            printLayerInfo(layerIndex, "yolo", inputVol, outputVol, std::to_string(weightPtr));
+            ++outputTensorCount;
+        }
+        else if (m_configBlocks.at(i).at("type") == "route")
+        {
+            size_t found = m_configBlocks.at(i).at("layers").find(",");
+            if (found != std::string::npos)//concate multi layers
+            {
+                std::vector<int> vec_index = split_layer_index(m_configBlocks.at(i).at("layers"), ",");
+                for (auto &ind_layer:vec_index)
+                {
+                    if (ind_layer < 0)
+                    {
+                        ind_layer = static_cast<int>(tensorOutputs.size()) + ind_layer;  // negative index = relative to the current end
+                    }
+                    assert(ind_layer < static_cast<int>(tensorOutputs.size()) && ind_layer >= 0);
+                }
+                nvinfer1::ITensor** concatInputs  // NOTE(review): this malloc'd array is never freed in this function
+                    = reinterpret_cast<nvinfer1::ITensor**>(malloc(sizeof(nvinfer1::ITensor*) * vec_index.size()));
+                for (size_t ind = 0; ind < vec_index.size(); ++ind)
+                {
+                    concatInputs[ind] = tensorOutputs[vec_index[ind]];
+                }
+                nvinfer1::IConcatenationLayer* concat
+                    = m_Network->addConcatenation(concatInputs, static_cast<int>(vec_index.size()));
+                assert(concat != nullptr);
+                std::string concatLayerName = "route_" + std::to_string(i - 1);
+                concat->setName(concatLayerName.c_str());
+                // concatenate along the channel dimension
+                concat->setAxis(0);
+                previous = concat->getOutput(0);
+                assert(previous != nullptr);
+                nvinfer1::Dims debug = previous->getDimensions();  // NOTE(review): not used afterwards
+                std::string outputVol = dimsToString(previous->getDimensions());
+                int nums = 0;
+                for (auto &indx:vec_index)
+                {
+                    nums += getNumChannels(tensorOutputs[indx]);
+                }
+                channels = nums;
+                tensorOutputs.push_back(concat->getOutput(0));
+                printLayerInfo(layerIndex, "route", " -", outputVol,std::to_string(weightPtr));
+            }
+            else //single layer
+            {
+                int idx = std::stoi(trim(m_configBlocks.at(i).at("layers")));
+                if (idx < 0)
+                {
+                    idx = static_cast<int>(tensorOutputs.size()) + idx;
+                }
+                assert(idx < static_cast<int>(tensorOutputs.size()) && idx >= 0);
+
+                //route
+                if (m_configBlocks.at(i).find("groups") == m_configBlocks.at(i).end())
+                {
+                    previous = tensorOutputs[idx];
+                    assert(previous != nullptr);
+                    std::string outputVol = dimsToString(previous->getDimensions());
+                    // set the output volume depth
+                    channels = getNumChannels(tensorOutputs[idx]);
+                    tensorOutputs.push_back(tensorOutputs[idx]);
+                    printLayerInfo(layerIndex, "route", " -", outputVol, std::to_string(weightPtr));
+
+                }
+                //yolov4-tiny route split layer
+                else
+                {
+                    if (m_configBlocks.at(i).find("group_id") == m_configBlocks.at(i).end())
+                    {
+                        assert(0);
+                    }
+                    int chunk_idx = std::stoi(trim(m_configBlocks.at(i).at("group_id")));
+                    nvinfer1::ILayer* out = layer_split(i, tensorOutputs[idx], m_Network);
+                    std::string inputVol = dimsToString(previous->getDimensions());
+                    previous = out->getOutput(chunk_idx);  // keep only the chunk selected by group_id
+                    assert(previous != nullptr);
+                    channels = getNumChannels(previous);
+                    std::string outputVol = dimsToString(previous->getDimensions());
+                    tensorOutputs.push_back(out->getOutput(chunk_idx));
+                    printLayerInfo(layerIndex,"chunk", inputVol, outputVol, std::to_string(weightPtr));
+                }
+            }
+        }
+        else if (m_configBlocks.at(i).at("type") == "upsample")
+        {
+            std::string inputVol = dimsToString(previous->getDimensions());
+            nvinfer1::ILayer* out = netAddUpsample(i - 1, m_configBlocks[i], weights, trtWeights,
+                                                   channels, previous, m_Network);
+            previous = out->getOutput(0);
+            std::string outputVol = dimsToString(previous->getDimensions());
+            tensorOutputs.push_back(out->getOutput(0));
+            printLayerInfo(layerIndex, "upsample", inputVol, outputVol, " -");
+        }
+        else if (m_configBlocks.at(i).at("type") == "maxpool")
+        {
+            // Add same padding layers
+            if (m_configBlocks.at(i).at("size") == "2" && m_configBlocks.at(i).at("stride") == "1")
+            {
+                m_TinyMaxpoolPaddingFormula->addSamePaddingLayer("maxpool_" + std::to_string(i));
+            }
+            std::string inputVol = dimsToString(previous->getDimensions());
+            nvinfer1::ILayer* out = netAddMaxpool(i, m_configBlocks.at(i), previous, m_Network);
+            previous = out->getOutput(0);
+            assert(previous != nullptr);
+            std::string outputVol = dimsToString(previous->getDimensions());
+            tensorOutputs.push_back(out->getOutput(0));
+            printLayerInfo(layerIndex, "maxpool", inputVol, outputVol, std::to_string(weightPtr));
+        }
+        else
+        {
+            std::cout << "Unsupported layer type --> \"" << m_configBlocks.at(i).at("type") << "\""
+                      << std::endl;
+            assert(0);
+        }
+    }
+    // Every value read from the weights file must have been consumed by the layers above.
+    if (static_cast<int>(weights.size()) != weightPtr)
+    {
+        std::cout << "Number of unused weights left : " << static_cast<int>(weights.size()) - weightPtr << std::endl;
+        assert(0);
+    }
+
+    // std::cout << "Output blob names :" << std::endl;
+    // for (auto& tensor : m_OutputTensors) std::cout << tensor.blobName << std::endl;
+
+    // Create and cache the engine if not already present
+    if (fileExists(m_EnginePath))
+    {
+        std::cout << "Using previously generated plan file located at " << m_EnginePath
+                  << std::endl;
+        destroyNetworkUtils(trtWeights);
+        return;
+    }
+
+    /*std::cout << "Unable to find cached TensorRT engine for network : " << m_NetworkType
+              << " precision : " << m_Precision << " and batch size :" << m_BatchSize << std::endl;*/
+
+    m_Builder->setMaxBatchSize(m_BatchSize);
+    //m_Builder->setMaxWorkspaceSize(1 << 20);
+
+    config->setMaxWorkspaceSize(1 << 20);  // 1 << 20 bytes = 1 MiB
+    if (dataType == nvinfer1::DataType::kINT8)
+    {
+        assert((calibrator != nullptr) && "Invalid calibrator for INT8 precision");
+        // m_Builder->setInt8Mode(true);
+        config->setFlag(nvinfer1::BuilderFlag::kINT8);
+        // m_Builder->setInt8Calibrator(calibrator);
+        config->setInt8Calibrator(calibrator);
+        // config->setTacticSources(1U << static_cast<uint32_t>(TacticSource::kCUBLAS) | 1U << static_cast<uint32_t>(TacticSource::kCUBLAS_LT));
+    }
+    else if (dataType == nvinfer1::DataType::kHALF)
+    {
+        config->setFlag(nvinfer1::BuilderFlag::kFP16);
+        // m_Builder->setHalf2Mode(true);
+    }
+
+    m_Builder->allowGPUFallback(true);
+    int nbLayers = m_Network->getNbLayers();
+    int layersOnDLA = 0;
+    // std::cout << "Total number of layers: " << nbLayers << std::endl;
+    for (int i = 0; i < nbLayers; i++)
+    {
+        nvinfer1::ILayer* curLayer = m_Network->getLayer(i);
+        if (m_DeviceType == "kDLA" && m_Builder->canRunOnDLA(curLayer))
+        {
+            m_Builder->setDeviceType(curLayer, nvinfer1::DeviceType::kDLA);
+            layersOnDLA++;
+            std::cout << "Set layer " << curLayer->getName() << " to run on DLA" << std::endl;
+        }
+    }
+    // std::cout << "Total number of layers on DLA: " << layersOnDLA << std::endl;
+
+    // Build the engine
+    std::cout << "Building the TensorRT Engine..." << std::endl;
+    m_Engine = m_Builder->buildEngineWithConfig(*m_Network,*config);
+    assert(m_Engine != nullptr);
+    std::cout << "Building complete!" << std::endl;
+
+    // Serialize the engine to m_EnginePath
+    writePlanFileToDisk();
+
+    // destroy
+    destroyNetworkUtils(trtWeights);
 }
 
 void Detecter::doInference(const unsigned char* input, const uint32_t batchSize)
@@ -268,10 +751,10 @@
 void Detecter::setOutput(int type)
 {
     m_OutputTensors.clear();
+    printf("0-0-0-0-0-0------------------%d",type);  // debug trace of the requested output layout; no trailing newline
     if(type==2)
         for (int i = 0; i < 2; ++i)
         {
-
             TensorInfo outputTensor;
             outputTensor.numClasses = CLASS_BUM;
             outputTensor.blobName = "yolo_" + std::to_string(i);
@@ -323,7 +806,17 @@
         {
             TensorInfo outputTensor;
             outputTensor.numClasses = CLASS_BUM;
-            outputTensor.blobName = "yolo_" + std::to_string(i);
+            outputTensor.blobName = "yolo_" + to_string(i);
+            // if (i==0)
+            // {
+            //     outputTensor.blobName = "139_convolutional_reshape_2";
+            // }else if (i==1)
+            // {
+            //     outputTensor.blobName = "150_convolutional_reshape_2";
+            // }else if (i==2)
+            // {
+            //     outputTensor.blobName = "161_convolutional_reshape_2";
+            // }
             outputTensor.gridSize = (m_InputH / 32) * pow(2, 2-i);
             outputTensor.grid_h = (m_InputH / 32) * pow(2, 2-i);
             outputTensor.grid_w = (m_InputW / 32) * pow(2, 2-i);
@@ -380,3 +873,24 @@
         m_OutputTensors.push_back(outputTensor);
     }
 }
+// Serializes m_Engine and writes the resulting bytes to the file at m_EnginePath.
+void Detecter::writePlanFileToDisk()
+{
+    std::cout << "Serializing the TensorRT Engine..."
<< std::endl;
+    assert(m_Engine && "Invalid TensorRT Engine");
+    m_ModelStream = m_Engine->serialize();
+    assert(m_ModelStream && "Unable to serialize engine");
+    assert(!m_EnginePath.empty() && "Enginepath is empty");
+
+    // write data to output file (staged through an in-memory stream first)
+    std::stringstream gieModelStream;
+    gieModelStream.seekg(0, gieModelStream.beg);
+    gieModelStream.write(static_cast<const char*>(m_ModelStream->data()), m_ModelStream->size());
+    std::ofstream outFile;
+    outFile.open(m_EnginePath, std::ios::binary | std::ios::out);
+    outFile << gieModelStream.rdbuf();
+    outFile.close();
+
+    std::cout << "Serialized plan file cached at location : " << m_EnginePath << std::endl;
+}
+
diff --git a/src/detecter_tools/model.h b/src/detecter_tools/model.h
index eb3adff..e993f44 100644
--- a/src/detecter_tools/model.h
+++ b/src/detecter_tools/model.h
@@ -3,7 +3,7 @@
 
 #include "plugin_factory.h"
 #include "trt_utils.h"
-
+#include "calibrator.h"
 #include "NvInfer.h"
 #include "NvInferPlugin.h"
 #include "NvInferRuntimeCommon.h"
@@ -16,6 +16,7 @@
 #include "../utils/time_util.h"
 #include "../config.h"
 #include "opencv2/opencv.hpp"
+#include <numeric>
 
 struct NetworkInfo
 {
@@ -78,6 +79,7 @@
     const std::string m_DeviceType;
     const std::string m_InputBlobName;
     std::vector<TensorInfo> m_OutputTensors;
+    std::vector<std::map<std::string, std::string>> m_configBlocks;  // parsed cfg sections, filled by parseConfigFile
     uint32_t m_InputH;
     uint32_t m_InputW;
     uint32_t m_InputC;
@@ -172,12 +174,21 @@
     void setOutput(int type);
 private:
     Logger m_Logger;
+    void createYOLOEngine(const nvinfer1::DataType dataType = nvinfer1::DataType::kFLOAT,
+                          Int8EntropyCalibrator* calibrator = nullptr);  // calibrator is asserted non-null for kINT8
+    void writePlanFileToDisk();
+    std::vector<std::map<std::string, std::string>> parseConfigFile(const std::string cfgFilePath);
+    void parseConfigBlocks();
     void allocateBuffers();
     bool verifyEngine();
     void destroyNetworkUtils(std::vector<nvinfer1::Weights>& trtWeights);
+protected:
+    const std::string m_NetworkType;
+    std::unique_ptr<YoloTinyMaxpoolPaddingFormula> m_TinyMaxpoolPaddingFormula;
 private:
     Timer _timer;
+    int _n_yolo_ind = 0;  // running index of region/yolo blocks seen by parseConfigBlocks
 };
 
 #endif
diff --git a/src/detecter_tools/trt_utils.cpp b/src/detecter_tools/trt_utils.cpp
index 75d5d6a..ebf2864 100644
--- a/src/detecter_tools/trt_utils.cpp
+++ b/src/detecter_tools/trt_utils.cpp
@@ -21,34 +21,34 @@
                                   cv::Scalar(0.0, 0.0, 0.0),true);
 }
 
-// static void leftTrim(std::string& s)
-// {
-//     s.erase(s.begin(), find_if(s.begin(), s.end(), [](int ch) { return !isspace(ch); }));
-// }
+static void leftTrim(std::string& s)  // erases leading whitespace in place
+{
+    s.erase(s.begin(), find_if(s.begin(), s.end(), [](int ch) { return !isspace(ch); }));
+}
 
-// static void rightTrim(std::string& s)
-// {
-//     s.erase(find_if(s.rbegin(), s.rend(), [](int ch) { return !isspace(ch); }).base(), s.end());
-// }
+static void rightTrim(std::string& s)  // erases trailing whitespace in place
+{
+    s.erase(find_if(s.rbegin(), s.rend(), [](int ch) { return !isspace(ch); }).base(), s.end());
+}
 
-// std::string trim(std::string s)
-// {
-//     leftTrim(s);
-//     rightTrim(s);
-//     return s;
-// }
+std::string trim(std::string s)  // returns a copy of s with whitespace removed from both ends
+{
+    leftTrim(s);
+    rightTrim(s);
+    return s;
+}
 
-// std::string triml(std::string s,const char* t)
-// {
-//     s.erase(0, s.find_first_not_of(t));
-//     return s;
-// }
+std::string triml(std::string s,const char* t)  // strips leading characters that appear in t
+{
+    s.erase(0, s.find_first_not_of(t));
+    return s;
+}
 
-// std::string trimr(std::string s, const char* t)
-// {
-//     s.erase(s.find_last_not_of(t) + 1);
-//     return s;
-// }
+std::string trimr(std::string s, const char* t)  // strips trailing characters that appear in t
+{
+    s.erase(s.find_last_not_of(t) + 1);
+    return s;
+}
 
 float clamp(const float val, const float minVal, const float maxVal)
 {
@@ -115,6 +115,305 @@
 //               << " xmax:" << b.box.x2 << " ymax:" << b.box.y2 << std::endl;
 // }
 //
+// Reads a binary weights file: skips the header, then returns every following 4-byte value as a float. networkType is not used here.
+std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& networkType)
+{
+    assert(fileExists(weightsFilePath));
+    std::cout << "Loading pre-trained weights..." << std::endl;
+    std::ifstream file(weightsFilePath, std::ios_base::binary);
+    assert(file.good());
+    std::string line;
+    file.ignore(4);
+    char buf[2];
+    file.read(buf, 1);  // the 5th byte selects the header length (presumably a format version — TODO confirm)
+    if ((int)(unsigned char)buf[0] == 1)
+    {
+        file.ignore(11);  // 4 + 1 + 11 = 16 header bytes in total
+    }
+    else if ((int)(unsigned char)buf[0] == 2)
+    {
+        file.ignore(15);  // 4 + 1 + 15 = 20 header bytes in total
+    }
+    else
+    {
+        std::cout << "Invalid network type" << std::endl;
+        assert(0);
+    }
+
+    std::vector<float> weights;
+    char* floatWeight = new char[4];
+    while (!file.eof())
+    {
+        file.read(floatWeight, 4);
+        assert(file.gcount() == 4);  // the payload must be a whole number of 4-byte values
+        weights.push_back(*reinterpret_cast<float*>(floatWeight));
+        if (file.peek() == std::istream::traits_type::eof()) break;
+    }
+    std::cout << "Loading complete!" << std::endl;
+    delete[] floatWeight;
+
+    // std::cout << "Total Number of weights read : " << weights.size() << std::endl;
+    return weights;
+}
+// Formats dimensions as width-4 fields separated by " x" (used for the layer table printed while building).
+std::string dimsToString(const nvinfer1::Dims d)
+{
+    std::stringstream s;
+    assert(d.nbDims >= 1);
+    for (int i = 0; i < d.nbDims - 1; ++i)
+    {
+        s << std::setw(4) << d.d[i] << " x";
+    }
+    s << std::setw(4) << d.d[d.nbDims - 1];
+
+    return s.str();
+}
+// Adds a max-pooling layer named "maxpool_<layerIdx>" using the block's "size" and "stride", with padding (size - 1) / 2.
+nvinfer1::ILayer* netAddMaxpool(int layerIdx, std::map<std::string, std::string>& block,
+                                nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network)
+{
+    assert(block.at("type") == "maxpool");
+    assert(block.find("size") != block.end());
+    assert(block.find("stride") != block.end());
+
+    int size = std::stoi(block.at("size"));
+    int stride = std::stoi(block.at("stride"));
+
+    nvinfer1::IPoolingLayer* pool
+        = network->addPoolingNd(*input, nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{size, size});
+    assert(pool);
+    std::string maxpoolLayerName = "maxpool_" + std::to_string(layerIdx);
+    int pad = (size - 1) / 2;
+    pool->setPaddingNd(nvinfer1::DimsHW{pad,pad});
+    pool->setStrideNd(nvinfer1::DimsHW{stride, stride});
+    pool->setName(maxpoolLayerName.c_str());
+
+    return pool;
+}
+// Adds a linear (no batch-norm) convolution named "conv_<layerIdx>"; consumes `filters` biases, then the kernel weights, from `weights` starting at weightPtr.
+nvinfer1::ILayer* netAddConvLinear(int layerIdx, std::map<std::string, std::string>& block,
+                                   std::vector<float>& weights,
+                                   std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr,
+                                   int& inputChannels, nvinfer1::ITensor* input,
+                                   nvinfer1::INetworkDefinition* network)
+{
+    assert(block.at("type") == "convolutional");
+    assert(block.find("batch_normalize") == block.end());
+    assert(block.at("activation") == "linear");
+    assert(block.find("filters") != block.end());
+    assert(block.find("pad") != block.end());
+    assert(block.find("size") != block.end());
+    assert(block.find("stride") != block.end());
+
+    int filters = std::stoi(block.at("filters"));
+    int padding = std::stoi(block.at("pad"));
+    int kernelSize = std::stoi(block.at("size"));
+    int stride = std::stoi(block.at("stride"));
+    int pad;
+    if (padding)
+        pad = (kernelSize - 1) / 2;
+    else
+        pad = 0;
+    // load the convolution layer bias
+    nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, filters};
+    float* val = new float[filters];  // recorded in trtWeights below so it can be released later
+    for (int i = 0; i < filters; ++i)
+    {
+        val[i] = weights[weightPtr];  // NOTE(review): no bounds check against weights.size()
+        weightPtr++;
+    }
+    convBias.values = val;
+    trtWeights.push_back(convBias);
+    // load the convolutional layer weights
+    int size = filters * inputChannels * kernelSize * kernelSize;
+    nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size};
+    val = new float[size];
+    for (int i = 0; i < size; ++i)
+    {
+        val[i] = weights[weightPtr];
+        weightPtr++;
+    }
+    convWt.values = val;
+    trtWeights.push_back(convWt);
+    nvinfer1::IConvolutionLayer* conv = network->addConvolution(
+        *input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias);
+    assert(conv != nullptr);
+    std::string convLayerName = "conv_" + std::to_string(layerIdx);
+    conv->setName(convLayerName.c_str());
+    conv->setStride(nvinfer1::DimsHW{stride, stride});
+    conv->setPadding(nvinfer1::DimsHW{pad, pad});
+
+    return conv;
+}
+
+nvinfer1::ILayer* net_conv_bn_mish(int layerIdx,
+                                   std::map<std::string, std::string>& block,
+                                   std::vector<float>& weights,
+                                   std::vector<nvinfer1::Weights>& trtWeights,
+                                   int&
weightPtr, + int& inputChannels, + nvinfer1::ITensor* input, + nvinfer1::INetworkDefinition* network) +{ + assert(block.at("type") == "convolutional"); + assert(block.find("batch_normalize") != block.end()); + assert(block.at("batch_normalize") == "1"); + assert(block.at("activation") == "mish"); + assert(block.find("filters") != block.end()); + assert(block.find("pad") != block.end()); + assert(block.find("size") != block.end()); + assert(block.find("stride") != block.end()); + + bool batchNormalize, bias; + if (block.find("batch_normalize") != block.end()) + { + batchNormalize = (block.at("batch_normalize") == "1"); + bias = false; + } + else + { + batchNormalize = false; + bias = true; + } + // all conv_bn_leaky layers assume bias is false + assert(batchNormalize == true && bias == false); + + int filters = std::stoi(block.at("filters")); + int padding = std::stoi(block.at("pad")); + int kernelSize = std::stoi(block.at("size")); + int stride = std::stoi(block.at("stride")); + int pad; + if (padding) + pad = (kernelSize - 1) / 2; + else + pad = 0; + std::vector<float> bnBiases; + for (int i = 0; i < filters; ++i) + { + bnBiases.push_back(weights[weightPtr]); + weightPtr++; + } + // load BN weights + std::vector<float> bnWeights; + for (int i = 0; i < filters; ++i) + { + bnWeights.push_back(weights[weightPtr]); + weightPtr++; + } + // load BN running_mean + std::vector<float> bnRunningMean; + for (int i = 0; i < filters; ++i) + { + bnRunningMean.push_back(weights[weightPtr]); + weightPtr++; + } + // load BN running_var + std::vector<float> bnRunningVar; + for (int i = 0; i < filters; ++i) + { + // 1e-05 for numerical stability + bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5f)); + weightPtr++; + } + // load Conv layer weights (GKCRS) + int size = filters * inputChannels * kernelSize * kernelSize; + nvinfer1::Weights convWt{ nvinfer1::DataType::kFLOAT, nullptr, size }; + float* val = new float[size]; + for (int i = 0; i < size; ++i) + { + val[i] = 
weights[weightPtr]; + weightPtr++; + } + convWt.values = val; + trtWeights.push_back(convWt); + nvinfer1::Weights convBias{ nvinfer1::DataType::kFLOAT, nullptr, 0 }; + trtWeights.push_back(convBias); + nvinfer1::IConvolutionLayer* conv = network->addConvolution( + *input, filters, nvinfer1::DimsHW{ kernelSize, kernelSize }, convWt, convBias); + assert(conv != nullptr); + std::string convLayerName = "conv_" + std::to_string(layerIdx); + conv->setName(convLayerName.c_str()); + conv->setStride(nvinfer1::DimsHW{ stride, stride }); + conv->setPadding(nvinfer1::DimsHW{ pad, pad }); + + /***** BATCHNORM LAYER *****/ + /***************************/ + size = filters; + // create the weights + nvinfer1::Weights shift{ nvinfer1::DataType::kFLOAT, nullptr, size }; + nvinfer1::Weights scale{ nvinfer1::DataType::kFLOAT, nullptr, size }; + nvinfer1::Weights power{ nvinfer1::DataType::kFLOAT, nullptr, size }; + float* shiftWt = new float[size]; + for (int i = 0; i < size; ++i) + { + shiftWt[i] + = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i)); + } + shift.values = shiftWt; + float* scaleWt = new float[size]; + for (int i = 0; i < size; ++i) + { + scaleWt[i] = bnWeights.at(i) / bnRunningVar[i]; + } + scale.values = scaleWt; + float* powerWt = new float[size]; + for (int i = 0; i < size; ++i) + { + powerWt[i] = 1.0; + } + power.values = powerWt; + trtWeights.push_back(shift); + trtWeights.push_back(scale); + trtWeights.push_back(power); + // Add the batch norm layers + nvinfer1::IScaleLayer* bn = network->addScale( + *conv->getOutput(0), nvinfer1::ScaleMode::kCHANNEL, shift, scale, power); + assert(bn != nullptr); + std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx); + bn->setName(bnLayerName.c_str()); + /***** ACTIVATION LAYER *****/ + /****************************/ + auto creator = getPluginRegistry()->getPluginCreator("Mish_TRT", "1"); + const nvinfer1::PluginFieldCollection* pluginData = creator->getFieldNames(); + 
nvinfer1::IPluginV2 *pluginObj = creator->createPlugin(("mish" + std::to_string(layerIdx)).c_str(), pluginData); + nvinfer1::ITensor* inputTensors[] = { bn->getOutput(0) }; + auto mish = network->addPluginV2(&inputTensors[0], 1, *pluginObj); + return mish; +} + +int getNumChannels(nvinfer1::ITensor* t) +{ + nvinfer1::Dims d = t->getDimensions(); + assert(d.nbDims == 3); + + return d.d[0]; +} + +std::vector<int> split_layer_index(const std::string &s_,const std::string &delimiter_) +{ + std::vector<int> index; + std::string s = s_; + size_t pos = 0; + std::string token; + while ((pos = s.find(delimiter_)) != std::string::npos) + { + token = s.substr(0, pos); + index.push_back(std::stoi(trim(token))); + s.erase(0, pos + delimiter_.length()); + } + index.push_back(std::stoi(trim(s))); + return index; +} + +void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput, + std::string layerOutput, std::string weightPtr) +{ + std::cout << std::setw(6) << std::left << layerIndex << std::setw(15) << std::left << layerName; + std::cout << std::setw(20) << std::left << layerInput << std::setw(20) << std::left + << layerOutput; + std::cout << std::setw(6) << std::left << weightPtr << std::endl; +} + uint64_t get3DTensorVolume(nvinfer1::Dims inputDims) { assert(inputDims.nbDims == 3); @@ -216,3 +515,236 @@ } return out; } + +nvinfer1::ILayer* netAddUpsample(int layerIdx, std::map<std::string, std::string>& block, + std::vector<float>& weights, + std::vector<nvinfer1::Weights>& trtWeights, int& inputChannels, + nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network) +{ + assert(block.at("type") == "upsample"); + nvinfer1::Dims inpDims = input->getDimensions(); + assert(inpDims.nbDims == 3); + // assert(inpDims.d[1] == inpDims.d[2]); + int n_scale = std::stoi(block.at("stride")); + + int c1 = inpDims.d[0]; + float *deval = new float[c1*n_scale*n_scale]; + for (int i = 0; i < c1*n_scale*n_scale; i++) + { + deval[i] = 1.0; + } + 
nvinfer1::Weights wts{ DataType::kFLOAT, deval, c1*n_scale*n_scale }; + nvinfer1::Weights bias{ DataType::kFLOAT, nullptr, 0 }; + IDeconvolutionLayer* upsample = network->addDeconvolutionNd(*input, c1, DimsHW{ n_scale, n_scale }, wts, bias); + upsample->setStrideNd(DimsHW{ n_scale, n_scale }); + upsample->setNbGroups(c1); + return upsample; + + #if 0 +#endif +} + +nvinfer1::ILayer * layer_split(const int n_layer_index_, + nvinfer1::ITensor *input_, + nvinfer1::INetworkDefinition* network) +{ + auto creator = getPluginRegistry()->getPluginCreator("CHUNK_TRT", "1.0"); + const nvinfer1::PluginFieldCollection* pluginData = creator->getFieldNames(); + nvinfer1::IPluginV2 *pluginObj = creator->createPlugin(("chunk" + std::to_string(n_layer_index_)).c_str(), pluginData); + auto chunk = network->addPluginV2(&input_, 1, *pluginObj); + return chunk; +} + +nvinfer1::ILayer* netAddConvBNLeaky(int layerIdx, + std::map<std::string, std::string>& block, + std::vector<float>& weights, + std::vector<nvinfer1::Weights>& trtWeights, + int& weightPtr, + int& inputChannels, + nvinfer1::ITensor* input, + nvinfer1::INetworkDefinition* network) +{ + assert(block.at("type") == "convolutional"); + assert(block.find("batch_normalize") != block.end()); + assert(block.at("batch_normalize") == "1"); + assert(block.at("activation") == "leaky"); + assert(block.find("filters") != block.end()); + assert(block.find("pad") != block.end()); + assert(block.find("size") != block.end()); + assert(block.find("stride") != block.end()); + + bool batchNormalize, bias; + if (block.find("batch_normalize") != block.end()) + { + batchNormalize = (block.at("batch_normalize") == "1"); + bias = false; + } + else + { + batchNormalize = false; + bias = true; + } + // all conv_bn_leaky layers assume bias is false + assert(batchNormalize == true && bias == false); + + int filters = std::stoi(block.at("filters")); + int padding = std::stoi(block.at("pad")); + int kernelSize = std::stoi(block.at("size")); + int stride = 
std::stoi(block.at("stride")); + int pad; + if (padding) + pad = (kernelSize - 1) / 2; + else + pad = 0; + + /***** CONVOLUTION LAYER *****/ + /*****************************/ + // batch norm weights are before the conv layer + // load BN biases (bn_biases) + std::vector<float> bnBiases; + for (int i = 0; i < filters; ++i) + { + bnBiases.push_back(weights[weightPtr]); + weightPtr++; + } + // load BN weights + std::vector<float> bnWeights; + for (int i = 0; i < filters; ++i) + { + bnWeights.push_back(weights[weightPtr]); + weightPtr++; + } + // load BN running_mean + std::vector<float> bnRunningMean; + for (int i = 0; i < filters; ++i) + { + bnRunningMean.push_back(weights[weightPtr]); + weightPtr++; + } + // load BN running_var + std::vector<float> bnRunningVar; + for (int i = 0; i < filters; ++i) + { + // 1e-05 for numerical stability + bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5f)); + weightPtr++; + } + // load Conv layer weights (GKCRS) + int size = filters * inputChannels * kernelSize * kernelSize; + nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size}; + float* val = new float[size]; + for (int i = 0; i < size; ++i) + { + val[i] = weights[weightPtr]; + weightPtr++; + } + convWt.values = val; + trtWeights.push_back(convWt); + nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, 0}; + trtWeights.push_back(convBias); + nvinfer1::IConvolutionLayer* conv = network->addConvolution( + *input, + filters, + nvinfer1::DimsHW{kernelSize, kernelSize}, + convWt, + convBias); + assert(conv != nullptr); + std::string convLayerName = "conv_" + std::to_string(layerIdx); + conv->setName(convLayerName.c_str()); + conv->setStride(nvinfer1::DimsHW{stride, stride}); + conv->setPadding(nvinfer1::DimsHW{pad, pad}); + + /***** BATCHNORM LAYER *****/ + /***************************/ + size = filters; + // create the weights + nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, nullptr, size}; + nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, 
nullptr, size}; + nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size}; + float* shiftWt = new float[size]; + for (int i = 0; i < size; ++i) + { + shiftWt[i] + = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i)); + } + shift.values = shiftWt; + float* scaleWt = new float[size]; + for (int i = 0; i < size; ++i) + { + scaleWt[i] = bnWeights.at(i) / bnRunningVar[i]; + } + scale.values = scaleWt; + float* powerWt = new float[size]; + for (int i = 0; i < size; ++i) + { + powerWt[i] = 1.0; + } + power.values = powerWt; + trtWeights.push_back(shift); + trtWeights.push_back(scale); + trtWeights.push_back(power); + // Add the batch norm layers + nvinfer1::IScaleLayer* bn = network->addScale( + *conv->getOutput(0), nvinfer1::ScaleMode::kCHANNEL, shift, scale, power); + assert(bn != nullptr); + std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx); + bn->setName(bnLayerName.c_str()); + /***** ACTIVATION LAYER *****/ + /****************************/ + auto leaky = network->addActivation(*bn->getOutput(0),nvinfer1::ActivationType::kLEAKY_RELU); + leaky->setAlpha(0.1f); + /*nvinfer1::IPlugin* leakyRELU = nvinfer1::plugin::createPReLUPlugin(0.1); + assert(leakyRELU != nullptr); + nvinfer1::ITensor* bnOutput = bn->getOutput(0); + nvinfer1::IPluginLayer* leaky = network->addPlugin(&bnOutput, 1, *leakyRELU);*/ + assert(leaky != nullptr); + std::string leakyLayerName = "leaky_" + std::to_string(layerIdx); + leaky->setName(leakyLayerName.c_str()); + + return leaky; +} + + +std::vector<std::string> loadListFromTextFile(const std::string filename) +{ + assert(fileExists(filename)); + std::vector<std::string> list; + + std::ifstream f(filename); + if (!f) + { + std::cout << "failed to open " << filename; + assert(0); + } + + std::string line; + while (std::getline(f, line)) + { + if (line.empty()) + continue; + + else + list.push_back(trim(line)); + } + + return list; +} +std::vector<std::string> loadImageList(const std::string 
filename, const std::string prefix) +{ + std::vector<std::string> fileList = loadListFromTextFile(filename); + for (auto& file : fileList) + { + if (fileExists(file, false)) + continue; + else + { + std::string prefixed = prefix + file; + if (fileExists(prefixed, false)) + file = prefixed; + else + std::cerr << "WARNING: couldn't find: " << prefixed + << " while loading: " << filename << std::endl; + } + } + return fileList; +} diff --git a/src/detecter_tools/trt_utils.h b/src/detecter_tools/trt_utils.h index 189a60b..a166a6f 100644 --- a/src/detecter_tools/trt_utils.h +++ b/src/detecter_tools/trt_utils.h @@ -67,6 +67,34 @@ } } }; +nvinfer1::ILayer* netAddMaxpool(int layerIdx, std::map<std::string, std::string>& block, + nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network); +nvinfer1::ILayer* netAddConvLinear(int layerIdx, std::map<std::string, std::string>& block, + std::vector<float>& weights, + std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, + int& inputChannels, nvinfer1::ITensor* input, + nvinfer1::INetworkDefinition* network); + +nvinfer1::ILayer* net_conv_bn_mish(int layerIdx, + std::map<std::string, std::string>& block, + std::vector<float>& weights, + std::vector<nvinfer1::Weights>& trtWeights, + int& weightPtr, + int& inputChannels, + nvinfer1::ITensor* input, + nvinfer1::INetworkDefinition* network); + +nvinfer1::ILayer* netAddConvBNLeaky(int layerIdx, std::map<std::string, std::string>& block, + std::vector<float>& weights, + std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr, + int& inputChannels, nvinfer1::ITensor* input, + nvinfer1::INetworkDefinition* network); +std::string dimsToString(const nvinfer1::Dims d); +std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& networkType); +int getNumChannels(nvinfer1::ITensor* t); +std::string trim(std::string s); +std::string triml(std::string s, const char* t); +std::string trimr(std::string s, const char* t); float clamp(const float val, const 
float minVal, const float maxVal); // Common helper functions cv::Mat blobFromDsImages(const std::vector<DsImage>& inputImages, const int& inputH, @@ -77,9 +105,53 @@ nvinfer1::ICudaEngine* loadTRTEngine(const std::string planFilePath, PluginFactory* pluginFactory, Logger& logger); uint64_t get3DTensorVolume(nvinfer1::Dims inputDims); - +std::vector<std::string> loadImageList(const std::string filename, const std::string prefix); std::vector<BBoxInfo> nmsAllClasses(const float nmsThresh, std::vector<BBoxInfo>& binfo, const uint32_t numClasses, const std::string &model_type); - +void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput, + std::string layerOutput, std::string weightPtr); std::vector<BBoxInfo> nonMaximumSuppression(const float nmsThresh, std::vector<BBoxInfo> binfo); +std::vector<int> split_layer_index(const std::string &s_,const std::string &delimiter_); +nvinfer1::ILayer * layer_split(const int n_layer_index_, + nvinfer1::ITensor *input_, + nvinfer1::INetworkDefinition* network); +nvinfer1::ILayer* netAddUpsample(int layerIdx, std::map<std::string, std::string>& block, + std::vector<float>& weights, + std::vector<nvinfer1::Weights>& trtWeights, int& inputChannels, + nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network); +std::vector<std::string> loadListFromTextFile(const std::string filename); +class YoloTinyMaxpoolPaddingFormula : public nvinfer1::IOutputDimensionsFormula +{ +private: + std::set<std::string> m_SamePaddingLayers; + + nvinfer1::DimsHW compute(nvinfer1::DimsHW inputDims, nvinfer1::DimsHW kernelSize, + nvinfer1::DimsHW stride, nvinfer1::DimsHW padding, + nvinfer1::DimsHW dilation, const char* layerName) const override + { + // assert(inputDims.d[0] == inputDims.d[1]); + assert(kernelSize.d[0] == kernelSize.d[1]); + assert(stride.d[0] == stride.d[1]); + assert(padding.d[0] == padding.d[1]); + + int output_h, output_w; + // Only layer maxpool_12 makes use of same padding + if 
(m_SamePaddingLayers.find(layerName) != m_SamePaddingLayers.end()) + { + output_h = (inputDims.d[0] + 2 * padding.d[0]) / stride.d[0]; + output_w = (inputDims.d[1] + 2 * padding.d[1]) / stride.d[1]; + } + // Valid Padding + else + { + output_h = (inputDims.d[0] - kernelSize.d[0]) / stride.d[0] + 1; + output_w = (inputDims.d[1] - kernelSize.d[1]) / stride.d[1] + 1; + } + return nvinfer1::DimsHW{output_h, output_w}; + } + +public: + void addSamePaddingLayer(std::string input) { m_SamePaddingLayers.insert(input); } +}; + #endif diff --git a/src/h_interface.cpp b/src/h_interface.cpp index c1adcb5..32d519e 100644 --- a/src/h_interface.cpp +++ b/src/h_interface.cpp @@ -8,6 +8,8 @@ using namespace std; string m_staticStruct::model_path = "path"; +string m_staticStruct::model_cfg = "0"; // 鍒濆鍖栫粨鏋勪綋闈欐�佸彉閲忓�� +string m_staticStruct::model_wts = "0"; // 鍒濆鍖栫粨鏋勪綋闈欐�佸彉閲忓�� int m_staticStruct::type = 1; bool m_staticStruct::isTrack= true; int m_staticStruct::max_cam_num = 0; @@ -116,6 +118,8 @@ if(reader.parse(in,root)) { std::string model_path = root["param"]["model_path"].asString(); + std::string model_cfg= root["param"]["model_cfg"].asString(); + std::string model_wts = root["param"]["model_wts"].asString(); int type = root["param"]["type"].asInt(); bool isTrack = root["param"]["isTrack"].asBool(); int max_cam_num = root["param"]["max_cam_num"].asInt(); @@ -123,6 +127,8 @@ int mv_velocity = root["param"]["mv_velocity"].asFloat(); int fall_rate = root["param"]["fall_rate"].asFloat(); m_staticStruct::model_path = model_path; + m_staticStruct::model_cfg = model_cfg; + m_staticStruct::model_wts = model_wts; m_staticStruct::type = type; m_staticStruct::isTrack = isTrack; m_staticStruct::max_cam_num = max_cam_num; -- Gitblit v1.8.0