From ec3cf462234c96bf9d6c648db3e8fc1d781b2fe7 Mon Sep 17 00:00:00 2001 From: Scheaven <xuepengqiang> Date: 星期三, 08 九月 2021 18:02:06 +0800 Subject: [PATCH] update --- src/detecter_tools/model.cpp | 524 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 519 insertions(+), 5 deletions(-) diff --git a/src/detecter_tools/model.cpp b/src/detecter_tools/model.cpp index c548561..0bccfb3 100644 --- a/src/detecter_tools/model.cpp +++ b/src/detecter_tools/model.cpp @@ -11,6 +11,7 @@ Detecter::Detecter( const NetworkInfo& networkInfo, const InferParams& inferParams, int type) : + m_NetworkType(networkInfo.networkType), m_InputBlobName(networkInfo.inputBlobName), m_InputH(416), m_InputW(416), @@ -26,10 +27,17 @@ m_Context(nullptr), m_InputBindingIndex(-1), m_CudaStream(nullptr), - m_PluginFactory(new PluginFactory) + m_PluginFactory(new PluginFactory), + m_TinyMaxpoolPaddingFormula(new YoloTinyMaxpoolPaddingFormula) { - setOutput(type); m_EnginePath = m_staticStruct::model_path; + if(!fileExists(m_EnginePath)) + { + m_configBlocks = parseConfigFile(m_staticStruct::model_cfg); + parseConfigBlocks(); + createYOLOEngine(); + } + setOutput(type); DEBUG((boost::format("m_EnginePath:%s")%m_EnginePath).str()); assert(m_PluginFactory != nullptr); m_Engine = loadTRTEngine(m_EnginePath, m_PluginFactory, m_Logger); @@ -67,7 +75,482 @@ m_PluginFactory = nullptr; } - // m_TinyMaxpoolPaddingFormula.reset(); + m_TinyMaxpoolPaddingFormula.reset(); +} + +std::vector<std::map<std::string, std::string>> Detecter::parseConfigFile(const std::string cfgFilePath) +{ + std::cout << "::::::::::" << cfgFilePath <<std::endl; + assert(fileExists(cfgFilePath)); + std::ifstream file(cfgFilePath); + assert(file.good()); + std::string line; + std::vector<std::map<std::string, std::string>> blocks; + std::map<std::string, std::string> block; + + while (getline(file, line)) + { + if (line.empty()) continue; + if (line.front() == '#') continue; + line = trim(line); + if (line.front() == '[') + { + if (!block.empty()) + { + blocks.push_back(block); + block.clear(); + } + std::string key = "type"; + std::string value = trim(line.substr(1, line.size() - 2)); + block.insert(std::pair<std::string, std::string>(key, value)); + } + else + { + size_t cpos = line.find('='); + std::string key = trim(line.substr(0, cpos)); + std::string value = trim(line.substr(cpos + 1)); + block.insert(std::pair<std::string, std::string>(key, value)); + } + } + blocks.push_back(block); + return blocks; +} + +void Detecter::parseConfigBlocks() +{ + for (auto block : m_configBlocks) + { + if (block.at("type") == "net") + { + assert((block.find("height") != block.end()) + && "Missing 'height' param in network cfg"); + assert((block.find("width") != block.end()) && "Missing 'width' param in network cfg"); + assert((block.find("channels") != block.end()) + && "Missing 'channels' param in network cfg"); + assert((block.find("batch") != block.end()) + && "Missing 'batch' param in network cfg"); + + m_InputH = std::stoul(trim(block.at("height"))); + m_InputW = std::stoul(trim(block.at("width"))); + m_InputC = std::stoul(trim(block.at("channels"))); + m_BatchSize = std::stoi(trim(block.at("batch"))); + // assert(m_InputW == m_InputH); + m_InputSize = m_InputC * m_InputH * m_InputW; + } + else if ((block.at("type") == "region") || (block.at("type") == "yolo")) + { + assert((block.find("num") != block.end()) + && std::string("Missing 'num' param in " + block.at("type") + " layer").c_str()); + assert((block.find("classes") != block.end()) + && 
std::string("Missing 'classes' param in " + block.at("type") + " layer") + .c_str()); + assert((block.find("anchors") != block.end()) + && std::string("Missing 'anchors' param in " + block.at("type") + " layer") + .c_str()); + + TensorInfo outputTensor; + std::string anchorString = block.at("anchors"); + while (!anchorString.empty()) + { + size_t npos = anchorString.find_first_of(','); + if (npos != std::string::npos) + { + float anchor = std::stof(trim(anchorString.substr(0, npos))); + outputTensor.anchors.push_back(anchor); + anchorString.erase(0, npos + 1); + } + else + { + float anchor = std::stof(trim(anchorString)); + outputTensor.anchors.push_back(anchor); + break; + } + } + + assert((block.find("mask") != block.end()) + && std::string("Missing 'mask' param in " + block.at("type") + " layer") + .c_str()); + + std::string maskString = block.at("mask"); + while (!maskString.empty()) + { + size_t npos = maskString.find_first_of(','); + if (npos != std::string::npos) + { + uint32_t mask = std::stoul(trim(maskString.substr(0, npos))); + outputTensor.masks.push_back(mask); + maskString.erase(0, npos + 1); + } + else + { + uint32_t mask = std::stoul(trim(maskString)); + outputTensor.masks.push_back(mask); + break; + } + } + + outputTensor.numBBoxes = outputTensor.masks.size() > 0 + ? outputTensor.masks.size() + : std::stoul(trim(block.at("num"))); + outputTensor.numClasses = std::stoul(block.at("classes")); + if (m_ClassNames.empty()) + { + for (uint32_t i=0;i< outputTensor.numClasses;++i) + { + m_ClassNames.push_back(std::to_string(i)); + } + } + outputTensor.blobName = "yolo_" + std::to_string(_n_yolo_ind); + outputTensor.gridSize = (m_InputH / 32) * pow(2, _n_yolo_ind); + outputTensor.grid_h = (m_InputH / 32) * pow(2, _n_yolo_ind); + outputTensor.grid_w = (m_InputW / 32) * pow(2, _n_yolo_ind); + outputTensor.gridSize = (m_InputH / 32) * pow(2, 2-_n_yolo_ind); + outputTensor.grid_h = (m_InputH / 32) * pow(2, 2-_n_yolo_ind); + outputTensor.grid_w = (m_InputW / 32) * pow(2, 2-_n_yolo_ind); + outputTensor.stride = m_InputH / outputTensor.gridSize; + outputTensor.stride_h = m_InputH / outputTensor.grid_h; + outputTensor.stride_w = m_InputW / outputTensor.grid_w; + outputTensor.volume = outputTensor.grid_h* outputTensor.grid_w + *(outputTensor.numBBoxes*(5 + outputTensor.numClasses)); + m_OutputTensors.push_back(outputTensor); + _n_yolo_ind++; + } + } +} + +void Detecter::createYOLOEngine(const nvinfer1::DataType dataType, Int8EntropyCalibrator* calibrator) +{ + if (fileExists(m_EnginePath))return; + std::vector<float> weights = loadWeights(m_staticStruct::model_wts, m_NetworkType); + std::vector<nvinfer1::Weights> trtWeights; + int weightPtr = 0; + int channels = m_InputC; + m_Builder = nvinfer1::createInferBuilder(m_Logger); + nvinfer1::IBuilderConfig* config = m_Builder->createBuilderConfig(); + m_Network = m_Builder->createNetworkV2(0U); + if ((dataType == nvinfer1::DataType::kINT8 && !m_Builder->platformHasFastInt8()) + || (dataType == nvinfer1::DataType::kHALF && !m_Builder->platformHasFastFp16())) + { + std::cout << "Platform doesn't support this precision." 
<< std::endl; + assert(0); + } + + nvinfer1::ITensor* data = m_Network->addInput( + m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT, + nvinfer1::DimsCHW{static_cast<int>(m_InputC), static_cast<int>(m_InputH), + static_cast<int>(m_InputW)}); + assert(data != nullptr); + // Add elementwise layer to normalize pixel values 0-1 + nvinfer1::Dims divDims{ + 3, + {static_cast<int>(m_InputC), static_cast<int>(m_InputH), static_cast<int>(m_InputW)}, + {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL, + nvinfer1::DimensionType::kSPATIAL}}; + nvinfer1::Weights divWeights{nvinfer1::DataType::kFLOAT, nullptr, + static_cast<int64_t>(m_InputSize)}; + float* divWt = new float[m_InputSize]; + for (uint32_t w = 0; w < m_InputSize; ++w) divWt[w] = 255.0; + divWeights.values = divWt; + trtWeights.push_back(divWeights); + nvinfer1::IConstantLayer* constDivide = m_Network->addConstant(divDims, divWeights); + assert(constDivide != nullptr); + nvinfer1::IElementWiseLayer* elementDivide = m_Network->addElementWise( + *data, *constDivide->getOutput(0), nvinfer1::ElementWiseOperation::kDIV); + assert(elementDivide != nullptr); + + nvinfer1::ITensor* previous = elementDivide->getOutput(0); + std::vector<nvinfer1::ITensor*> tensorOutputs; + uint32_t outputTensorCount = 0; + + // build the network using the network API + for (uint32_t i = 0; i < m_configBlocks.size(); ++i) + { + // check if num. of channels is correct + assert(getNumChannels(previous) == channels); + std::string layerIndex = "(" + std::to_string(i) + ")"; + + if (m_configBlocks.at(i).at("type") == "net") + { + printLayerInfo("", "layer", " inp_size", " out_size", "weightPtr"); + } + else if (m_configBlocks.at(i).at("type") == "convolutional") + { + std::string inputVol = dimsToString(previous->getDimensions()); + nvinfer1::ILayer* out; + std::string layerType; + //check activation + std::string activation = ""; + if (m_configBlocks.at(i).find("activation") != m_configBlocks.at(i).end()) + { + activation = m_configBlocks[i]["activation"]; + } + // check if batch_norm enabled + if ((m_configBlocks.at(i).find("batch_normalize") != m_configBlocks.at(i).end()) && + ("leaky" == activation)) + { + out = netAddConvBNLeaky(i, m_configBlocks.at(i), weights, trtWeights, weightPtr, + channels, previous, m_Network); + layerType = "conv-bn-leaky"; + } + else if ((m_configBlocks.at(i).find("batch_normalize") != m_configBlocks.at(i).end()) && + ("mish" == activation)) + { + out = net_conv_bn_mish(i, m_configBlocks.at(i), weights, trtWeights, weightPtr, + channels, previous, m_Network); + layerType = "conv-bn-mish"; + } + else// if("linear" == activation) + { + out = netAddConvLinear(i, m_configBlocks.at(i), weights, trtWeights, weightPtr, + channels, previous, m_Network); + layerType = "conv-linear"; + } + previous = out->getOutput(0); + assert(previous != nullptr); + channels = getNumChannels(previous); + std::string outputVol = dimsToString(previous->getDimensions()); + tensorOutputs.push_back(out->getOutput(0)); + printLayerInfo(layerIndex, layerType, inputVol, outputVol, std::to_string(weightPtr)); + } + else if (m_configBlocks.at(i).at("type") == "shortcut") + { + assert(m_configBlocks.at(i).at("activation") == "linear"); + assert(m_configBlocks.at(i).find("from") != m_configBlocks.at(i).end()); + int from = stoi(m_configBlocks.at(i).at("from")); + + std::string inputVol = dimsToString(previous->getDimensions()); + // check if indexes are correct + assert((i - 2 >= 0) && (i - 2 < tensorOutputs.size())); + assert((i + from - 1 >= 0) && (i + 
from - 1 < tensorOutputs.size())); + assert(i + from - 1 < i - 2); + nvinfer1::IElementWiseLayer* ew + = m_Network->addElementWise(*tensorOutputs[i - 2], *tensorOutputs[i + from - 1], + nvinfer1::ElementWiseOperation::kSUM); + assert(ew != nullptr); + std::string ewLayerName = "shortcut_" + std::to_string(i); + ew->setName(ewLayerName.c_str()); + previous = ew->getOutput(0); + assert(previous != nullptr); + std::string outputVol = dimsToString(previous->getDimensions()); + tensorOutputs.push_back(ew->getOutput(0)); + printLayerInfo(layerIndex, "skip", inputVol, outputVol, " -"); + } + else if (m_configBlocks.at(i).at("type") == "yolo") + { + nvinfer1::Dims prevTensorDims = previous->getDimensions(); + // assert(prevTensorDims.d[1] == prevTensorDims.d[2]); + TensorInfo& curYoloTensor = m_OutputTensors.at(outputTensorCount); + curYoloTensor.gridSize = prevTensorDims.d[1]; + curYoloTensor.grid_h = prevTensorDims.d[1]; + curYoloTensor.grid_w = prevTensorDims.d[2]; + curYoloTensor.stride = m_InputW / curYoloTensor.gridSize; + curYoloTensor.stride_h = m_InputH / curYoloTensor.grid_h; + curYoloTensor.stride_w = m_InputW / curYoloTensor.grid_w; + m_OutputTensors.at(outputTensorCount).volume = curYoloTensor.grid_h + * curYoloTensor.grid_w + * (curYoloTensor.numBBoxes * (5 + curYoloTensor.numClasses)); + std::string layerName = "yolo_" + std::to_string(outputTensorCount); + curYoloTensor.blobName = layerName; + nvinfer1::IPlugin* yoloPlugin + = new SLayerV3(m_OutputTensors.at(outputTensorCount).numBBoxes, + m_OutputTensors.at(outputTensorCount).numClasses, + m_OutputTensors.at(outputTensorCount).grid_h, + m_OutputTensors.at(outputTensorCount).grid_w); + assert(yoloPlugin != nullptr); + nvinfer1::IPluginLayer* yolo = m_Network->addPlugin(&previous, 1, *yoloPlugin); + assert(yolo != nullptr); + yolo->setName(layerName.c_str()); + std::string inputVol = dimsToString(previous->getDimensions()); + previous = yolo->getOutput(0); + assert(previous != nullptr); + previous->setName(layerName.c_str()); + std::string outputVol = dimsToString(previous->getDimensions()); + m_Network->markOutput(*previous); + channels = getNumChannels(previous); + tensorOutputs.push_back(yolo->getOutput(0)); + printLayerInfo(layerIndex, "yolo", inputVol, outputVol, std::to_string(weightPtr)); + ++outputTensorCount; + } + else if (m_configBlocks.at(i).at("type") == "route") + { + size_t found = m_configBlocks.at(i).at("layers").find(","); + if (found != std::string::npos)//concate multi layers + { + std::vector<int> vec_index = split_layer_index(m_configBlocks.at(i).at("layers"), ","); + for (auto &ind_layer:vec_index) + { + if (ind_layer < 0) + { + ind_layer = static_cast<int>(tensorOutputs.size()) + ind_layer; + } + assert(ind_layer < static_cast<int>(tensorOutputs.size()) && ind_layer >= 0); + } + nvinfer1::ITensor** concatInputs + = reinterpret_cast<nvinfer1::ITensor**>(malloc(sizeof(nvinfer1::ITensor*) * vec_index.size())); + for (size_t ind = 0; ind < vec_index.size(); ++ind) + { + concatInputs[ind] = tensorOutputs[vec_index[ind]]; + } + nvinfer1::IConcatenationLayer* concat + = m_Network->addConcatenation(concatInputs, static_cast<int>(vec_index.size())); + assert(concat != nullptr); + std::string concatLayerName = "route_" + std::to_string(i - 1); + concat->setName(concatLayerName.c_str()); + // concatenate along the channel dimension + concat->setAxis(0); + previous = concat->getOutput(0); + assert(previous != nullptr); + nvinfer1::Dims debug = previous->getDimensions(); + std::string outputVol = 
dimsToString(previous->getDimensions()); + int nums = 0; + for (auto &indx:vec_index) + { + nums += getNumChannels(tensorOutputs[indx]); + } + channels = nums; + tensorOutputs.push_back(concat->getOutput(0)); + printLayerInfo(layerIndex, "route", " -", outputVol,std::to_string(weightPtr)); + } + else //single layer + { + int idx = std::stoi(trim(m_configBlocks.at(i).at("layers"))); + if (idx < 0) + { + idx = static_cast<int>(tensorOutputs.size()) + idx; + } + assert(idx < static_cast<int>(tensorOutputs.size()) && idx >= 0); + + //route + if (m_configBlocks.at(i).find("groups") == m_configBlocks.at(i).end()) + { + previous = tensorOutputs[idx]; + assert(previous != nullptr); + std::string outputVol = dimsToString(previous->getDimensions()); + // set the output volume depth + channels = getNumChannels(tensorOutputs[idx]); + tensorOutputs.push_back(tensorOutputs[idx]); + printLayerInfo(layerIndex, "route", " -", outputVol, std::to_string(weightPtr)); + + } + //yolov4-tiny route split layer + else + { + if (m_configBlocks.at(i).find("group_id") == m_configBlocks.at(i).end()) + { + assert(0); + } + int chunk_idx = std::stoi(trim(m_configBlocks.at(i).at("group_id"))); + nvinfer1::ILayer* out = layer_split(i, tensorOutputs[idx], m_Network); + std::string inputVol = dimsToString(previous->getDimensions()); + previous = out->getOutput(chunk_idx); + assert(previous != nullptr); + channels = getNumChannels(previous); + std::string outputVol = dimsToString(previous->getDimensions()); + tensorOutputs.push_back(out->getOutput(chunk_idx)); + printLayerInfo(layerIndex,"chunk", inputVol, outputVol, std::to_string(weightPtr)); + } + } + } + else if (m_configBlocks.at(i).at("type") == "upsample") + { + std::string inputVol = dimsToString(previous->getDimensions()); + nvinfer1::ILayer* out = netAddUpsample(i - 1, m_configBlocks[i], weights, trtWeights, + channels, previous, m_Network); + previous = out->getOutput(0); + std::string outputVol = dimsToString(previous->getDimensions()); + tensorOutputs.push_back(out->getOutput(0)); + printLayerInfo(layerIndex, "upsample", inputVol, outputVol, " -"); + } + else if (m_configBlocks.at(i).at("type") == "maxpool") + { + // Add same padding layers + if (m_configBlocks.at(i).at("size") == "2" && m_configBlocks.at(i).at("stride") == "1") + { + m_TinyMaxpoolPaddingFormula->addSamePaddingLayer("maxpool_" + std::to_string(i)); + } + std::string inputVol = dimsToString(previous->getDimensions()); + nvinfer1::ILayer* out = netAddMaxpool(i, m_configBlocks.at(i), previous, m_Network); + previous = out->getOutput(0); + assert(previous != nullptr); + std::string outputVol = dimsToString(previous->getDimensions()); + tensorOutputs.push_back(out->getOutput(0)); + printLayerInfo(layerIndex, "maxpool", inputVol, outputVol, std::to_string(weightPtr)); + } + else + { + std::cout << "Unsupported layer type --> \"" << m_configBlocks.at(i).at("type") << "\"" + << std::endl; + assert(0); + } + } + + if (static_cast<int>(weights.size()) != weightPtr) + { + std::cout << "Number of unused weights left : " << static_cast<int>(weights.size()) - weightPtr << std::endl; + assert(0); + } + + // std::cout << "Output blob names :" << std::endl; + // for (auto& tensor : m_OutputTensors) std::cout << tensor.blobName << std::endl; + + // Create and cache the engine if not already present + if (fileExists(m_EnginePath)) + { + std::cout << "Using previously generated plan file located at " << m_EnginePath + << std::endl; + destroyNetworkUtils(trtWeights); + return; + } + + /*std::cout << "Unable to find 
cached TensorRT engine for network : " << m_NetworkType + << " precision : " << m_Precision << " and batch size :" << m_BatchSize << std::endl;*/ + + m_Builder->setMaxBatchSize(m_BatchSize); + //m_Builder->setMaxWorkspaceSize(1 << 20); + + config->setMaxWorkspaceSize(1 << 20); + if (dataType == nvinfer1::DataType::kINT8) + { + assert((calibrator != nullptr) && "Invalid calibrator for INT8 precision"); + // m_Builder->setInt8Mode(true); + config->setFlag(nvinfer1::BuilderFlag::kINT8); + // m_Builder->setInt8Calibrator(calibrator); + config->setInt8Calibrator(calibrator); + // config->setTacticSources(1U << static_cast<uint32_t>(TacticSource::kCUBLAS) | 1U << static_cast<uint32_t>(TacticSource::kCUBLAS_LT)); + } + else if (dataType == nvinfer1::DataType::kHALF) + { + config->setFlag(nvinfer1::BuilderFlag::kFP16); + // m_Builder->setHalf2Mode(true); + } + + m_Builder->allowGPUFallback(true); + int nbLayers = m_Network->getNbLayers(); + int layersOnDLA = 0; + // std::cout << "Total number of layers: " << nbLayers << std::endl; + for (int i = 0; i < nbLayers; i++) + { + nvinfer1::ILayer* curLayer = m_Network->getLayer(i); + if (m_DeviceType == "kDLA" && m_Builder->canRunOnDLA(curLayer)) + { + m_Builder->setDeviceType(curLayer, nvinfer1::DeviceType::kDLA); + layersOnDLA++; + std::cout << "Set layer " << curLayer->getName() << " to run on DLA" << std::endl; + } + } + // std::cout << "Total number of layers on DLA: " << layersOnDLA << std::endl; + + // Build the engine + std::cout << "Building the TensorRT Engine..." << std::endl; + m_Engine = m_Builder->buildEngineWithConfig(*m_Network,*config); + assert(m_Engine != nullptr); + std::cout << "Building complete!" << std::endl; + + // Serialize the engine + writePlanFileToDisk(); + + // destroy + destroyNetworkUtils(trtWeights); } void Detecter::doInference(const unsigned char* input, const uint32_t batchSize) @@ -268,10 +751,10 @@ void Detecter::setOutput(int type) { m_OutputTensors.clear(); + printf("0-0-0-0-0-0------------------%d",type); if(type==2) for (int i = 0; i < 2; ++i) { - TensorInfo outputTensor; outputTensor.numClasses = CLASS_BUM; outputTensor.blobName = "yolo_" + std::to_string(i); @@ -323,7 +806,17 @@ { TensorInfo outputTensor; outputTensor.numClasses = CLASS_BUM; - outputTensor.blobName = "yolo_" + std::to_string(i); + outputTensor.blobName = "yolo_" + to_string(i); + // if (i==0) + // { + // outputTensor.blobName = "139_convolutional_reshape_2"; + // }else if (i==1) + // { + // outputTensor.blobName = "150_convolutional_reshape_2"; + // }else if (i==2) + // { + // outputTensor.blobName = "161_convolutional_reshape_2"; + // } outputTensor.gridSize = (m_InputH / 32) * pow(2, 2-i); outputTensor.grid_h = (m_InputH / 32) * pow(2, 2-i); outputTensor.grid_w = (m_InputW / 32) * pow(2, 2-i); @@ -380,3 +873,24 @@ m_OutputTensors.push_back(outputTensor); } } + +void Detecter::writePlanFileToDisk() +{ + std::cout << "Serializing the TensorRT Engine..." 
<< std::endl; + assert(m_Engine && "Invalid TensorRT Engine"); + m_ModelStream = m_Engine->serialize(); + assert(m_ModelStream && "Unable to serialize engine"); + assert(!m_EnginePath.empty() && "Enginepath is empty"); + + // write data to output file + std::stringstream gieModelStream; + gieModelStream.seekg(0, gieModelStream.beg); + gieModelStream.write(static_cast<const char*>(m_ModelStream->data()), m_ModelStream->size()); + std::ofstream outFile; + outFile.open(m_EnginePath, std::ios::binary | std::ios::out); + outFile << gieModelStream.rdbuf(); + outFile.close(); + + std::cout << "Serialized plan file cached at location : " << m_EnginePath << std::endl; +} + -- Gitblit v1.8.0
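
For reference, a few self-contained sketches of the new code in model.cpp follow. parseConfigFile() walks the darknet-style .cfg once: every "[section]" line opens a new block, every "key=value" line is stored in the current block, and the last block is pushed after the loop. The sketch below reproduces that flow with only the standard library so it can be compiled and exercised on its own; trim() is written out here because the patch relies on an external helper of the same name, and "yolov4.cfg" is just a placeholder path. One caveat in the patch itself: line.front() == '#' is tested before trim(), so an indented comment line falls through to the key=value branch, and a line containing only whitespace is trimmed to an empty string before front() is called on it; trimming first, as below, avoids both cases.

#include <cstddef>
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>

// Illustrative stand-in for the trim() helper the patch uses.
static std::string trim(const std::string& s)
{
    const std::string ws = " \t\r\n";
    const std::size_t b = s.find_first_not_of(ws);
    if (b == std::string::npos) return "";
    const std::size_t e = s.find_last_not_of(ws);
    return s.substr(b, e - b + 1);
}

using Block = std::map<std::string, std::string>;

static std::vector<Block> parseCfg(const std::string& path)
{
    std::ifstream file(path);
    std::vector<Block> blocks;
    Block block;
    std::string line;
    while (std::getline(file, line))
    {
        line = trim(line);                          // trim first so indented comments are skipped too
        if (line.empty() || line.front() == '#') continue;
        if (line.front() == '[')                    // a new section such as [net] or [convolutional]
        {
            if (!block.empty()) { blocks.push_back(block); block.clear(); }
            block["type"] = trim(line.substr(1, line.size() - 2));
        }
        else
        {
            const std::size_t eq = line.find('=');
            if (eq == std::string::npos) continue;  // ignore malformed lines instead of storing garbage
            block[trim(line.substr(0, eq))] = trim(line.substr(eq + 1));
        }
    }
    if (!block.empty()) blocks.push_back(block);
    return blocks;
}

int main()
{
    for (const auto& b : parseCfg("yolov4.cfg"))    // placeholder path
        std::cout << "[" << b.at("type") << "] with " << b.size() - 1 << " keys\n";
    return 0;
}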
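
The geometry given to each output tensor follows one rule in both parseConfigBlocks() and setOutput(): for head i, grid = (input / 32) * 2^(2 - i) and stride = input / grid, so the 416x416 default from the constructor yields 52x52, 26x26 and 13x13 grids with strides 8, 16 and 32. (In parseConfigBlocks() the first three assignments, the ones using pow(2, _n_yolo_ind), are overwritten immediately by the pow(2, 2 - _n_yolo_ind) versions, so only the latter take effect.) The short program below only prints those numbers; numBBoxes = 3 and numClasses = 80 are illustrative values, not taken from the patch, and volume mirrors the grid_h * grid_w * numBBoxes * (5 + numClasses) expression used to size the output buffers.

#include <cstdio>

int main()
{
    const int inputH = 416, inputW = 416;       // defaults set in the constructor
    const int numBBoxes = 3, numClasses = 80;   // illustrative only

    for (int i = 0; i < 3; ++i)
    {
        const int scale = 1 << (2 - i);              // same as pow(2, 2 - i) in the patch
        const int grid_h = (inputH / 32) * scale;    // 52, 26, 13
        const int grid_w = (inputW / 32) * scale;
        const int stride = inputH / grid_h;          // 8, 16, 32
        const int volume = grid_h * grid_w * numBBoxes * (5 + numClasses);
        std::printf("yolo_%d: grid %dx%d, stride %d, volume %d\n",
                    i, grid_h, grid_w, stride, volume);
    }
    return 0;
}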
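
createYOLOEngine() only reaches the build step when no cached plan exists, and the precision handling at its tail reduces to a handful of IBuilderConfig calls. The sketch below isolates that tail, assuming the same pre-8, IBuilderConfig-based TensorRT API the file already uses; buildAndCachePlan() and its parameters are illustrative names rather than symbols from the patch, and the calibrator is passed through the IInt8Calibrator base interface instead of the project's Int8EntropyCalibrator.

#include <NvInfer.h>

#include <cassert>
#include <fstream>
#include <iostream>
#include <string>

// Hypothetical helper, not part of the patch: configure precision, build the
// engine and cache the serialized plan on disk.
static void buildAndCachePlan(nvinfer1::IBuilder* builder,
                              nvinfer1::INetworkDefinition* network,
                              nvinfer1::IBuilderConfig* config,
                              nvinfer1::DataType dataType,
                              nvinfer1::IInt8Calibrator* calibrator,
                              int maxBatchSize,
                              const std::string& planPath)
{
    builder->setMaxBatchSize(maxBatchSize);
    config->setMaxWorkspaceSize(1 << 20);    // 1 MiB, the value the patch passes

    if (dataType == nvinfer1::DataType::kINT8)
    {
        assert(calibrator != nullptr && "INT8 precision needs a calibrator");
        config->setFlag(nvinfer1::BuilderFlag::kINT8);
        config->setInt8Calibrator(calibrator);
    }
    else if (dataType == nvinfer1::DataType::kHALF)
    {
        config->setFlag(nvinfer1::BuilderFlag::kFP16);
    }

    nvinfer1::ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    assert(engine != nullptr && "engine build failed");

    // Serialize the plan and write it straight to disk; this produces the same
    // file as the stringstream detour in writePlanFileToDisk().
    nvinfer1::IHostMemory* plan = engine->serialize();
    std::ofstream out(planPath, std::ios::binary);
    out.write(static_cast<const char*>(plan->data()), plan->size());
    out.close();

    plan->destroy();
    engine->destroy();
    std::cout << "Plan cached at " << planPath << std::endl;
}

The 1 MiB workspace is kept only because that is what the patch hands to setMaxWorkspaceSize(); for larger networks it is usually worth raising before an INT8 or FP16 build.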
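
The other half of the constructor's cache-or-build flow is loading: when the plan file already exists it is deserialized instead of rebuilt. A minimal sketch under the same TensorRT assumptions; StderrLogger, readFile() and loadCachedEngine() are stand-ins for the project's m_Logger, loadTRTEngine() and the surrounding wiring, and the plugin factory handed to deserializeCudaEngine() should be the one that can recreate the custom YOLO layer, which is presumably why the patch passes m_PluginFactory to loadTRTEngine().

#include <NvInfer.h>

#include <fstream>
#include <iostream>
#include <iterator>
#include <string>
#include <vector>

// Minimal logger; the project routes this through its own m_Logger instead.
class StderrLogger : public nvinfer1::ILogger
{
    void log(Severity severity, const char* msg) noexcept override
    {
        if (severity <= Severity::kWARNING) std::cerr << msg << std::endl;
    }
};

// Read the whole plan file into memory.
static std::vector<char> readFile(const std::string& path)
{
    std::ifstream in(path, std::ios::binary);
    return std::vector<char>((std::istreambuf_iterator<char>(in)),
                             std::istreambuf_iterator<char>());
}

static nvinfer1::ICudaEngine* loadCachedEngine(const std::string& planPath,
                                               nvinfer1::ILogger& logger,
                                               nvinfer1::IPluginFactory* pluginFactory)
{
    const std::vector<char> blob = readFile(planPath);
    nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(logger);
    nvinfer1::ICudaEngine* engine =
        runtime->deserializeCudaEngine(blob.data(), blob.size(), pluginFactory);
    runtime->destroy();
    return engine;
}

The returned engine plays the role of the loaded m_Engine in the constructor, on which createExecutionContext() is then called before inference.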