#include "trt_utils.h"

// NOTE(review): the original include targets were lost during extraction;
// these are the standard headers this translation unit demonstrably uses
// (std::experimental::filesystem, std::ifstream, std::setw, std::find_if).
#include <experimental/filesystem>
#include <algorithm>
#include <fstream>
#include <iomanip>

using namespace nvinfer1;

REGISTER_TENSORRT_PLUGIN(MishPluginCreator);
REGISTER_TENSORRT_PLUGIN(ChunkPluginCreator);
REGISTER_TENSORRT_PLUGIN(HardswishPluginCreator);

/// Stacks the letter-boxed image of every DsImage and converts the batch into
/// a single NCHW float blob of size (inputW x inputH). The trailing `true`
/// asks OpenCV to swap the R and B channels.
cv::Mat blobFromDsImages(const std::vector<DsImage>& inputImages, const int& inputH,
                         const int& inputW)
{
    std::vector<cv::Mat> letterboxStack(inputImages.size());
    for (uint32_t i = 0; i < inputImages.size(); ++i)
    {
        inputImages.at(i).getLetterBoxedImage().copyTo(letterboxStack.at(i));
    }
    return cv::dnn::blobFromImages(letterboxStack, 1.0, cv::Size(inputW, inputH),
                                   cv::Scalar(0.0, 0.0, 0.0), true);
}

/// Erases leading whitespace in place.
static void leftTrim(std::string& s)
{
    s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int ch) { return !isspace(ch); }));
}

/// Erases trailing whitespace in place.
static void rightTrim(std::string& s)
{
    s.erase(std::find_if(s.rbegin(), s.rend(), [](int ch) { return !isspace(ch); }).base(),
            s.end());
}

/// Returns a copy of `s` with surrounding whitespace removed.
std::string trim(std::string s)
{
    leftTrim(s);
    rightTrim(s);
    return s;
}

/// Returns a copy of `s` with any leading characters found in `t` removed.
std::string triml(std::string s, const char* t)
{
    s.erase(0, s.find_first_not_of(t));
    return s;
}

/// Returns a copy of `s` with any trailing characters found in `t` removed.
std::string trimr(std::string s, const char* t)
{
    s.erase(s.find_last_not_of(t) + 1);
    return s;
}

/// Clamps `val` into the closed interval [minVal, maxVal].
float clamp(const float val, const float minVal, const float maxVal)
{
    assert(minVal <= maxVal);
    return std::min(maxVal, std::max(minVal, val));
}

/// Returns true when `fileName` exists on disk; logs a message when `verbose`.
bool fileExists(const std::string fileName, bool verbose)
{
    if (!std::experimental::filesystem::exists(std::experimental::filesystem::path(fileName)))
    {
        if (verbose) std::cout << "File does not exist : " << fileName << std::endl;
        return false;
    }
    return true;
}

// BBox convertBBoxNetRes(const float& bx, const float& by, const float& bw, const float& bh,
//                        const uint32_t& stride, const uint32_t& netW, const uint32_t& netH)
// {
//     BBox b;
//     // Restore coordinates to network input resolution
//     float x = bx * stride;
//     float y = by * stride;
//     b.x1 = x - bw / 2;
//     b.x2 = x + bw / 2;
//     b.y1 = y - bh / 2;
//     b.y2 = y + bh / 2;
//     b.x1 = clamp(b.x1, 0, netW);
//     b.x2 = clamp(b.x2, 0, netW);
//     b.y1 = clamp(b.y1, 0, netH);
//     b.y2 = clamp(b.y2, 0, netH);
//     return b;
// }

// void convertBBoxImgRes(const float scalingFactor, const float xOffset, const float yOffset,
//                        BBox& bbox)
// {
//     //// Undo Letterbox
//     bbox.x1 -= xOffset;
//     bbox.x2 -= xOffset;
//     bbox.y1 -= yOffset;
//     bbox.y2 -= yOffset;
//     //// Restore to input resolution
//     bbox.x1 /= scalingFactor;
//     bbox.x2 /= scalingFactor;
//     bbox.y1 /= scalingFactor;
//     bbox.y2 /= scalingFactor;
//     std::cout << "convertBBoxImgRes" << std::endl;
// }

// void printPredictions(const BBoxInfo& b, const std::string& className)
// {
//     std::cout << " label:" << b.label << "(" << className << ")"
//               << " confidence:" << b.prob << " xmin:" << b.box.x1 << " ymin:" << b.box.y1
//               << " xmax:" << b.box.x2 << " ymax:" << b.box.y2 << std::endl;
// }

/// Reads a darknet-style .weights file into a flat float vector.
/// Skips the 4-byte major-version field, then skips the rest of the header
/// based on the next byte (1 -> 11 more bytes, 2 -> 15 more bytes).
/// NOTE(review): header sizes presumably match the exporter versions this
/// project supports — confirm against the tool that produced the file.
/// `networkType` is currently unused but kept for interface compatibility.
std::vector<float> loadWeights(const std::string weightsFilePath, const std::string& networkType)
{
    assert(fileExists(weightsFilePath));
    std::cout << "Loading pre-trained weights..." << std::endl;
    std::ifstream file(weightsFilePath, std::ios_base::binary);
    assert(file.good());
    std::string line;
    file.ignore(4);
    char buf[2];
    file.read(buf, 1);
    if ((int) (unsigned char) buf[0] == 1)
    {
        file.ignore(11);
    }
    else if ((int) (unsigned char) buf[0] == 2)
    {
        file.ignore(15);
    }
    else
    {
        std::cout << "Invalid network type" << std::endl;
        assert(0);
    }

    std::vector<float> weights;
    // Stack scratch buffer: the original heap-allocated 4 bytes with new[],
    // which leaked whenever an assert aborted the loop.
    char floatWeight[4];
    while (!file.eof())
    {
        file.read(floatWeight, 4);
        assert(file.gcount() == 4);
        weights.push_back(*reinterpret_cast<float*>(floatWeight));
        if (file.peek() == std::istream::traits_type::eof()) break;
    }
    std::cout << "Loading complete!" << std::endl;
    // std::cout << "Total Number of weights read : " << weights.size() << std::endl;
    return weights;
}

/// Formats dims as fixed-width "  d0 x  d1 x ... x  dn".
std::string dimsToString(const nvinfer1::Dims d)
{
    std::stringstream s;
    assert(d.nbDims >= 1);
    for (int i = 0; i < d.nbDims - 1; ++i)
    {
        s << std::setw(4) << d.d[i] << " x";
    }
    s << std::setw(4) << d.d[d.nbDims - 1];
    return s.str();
}

/// Adds a darknet "maxpool" block to the network. Padding is (size-1)/2 so
/// the spatial size is preserved for stride 1.
nvinfer1::ILayer* netAddMaxpool(int layerIdx, std::map<std::string, std::string>& block,
                                nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network)
{
    assert(block.at("type") == "maxpool");
    assert(block.find("size") != block.end());
    assert(block.find("stride") != block.end());

    int size = std::stoi(block.at("size"));
    int stride = std::stoi(block.at("stride"));

    nvinfer1::IPoolingLayer* pool
        = network->addPoolingNd(*input, nvinfer1::PoolingType::kMAX, nvinfer1::DimsHW{size, size});
    assert(pool);
    std::string maxpoolLayerName = "maxpool_" + std::to_string(layerIdx);
    int pad = (size - 1) / 2;
    pool->setPaddingNd(nvinfer1::DimsHW{pad, pad});
    pool->setStrideNd(nvinfer1::DimsHW{stride, stride});
    pool->setName(maxpoolLayerName.c_str());

    return pool;
}

/// Adds a plain convolution (linear activation, no batch-norm) darknet block.
/// Consumes `filters` biases then filters*inputChannels*k*k kernel weights
/// from `weights` starting at `weightPtr` (advanced in place); the allocated
/// buffers are recorded in `trtWeights` so the caller can free them later.
nvinfer1::ILayer* netAddConvLinear(int layerIdx, std::map<std::string, std::string>& block,
                                   std::vector<float>& weights,
                                   std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr,
                                   int& inputChannels, nvinfer1::ITensor* input,
                                   nvinfer1::INetworkDefinition* network)
{
    assert(block.at("type") == "convolutional");
    assert(block.find("batch_normalize") == block.end());
    assert(block.at("activation") == "linear");
    assert(block.find("filters") != block.end());
    assert(block.find("pad") != block.end());
    assert(block.find("size") != block.end());
    assert(block.find("stride") != block.end());

    int filters = std::stoi(block.at("filters"));
    int padding = std::stoi(block.at("pad"));
    int kernelSize = std::stoi(block.at("size"));
    int stride = std::stoi(block.at("stride"));
    int pad;
    if (padding)
        pad = (kernelSize - 1) / 2;
    else
        pad = 0;

    // load the convolution layer bias
    nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, filters};
    float* val = new float[filters];
    for (int i = 0; i < filters; ++i)
    {
        val[i] = weights[weightPtr];
        weightPtr++;
    }
    convBias.values = val;
    trtWeights.push_back(convBias);

    // load the convolutional layer weights
    int size = filters * inputChannels * kernelSize * kernelSize;
    nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size};
    val = new float[size];
    for (int i = 0; i < size; ++i)
    {
        val[i] = weights[weightPtr];
        weightPtr++;
    }
    convWt.values = val;
    trtWeights.push_back(convWt);

    nvinfer1::IConvolutionLayer* conv = network->addConvolution(
        *input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias);
    assert(conv != nullptr);
    std::string convLayerName = "conv_" + std::to_string(layerIdx);
    conv->setName(convLayerName.c_str());
    conv->setStride(nvinfer1::DimsHW{stride, stride});
    conv->setPadding(nvinfer1::DimsHW{pad, pad});

    return conv;
}

/// Adds a darknet conv + batch-norm + mish block. BN parameters precede the
/// conv kernel in the darknet weight stream (biases, scales, running mean,
/// running var); BN is folded into an IScaleLayer and mish comes from the
/// registered "Mish_TRT" plugin.
nvinfer1::ILayer* net_conv_bn_mish(int layerIdx, std::map<std::string, std::string>& block,
                                   std::vector<float>& weights,
                                   std::vector<nvinfer1::Weights>& trtWeights, int& weightPtr,
                                   int& inputChannels, nvinfer1::ITensor* input,
                                   nvinfer1::INetworkDefinition* network)
{
    assert(block.at("type") == "convolutional");
    assert(block.find("batch_normalize") != block.end());
    assert(block.at("batch_normalize") == "1");
    assert(block.at("activation") == "mish");
    assert(block.find("filters") != block.end());
    assert(block.find("pad") != block.end());
    assert(block.find("size") != block.end());
    assert(block.find("stride") != block.end());

    bool batchNormalize, bias;
    if (block.find("batch_normalize") != block.end())
    {
        batchNormalize = (block.at("batch_normalize") == "1");
        bias = false;
    }
    else
    {
        batchNormalize = false;
        bias = true;
    }
    // all conv_bn_leaky layers assume bias is false
    assert(batchNormalize == true && bias == false);

    int filters = std::stoi(block.at("filters"));
    int padding = std::stoi(block.at("pad"));
    int kernelSize = std::stoi(block.at("size"));
    int stride = std::stoi(block.at("stride"));
    int pad;
    if (padding)
        pad = (kernelSize - 1) / 2;
    else
        pad = 0;

    // load BN biases (bn_biases)
    std::vector<float> bnBiases;
    for (int i = 0; i < filters; ++i)
    {
        bnBiases.push_back(weights[weightPtr]);
        weightPtr++;
    }
    // load BN weights
    std::vector<float> bnWeights;
    for (int i = 0; i < filters; ++i)
    {
        bnWeights.push_back(weights[weightPtr]);
        weightPtr++;
    }
    // load BN running_mean
    std::vector<float> bnRunningMean;
    for (int i = 0; i < filters; ++i)
    {
        bnRunningMean.push_back(weights[weightPtr]);
        weightPtr++;
    }
    // load BN running_var
    std::vector<float> bnRunningVar;
    for (int i = 0; i < filters; ++i)
    {
        // 1e-05 for numerical stability
        bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5f));
        weightPtr++;
    }
    // load Conv layer weights (GKCRS)
    int size = filters * inputChannels * kernelSize * kernelSize;
    nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size};
    float* val = new float[size];
    for (int i = 0; i < size; ++i)
    {
        val[i] = weights[weightPtr];
        weightPtr++;
    }
    convWt.values = val;
    trtWeights.push_back(convWt);
    nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, 0};
    trtWeights.push_back(convBias);

    nvinfer1::IConvolutionLayer* conv = network->addConvolution(
        *input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias);
    assert(conv != nullptr);
    std::string convLayerName = "conv_" + std::to_string(layerIdx);
    conv->setName(convLayerName.c_str());
    conv->setStride(nvinfer1::DimsHW{stride, stride});
    conv->setPadding(nvinfer1::DimsHW{pad, pad});

    /***** BATCHNORM LAYER *****/
    /***************************/
    size = filters;
    // create the weights: y = (x - mean) / sqrt(var) * gamma + beta is folded
    // into scale/shift with power fixed at 1.
    nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, nullptr, size};
    nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, nullptr, size};
    nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size};
    float* shiftWt = new float[size];
    for (int i = 0; i < size; ++i)
    {
        shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i));
    }
    shift.values = shiftWt;
    float* scaleWt = new float[size];
    for (int i = 0; i < size; ++i)
    {
        scaleWt[i] = bnWeights.at(i) / bnRunningVar[i];
    }
    scale.values = scaleWt;
    float* powerWt = new float[size];
    for (int i = 0; i < size; ++i)
    {
        powerWt[i] = 1.0;
    }
    power.values = powerWt;
    trtWeights.push_back(shift);
    trtWeights.push_back(scale);
    trtWeights.push_back(power);

    // Add the batch norm layers
    nvinfer1::IScaleLayer* bn = network->addScale(*conv->getOutput(0),
                                                  nvinfer1::ScaleMode::kCHANNEL, shift, scale, power);
    assert(bn != nullptr);
    std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx);
    bn->setName(bnLayerName.c_str());

    /***** ACTIVATION LAYER *****/
    /****************************/
    auto creator = getPluginRegistry()->getPluginCreator("Mish_TRT", "1");
    const nvinfer1::PluginFieldCollection* pluginData = creator->getFieldNames();
    nvinfer1::IPluginV2* pluginObj
        = creator->createPlugin(("mish" + std::to_string(layerIdx)).c_str(), pluginData);
    nvinfer1::ITensor* inputTensors[] = {bn->getOutput(0)};
    auto mish = network->addPluginV2(&inputTensors[0], 1, *pluginObj);
    return mish;
}

/// Returns the channel count (dim 0) of a CHW tensor.
int getNumChannels(nvinfer1::ITensor* t)
{
    nvinfer1::Dims d = t->getDimensions();
    assert(d.nbDims == 3);
    return d.d[0];
}

/// Parses a delimiter-separated list of layer indices, e.g. "-1, 36" -> {-1, 36}.
std::vector<int> split_layer_index(const std::string& s_, const std::string& delimiter_)
{
    std::vector<int> index;
    std::string s = s_;
    size_t pos = 0;
    std::string token;
    while ((pos = s.find(delimiter_)) != std::string::npos)
    {
        token = s.substr(0, pos);
        index.push_back(std::stoi(trim(token)));
        s.erase(0, pos + delimiter_.length());
    }
    index.push_back(std::stoi(trim(s)));
    return index;
}

/// Prints one fixed-width row of the layer summary table.
void printLayerInfo(std::string layerIndex, std::string layerName, std::string layerInput,
                    std::string layerOutput, std::string weightPtr)
{
    std::cout << std::setw(6) << std::left << layerIndex << std::setw(15) << std::left << layerName;
    std::cout << std::setw(20) << std::left << layerInput << std::setw(20) << std::left
              << layerOutput;
    std::cout << std::setw(6) << std::left << weightPtr << std::endl;
}

/// Element count of a CHW tensor. Widens before multiplying so large tensors
/// do not overflow int arithmetic.
uint64_t get3DTensorVolume(nvinfer1::Dims inputDims)
{
    assert(inputDims.nbDims == 3);
    return static_cast<uint64_t>(inputDims.d[0]) * inputDims.d[1] * inputDims.d[2];
}
nvinfer1::ICudaEngine* loadTRTEngine(const std::string planFilePath, PluginFactory* pluginFactory, Logger& logger) { // reading the model in memory std::cout << "Loading TRT Engine..." << std::endl; assert(fileExists(planFilePath)); std::stringstream trtModelStream; trtModelStream.seekg(0, trtModelStream.beg); std::ifstream cache(planFilePath,std::ios::binary | std::ios::in); assert(cache.good()); trtModelStream << cache.rdbuf(); cache.close(); // calculating model size trtModelStream.seekg(0, std::ios::end); const auto modelSize = trtModelStream.tellg(); trtModelStream.seekg(0, std::ios::beg); void* modelMem = malloc(modelSize); trtModelStream.read((char*) modelMem, modelSize); nvinfer1::IRuntime* runtime = nvinfer1::createInferRuntime(logger); std::cout << "test................................" << std::endl; nvinfer1::ICudaEngine* engine = runtime->deserializeCudaEngine(modelMem, modelSize, pluginFactory); free(modelMem); runtime->destroy(); std::cout << "Loading Complete!" << std::endl; return engine; } std::vector nmsAllClasses(const float nmsThresh, std::vector& binfo, const uint32_t numClasses, const std::string &model_type) { std::vector result; std::vector> splitBoxes(numClasses); for (auto& box : binfo) { splitBoxes.at(box.label).push_back(box); } for (auto& boxes : splitBoxes) { boxes = nonMaximumSuppression(nmsThresh, boxes); result.insert(result.end(), boxes.begin(), boxes.end()); } return result; } std::vector nonMaximumSuppression(const float nmsThresh, std::vector binfo) { auto overlap1D = [](float x1min, float x1max, float x2min, float x2max) -> float { if (x1min > x2min) { std::swap(x1min, x2min); std::swap(x1max, x2max); } return x1max < x2min ? 
0 : std::min(x1max, x2max) - x2min; }; auto computeIoU = [&overlap1D](BBox& bbox1, BBox& bbox2) -> float { float overlapX = overlap1D(bbox1.x1, bbox1.x2, bbox2.x1, bbox2.x2); float overlapY = overlap1D(bbox1.y1, bbox1.y2, bbox2.y1, bbox2.y2); float area1 = (bbox1.x2 - bbox1.x1) * (bbox1.y2 - bbox1.y1); float area2 = (bbox2.x2 - bbox2.x1) * (bbox2.y2 - bbox2.y1); float overlap2D = overlapX * overlapY; float u = area1 + area2 - overlap2D; return u == 0 ? 0 : overlap2D / u; }; std::stable_sort(binfo.begin(), binfo.end(), [](const BBoxInfo& b1, const BBoxInfo& b2) { return b1.prob > b2.prob; }); std::vector out; for (auto& i : binfo) { bool keep = true; for (auto& j : out) { if (keep) { float overlap = computeIoU(i.box, j.box); keep = overlap <= nmsThresh; } else break; } if (keep) out.push_back(i); } return out; } nvinfer1::ILayer* netAddUpsample(int layerIdx, std::map& block, std::vector& weights, std::vector& trtWeights, int& inputChannels, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network) { assert(block.at("type") == "upsample"); nvinfer1::Dims inpDims = input->getDimensions(); assert(inpDims.nbDims == 3); // assert(inpDims.d[1] == inpDims.d[2]); int n_scale = std::stoi(block.at("stride")); int c1 = inpDims.d[0]; float *deval = new float[c1*n_scale*n_scale]; for (int i = 0; i < c1*n_scale*n_scale; i++) { deval[i] = 1.0; } nvinfer1::Weights wts{ DataType::kFLOAT, deval, c1*n_scale*n_scale }; nvinfer1::Weights bias{ DataType::kFLOAT, nullptr, 0 }; IDeconvolutionLayer* upsample = network->addDeconvolutionNd(*input, c1, DimsHW{ n_scale, n_scale }, wts, bias); upsample->setStrideNd(DimsHW{ n_scale, n_scale }); upsample->setNbGroups(c1); return upsample; #if 0 #endif } nvinfer1::ILayer * layer_split(const int n_layer_index_, nvinfer1::ITensor *input_, nvinfer1::INetworkDefinition* network) { auto creator = getPluginRegistry()->getPluginCreator("CHUNK_TRT", "1.0"); const nvinfer1::PluginFieldCollection* pluginData = creator->getFieldNames(); 
nvinfer1::IPluginV2 *pluginObj = creator->createPlugin(("chunk" + std::to_string(n_layer_index_)).c_str(), pluginData); auto chunk = network->addPluginV2(&input_, 1, *pluginObj); return chunk; } nvinfer1::ILayer* netAddConvBNLeaky(int layerIdx, std::map& block, std::vector& weights, std::vector& trtWeights, int& weightPtr, int& inputChannels, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network) { assert(block.at("type") == "convolutional"); assert(block.find("batch_normalize") != block.end()); assert(block.at("batch_normalize") == "1"); assert(block.at("activation") == "leaky"); assert(block.find("filters") != block.end()); assert(block.find("pad") != block.end()); assert(block.find("size") != block.end()); assert(block.find("stride") != block.end()); bool batchNormalize, bias; if (block.find("batch_normalize") != block.end()) { batchNormalize = (block.at("batch_normalize") == "1"); bias = false; } else { batchNormalize = false; bias = true; } // all conv_bn_leaky layers assume bias is false assert(batchNormalize == true && bias == false); int filters = std::stoi(block.at("filters")); int padding = std::stoi(block.at("pad")); int kernelSize = std::stoi(block.at("size")); int stride = std::stoi(block.at("stride")); int pad; if (padding) pad = (kernelSize - 1) / 2; else pad = 0; /***** CONVOLUTION LAYER *****/ /*****************************/ // batch norm weights are before the conv layer // load BN biases (bn_biases) std::vector bnBiases; for (int i = 0; i < filters; ++i) { bnBiases.push_back(weights[weightPtr]); weightPtr++; } // load BN weights std::vector bnWeights; for (int i = 0; i < filters; ++i) { bnWeights.push_back(weights[weightPtr]); weightPtr++; } // load BN running_mean std::vector bnRunningMean; for (int i = 0; i < filters; ++i) { bnRunningMean.push_back(weights[weightPtr]); weightPtr++; } // load BN running_var std::vector bnRunningVar; for (int i = 0; i < filters; ++i) { // 1e-05 for numerical stability 
bnRunningVar.push_back(sqrt(weights[weightPtr] + 1.0e-5f)); weightPtr++; } // load Conv layer weights (GKCRS) int size = filters * inputChannels * kernelSize * kernelSize; nvinfer1::Weights convWt{nvinfer1::DataType::kFLOAT, nullptr, size}; float* val = new float[size]; for (int i = 0; i < size; ++i) { val[i] = weights[weightPtr]; weightPtr++; } convWt.values = val; trtWeights.push_back(convWt); nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, 0}; trtWeights.push_back(convBias); nvinfer1::IConvolutionLayer* conv = network->addConvolution( *input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias); assert(conv != nullptr); std::string convLayerName = "conv_" + std::to_string(layerIdx); conv->setName(convLayerName.c_str()); conv->setStride(nvinfer1::DimsHW{stride, stride}); conv->setPadding(nvinfer1::DimsHW{pad, pad}); /***** BATCHNORM LAYER *****/ /***************************/ size = filters; // create the weights nvinfer1::Weights shift{nvinfer1::DataType::kFLOAT, nullptr, size}; nvinfer1::Weights scale{nvinfer1::DataType::kFLOAT, nullptr, size}; nvinfer1::Weights power{nvinfer1::DataType::kFLOAT, nullptr, size}; float* shiftWt = new float[size]; for (int i = 0; i < size; ++i) { shiftWt[i] = bnBiases.at(i) - ((bnRunningMean.at(i) * bnWeights.at(i)) / bnRunningVar.at(i)); } shift.values = shiftWt; float* scaleWt = new float[size]; for (int i = 0; i < size; ++i) { scaleWt[i] = bnWeights.at(i) / bnRunningVar[i]; } scale.values = scaleWt; float* powerWt = new float[size]; for (int i = 0; i < size; ++i) { powerWt[i] = 1.0; } power.values = powerWt; trtWeights.push_back(shift); trtWeights.push_back(scale); trtWeights.push_back(power); // Add the batch norm layers nvinfer1::IScaleLayer* bn = network->addScale( *conv->getOutput(0), nvinfer1::ScaleMode::kCHANNEL, shift, scale, power); assert(bn != nullptr); std::string bnLayerName = "batch_norm_" + std::to_string(layerIdx); bn->setName(bnLayerName.c_str()); /***** ACTIVATION LAYER *****/ 
/****************************/ auto leaky = network->addActivation(*bn->getOutput(0),nvinfer1::ActivationType::kLEAKY_RELU); leaky->setAlpha(0.1f); /*nvinfer1::IPlugin* leakyRELU = nvinfer1::plugin::createPReLUPlugin(0.1); assert(leakyRELU != nullptr); nvinfer1::ITensor* bnOutput = bn->getOutput(0); nvinfer1::IPluginLayer* leaky = network->addPlugin(&bnOutput, 1, *leakyRELU);*/ assert(leaky != nullptr); std::string leakyLayerName = "leaky_" + std::to_string(layerIdx); leaky->setName(leakyLayerName.c_str()); return leaky; } std::vector loadListFromTextFile(const std::string filename) { assert(fileExists(filename)); std::vector list; std::ifstream f(filename); if (!f) { std::cout << "failed to open " << filename; assert(0); } std::string line; while (std::getline(f, line)) { if (line.empty()) continue; else list.push_back(trim(line)); } return list; } std::vector loadImageList(const std::string filename, const std::string prefix) { std::vector fileList = loadListFromTextFile(filename); for (auto& file : fileList) { if (fileExists(file, false)) continue; else { std::string prefixed = prefix + file; if (fileExists(prefixed, false)) file = prefixed; else std::cerr << "WARNING: couldn't find: " << prefixed << " while loading: " << filename << std::endl; } } return fileList; }