#ifndef _MODEL_H_ #define _MODEL_H_ #include "plugin_factory.h" #include "trt_utils.h" #include "calibrator.h" #include "NvInfer.h" #include "NvInferPlugin.h" #include "NvInferRuntimeCommon.h" #include "cuda_runtime_api.h" #include #include #include #include #include #include "../utils/time_util.h" #include "../config.h" #include "opencv2/opencv.hpp" #include struct NetworkInfo { std::string networkType; std::string labelsFilePath; std::string precision; std::string deviceType; std::string calibrationTablePath; std::string enginePath; std::string inputBlobName; std::string data_path; }; struct InferParams { bool printPerfInfo; bool printPredictionInfo; std::string calibImages; std::string calibImagesPath; float probThresh; float nmsThresh; }; struct TensorInfo { std::string blobName; uint32_t stride{0}; uint32_t stride_h{0}; uint32_t stride_w{0}; uint32_t gridSize{0}; uint32_t grid_h{ 0 }; uint32_t grid_w{ 0 }; uint32_t numClasses{0}; uint32_t numBBoxes{0}; uint64_t volume{0}; std::vector masks; std::vector anchors; int bindingIndex{-1}; float* hostBuffer{nullptr}; }; class Detecter { public: float getNMSThresh() const { return m_NMSThresh; } std::string getClassName(const int& label) const { return m_ClassNames.at(label); } int getClassId(const int& label) const { return m_ClassIds.at(label); } uint32_t getInputH() const { return m_InputH; } uint32_t getInputW() const { return m_InputW; } uint32_t getNumClasses() const { return static_cast(m_ClassNames.size()); } void doInference(const unsigned char* input, const uint32_t batchSize); std::vector decodeDetections(const int& imageIdx, const int& imageH, const int& imageW); Detecter(const NetworkInfo& networkInfo, const InferParams& inferParams, int type); ~Detecter(); std::string m_EnginePath; const std::string m_DeviceType; const std::string m_InputBlobName; std::vector m_OutputTensors; std::vector> m_configBlocks; uint32_t m_InputH; uint32_t m_InputW; uint32_t m_InputC; uint64_t m_InputSize; uint32_t _n_classes = 0; float _f_depth_multiple = 0; float _f_width_multiple = 0; const float m_ProbThresh; const float m_NMSThresh; std::vector m_ClassNames; const std::vector m_ClassIds{ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90}; uint32_t m_BatchSize = 1; nvinfer1::INetworkDefinition* m_Network; nvinfer1::IBuilder* m_Builder ; nvinfer1::IHostMemory* m_ModelStream; nvinfer1::ICudaEngine* m_Engine; nvinfer1::IExecutionContext* m_Context; std::vector m_DeviceBuffers; int m_InputBindingIndex; cudaStream_t m_CudaStream; PluginFactory* m_PluginFactory; std::vector decodeTensor(const int imageIdx, const int imageH, const int imageW, const TensorInfo& tensor); BBox convert_bbox_res(const float& bx, const float& by, const float& bw, const float& bh, const uint32_t& stride_h_, const uint32_t& stride_w_, const uint32_t& netW, const uint32_t& netH) { BBox b; float x = bx * stride_w_; float y = by * stride_h_; b.x1 = x - bw / 2; b.x2 = x + bw / 2; b.y1 = y - bh / 2; b.y2 = y + bh / 2; b.x1 = clamp(b.x1, 0, netW); b.x2 = clamp(b.x2, 0, netW); b.y1 = clamp(b.y1, 0, netH); b.y2 = clamp(b.y2, 0, netH); return b; } inline void add_bbox_proposal(const float bx, const float by, const float bw, const float bh, const uint32_t stride_h_, const uint32_t stride_w_, const float scaleH, const float scaleW, const float xoffset_, const float yoffset, const int maxIndex, const float maxProb, const uint32_t image_w, const uint32_t image_h, std::vector& binfo) { BBoxInfo bbi; bbi.box = convert_bbox_res(bx, by, bw, bh, stride_h_, stride_w_, m_InputW, m_InputH); if ((bbi.box.x1 > bbi.box.x2) || (bbi.box.y1 > bbi.box.y2)) { return; } bbi.box.x1 = ((float)bbi.box.x1 / (float)m_InputW)*(float)image_w; bbi.box.y1 = ((float)bbi.box.y1 / (float)m_InputH)*(float)image_h; bbi.box.x2 = ((float)bbi.box.x2 / (float)m_InputW)*(float)image_w; bbi.box.y2 = ((float)bbi.box.y2 / (float)m_InputH)*(float)image_h; bbi.label = maxIndex; bbi.prob = maxProb; bbi.classId = getClassId(maxIndex); binfo.push_back(bbi); }; inline int64_t volume(const nvinfer1::Dims& d) { return std::accumulate(d.d,d.d+d.nbDims,1,std::multiplies()); } inline unsigned int getElementSize(nvinfer1::DataType t) { switch (t) { case nvinfer1::DataType::kINT32: return 4; case nvinfer1::DataType::kFLOAT: return 4; case nvinfer1::DataType::kHALF: return 2; case nvinfer1::DataType::kBOOL: case nvinfer1::DataType::kINT8: return 1; } throw std::runtime_error("Invalid DataType."); return 0; } void setOutput(int type); private: Logger m_Logger; void createYOLOEngine(const nvinfer1::DataType dataType = nvinfer1::DataType::kFLOAT, Int8EntropyCalibrator* calibrator = nullptr); void writePlanFileToDisk(); std::vector> parseConfigFile(const std::string cfgFilePath); void parseConfigBlocks(); void allocateBuffers(); bool verifyEngine(); void destroyNetworkUtils(std::vector& trtWeights); protected: const std::string m_NetworkType; std::unique_ptr m_TinyMaxpoolPaddingFormula; private: Timer _timer; int _n_yolo_ind = 0; }; #endif