~suntianyu/baseDetector.git

//sys
#include <cmath>
#include <stdio.h>
#include <cassert>
#include <iostream>
#include <cuda_runtime.h>
#include <cuda.h>
#include <stdint.h>
#include <string.h>
//my
#include "hardswish.h"
 
#define NV_CUDA_CHECK(status)                                                                      \
    {                                                                                              \
        if (status != 0)                                                                           \
        {                                                                                          \
            std::cout << "Cuda failure: " << cudaGetErrorString(status) << " in file " << __FILE__ \
                      << " at line " << __LINE__ << std::endl;                                     \
            abort();                                                                               \
        }                                                                                          \
    }
 
namespace nvinfer1 {
    Hardswish::Hardswish() {
        cudaDeviceProp prop;
        cudaGetDeviceProperties(&prop, 0);
        _n_max_thread_pre_block = prop.maxThreadsPerBlock;
        //    printf("Hardswish():%d\n", _n_max_thread_pre_block);
    }
 
    Hardswish::Hardswish(const void *data, size_t length) {
        const char *d = reinterpret_cast<const char *>(data), *a = d;
        r(d, _n_max_thread_pre_block);
        r(d, _n_output_size);
//        printf("r:threads:%d,size:%d\n", _n_max_thread_pre_block, _n_output_size);
        assert(d == a + length);
    }
 
    Hardswish::~Hardswish() {}
 
    __global__ void kernel_hardswish(const float *input_, float *output_, int n_data_size_) {
        int i = threadIdx.x + blockIdx.x * blockDim.x;
        if (i >= n_data_size_)return;
        if (input_[i] >= 3.0f) {
            output_[i] = input_[i];
        } else if (input_[i] <= -3.0f) {
            output_[i] = 0.0f;
        } else {
            output_[i] = input_[i] * (input_[i] + 3.0f) / 6.0f;
        }
    }
 
    cudaError_t cuda_hardswish_layer(const void *input_,
                                     void *output_,
                                     const int n_batch_size_,
                                     const int n_output_size_,
                                     const int threads_,
                                     cudaStream_t stream_) {
        int n_data_size = n_batch_size_ * n_output_size_;
//        printf("cuda_hardswish_layer:%d,size:%d\n", n_batch_size_, n_output_size_);
        kernel_hardswish << < (n_data_size + threads_ - 1) / threads_, threads_ >> >(
                reinterpret_cast<const float *>(input_),
                        reinterpret_cast<float *>(output_),
                        n_data_size);
        return cudaGetLastError();
    }
 
    int Hardswish::enqueue(int batchSize,
                           const void *const *inputs,
                           void **outputs,
                           void *workspace,
                           cudaStream_t stream) noexcept {
//        printf("batch_size:%d,output_size:%d,threads:%d\n", batchSize, _n_output_size, _n_max_thread_pre_block);
        NV_CUDA_CHECK(cuda_hardswish_layer(inputs[0], outputs[0], batchSize, _n_output_size, _n_max_thread_pre_block,
                                           stream));
        return 0;
    }
 
    size_t Hardswish::getSerializationSize() const noexcept {
        return sizeof(_n_max_thread_pre_block) + sizeof(_n_output_size);
    }
 
    void Hardswish::serialize(void *buffer) const noexcept {
        char *d = static_cast<char *>(buffer), *a = d;
        w(d, _n_max_thread_pre_block);
        w(d, _n_output_size);
//        printf("serialize:%d,%d\n", _n_max_thread_pre_block, _n_output_size);
        assert(d == a + getSerializationSize());
    }
 
 
    bool Hardswish::supportsFormat(DataType type, PluginFormat format) const noexcept {
        return (type == DataType::kFLOAT && format == PluginFormat::kLINEAR);
    }
 
    void Hardswish::configureWithFormat(const Dims *inputDims, int nbInputs,
                                        const Dims *outputDims, int nbOutputs, DataType type, PluginFormat format,
                                        int maxBatchSize) noexcept {
 
    }
 
 
    void Hardswish::configurePlugin(const PluginTensorDesc *in, int nbInput, const PluginTensorDesc *out,
                                    int nbOutput) noexcept {
 
        _n_output_size = in->dims.d[0] * in->dims.d[1] * in->dims.d[2];
//        printf("configurePlugin:%d,%d,%d\n", in->dims.d[0], in->dims.d[1], in->dims.d[2]);
    }
 
    IPluginV2 *Hardswish::clone() const noexcept {
        Hardswish *p = new Hardswish();
        p->setPluginNamespace(_s_plugin_namespace.c_str());
        p->_n_max_thread_pre_block = _n_max_thread_pre_block;
        p->_n_output_size = _n_output_size;
//        printf("clone:%d,%d\n", _n_max_thread_pre_block, _n_output_size);
        return p;
    }
 
 
    //
    PluginFieldCollection HardswishPluginCreator::_fc{};
    std::vector <PluginField> HardswishPluginCreator::_vec_plugin_attributes;
 
    HardswishPluginCreator::HardswishPluginCreator() {
        _vec_plugin_attributes.clear();
        _fc.nbFields = _vec_plugin_attributes.size();
        _fc.fields = _vec_plugin_attributes.data();
    }
 
    const char *HardswishPluginCreator::getPluginName() const noexcept {
        return "HARDSWISH_TRT";
    }
 
    const char *HardswishPluginCreator::getPluginVersion() const noexcept {
        return "1.0";
    }
 
    const PluginFieldCollection *HardswishPluginCreator::getFieldNames() noexcept {
        return &_fc;
    }
 
    IPluginV2 *HardswishPluginCreator::createPlugin(const char *name, const PluginFieldCollection *fc) noexcept {
        Hardswish *obj = new Hardswish();
        obj->setPluginNamespace(_s_name_space.c_str());
        return obj;
    }
 
    IPluginV2 *
    HardswishPluginCreator::deserializePlugin(const char *name, const void *serialData, size_t serialLength) noexcept {
        Hardswish *obj = new Hardswish(serialData, serialLength);
        obj->setPluginNamespace(_s_name_space.c_str());
        return obj;
    }
 
    void HardswishPluginCreator::setPluginNamespace(const char *libNamespace) noexcept {
        _s_name_space = libNamespace;
    }
 
    const char *HardswishPluginCreator::getPluginNamespace() const noexcept {
        return _s_name_space.c_str();
    }
}//end namespace nvinfer1