//sys
|
#include <cmath>
|
#include <stdio.h>
|
#include <cassert>
|
#include <iostream>
|
#include <cuda_runtime.h>
|
#include <cuda.h>
|
#include <stdint.h>
|
#include <string.h>
|
//my
|
#include "hardswish.h"
|
|
#define NV_CUDA_CHECK(status) \
|
{ \
|
if (status != 0) \
|
{ \
|
std::cout << "Cuda failure: " << cudaGetErrorString(status) << " in file " << __FILE__ \
|
<< " at line " << __LINE__ << std::endl; \
|
abort(); \
|
} \
|
}
|
|
namespace nvinfer1
|
{
|
Hardswish::Hardswish()
|
{
|
cudaDeviceProp prop;
|
cudaGetDeviceProperties(&prop, 0);
|
_n_max_thread_pre_block = prop.maxThreadsPerBlock;
|
// printf("Hardswish():%d\n", _n_max_thread_pre_block);
|
}
|
|
Hardswish::Hardswish(const void* data, size_t length)
|
{
|
const char *d = reinterpret_cast<const char*>(data), *a = d;
|
r(d, _n_max_thread_pre_block);
|
r(d, _n_output_size);
|
// printf("r:threads:%d,size:%d\n", _n_max_thread_pre_block, _n_output_size);
|
assert(d == a + length);
|
}
|
|
Hardswish::~Hardswish()
|
{}
|
|
__global__ void kernel_hardswish(const float *input_, float *output_, int n_data_size_)
|
{
|
int i = threadIdx.x + blockIdx.x * blockDim.x;
|
if (i >= n_data_size_)return;
|
if (input_[i] >= 3.0f)
|
{
|
output_[i] = input_[i];
|
}
|
else if (input_[i] <= -3.0f)
|
{
|
output_[i] = 0.0f;
|
}
|
else
|
{
|
output_[i] = input_[i] * (input_[i] + 3.0f) / 6.0f;
|
}
|
}
|
|
cudaError_t cuda_hardswish_layer(const void* input_,
|
void* output_,
|
const int n_batch_size_,
|
const int n_output_size_,
|
const int threads_,
|
cudaStream_t stream_)
|
{
|
int n_data_size = n_batch_size_ * n_output_size_;
|
// printf("cuda_hardswish_layer:%d,size:%d\n", n_batch_size_, n_output_size_);
|
kernel_hardswish << <(n_data_size + threads_ -1)/threads_, threads_ >> >(
|
reinterpret_cast<const float*>(input_),
|
reinterpret_cast<float*>(output_),
|
n_data_size);
|
return cudaGetLastError();
|
}
|
|
int Hardswish::enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace,
|
cudaStream_t stream)
|
{
|
// printf("batch_size:%d,output_size:%d,threads:%d\n", batchSize, _n_output_size, _n_max_thread_pre_block);
|
NV_CUDA_CHECK(cuda_hardswish_layer(inputs[0], outputs[0], batchSize, _n_output_size , _n_max_thread_pre_block,stream));
|
return 0;
|
}
|
|
size_t Hardswish::getSerializationSize() const
|
{
|
return sizeof(_n_max_thread_pre_block) +sizeof(_n_output_size);
|
}
|
|
void Hardswish::serialize(void *buffer) const
|
{
|
char *d = static_cast<char*>(buffer), *a = d;
|
w(d, _n_max_thread_pre_block);
|
w(d, _n_output_size);
|
// printf("serialize:%d,%d\n", _n_max_thread_pre_block, _n_output_size);
|
assert(d == a + getSerializationSize());
|
}
|
|
void Hardswish::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput)
|
{
|
|
_n_output_size = in->dims.d[0] * in->dims.d[1] * in->dims.d[2];
|
// printf("configurePlugin:%d,%d,%d\n", in->dims.d[0], in->dims.d[1], in->dims.d[2]);
|
}
|
IPluginV2IOExt* Hardswish::clone() const
|
{
|
Hardswish *p = new Hardswish();
|
p->setPluginNamespace(_s_plugin_namespace.c_str());
|
p->_n_max_thread_pre_block = _n_max_thread_pre_block;
|
p->_n_output_size = _n_output_size;
|
// printf("clone:%d,%d\n", _n_max_thread_pre_block, _n_output_size);
|
return p;
|
}
|
|
|
//
|
PluginFieldCollection HardswishPluginCreator::_fc{};
|
std::vector<PluginField> HardswishPluginCreator::_vec_plugin_attributes;
|
|
HardswishPluginCreator::HardswishPluginCreator()
|
{
|
_vec_plugin_attributes.clear();
|
_fc.nbFields = _vec_plugin_attributes.size();
|
_fc.fields = _vec_plugin_attributes.data();
|
}
|
|
const char* HardswishPluginCreator::getPluginName() const
|
{
|
return "HARDSWISH_TRT";
|
}
|
|
const char* HardswishPluginCreator::getPluginVersion() const
|
{
|
return "1.0";
|
}
|
|
const PluginFieldCollection* HardswishPluginCreator::getFieldNames()
|
{
|
return &_fc;
|
}
|
|
IPluginV2IOExt* HardswishPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)
|
{
|
Hardswish* obj = new Hardswish();
|
obj->setPluginNamespace(_s_name_space.c_str());
|
return obj;
|
}
|
|
IPluginV2IOExt* HardswishPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)
|
{
|
Hardswish* obj = new Hardswish(serialData, serialLength);
|
obj->setPluginNamespace(_s_name_space.c_str());
|
return obj;
|
}
|
|
void HardswishPluginCreator::setPluginNamespace(const char* libNamespace)
|
{
|
_s_name_space = libNamespace;
|
}
|
|
const char* HardswishPluginCreator::getPluginNamespace() const
|
{
|
return _s_name_space.c_str();
|
}
|
}//end namespace nvinfer1
|