From 168af40fe9a3cc81c6ee16b3e81f154780c36bdb Mon Sep 17 00:00:00 2001 From: Scheaven <xuepengqiang> Date: 星期四, 03 六月 2021 15:03:27 +0800 Subject: [PATCH] up new v4 --- lib/detecter_tools/darknet/avgpool_layer_kernels.cu | 116 +++++++++++++++++++++++++++++----------------------------- 1 files changed, 58 insertions(+), 58 deletions(-) diff --git a/lib/detecter_tools/darknet/avgpool_layer_kernels.cu b/lib/detecter_tools/darknet/avgpool_layer_kernels.cu index 43f6c33..b8cdd60 100644 --- a/lib/detecter_tools/darknet/avgpool_layer_kernels.cu +++ b/lib/detecter_tools/darknet/avgpool_layer_kernels.cu @@ -1,58 +1,58 @@ -#include <cuda_runtime.h> -#include <curand.h> -#include <cublas_v2.h> - -#include "avgpool_layer.h" -#include "dark_cuda.h" - -__global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output) -{ - int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(id >= n) return; - - int k = id % c; - id /= c; - int b = id; - - int i; - int out_index = (k + c*b); - output[out_index] = 0; - for(i = 0; i < w*h; ++i){ - int in_index = i + h*w*(k + b*c); - output[out_index] += input[in_index]; - } - output[out_index] /= w*h; -} - -__global__ void backward_avgpool_layer_kernel(int n, int w, int h, int c, float *in_delta, float *out_delta) -{ - int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; - if(id >= n) return; - - int k = id % c; - id /= c; - int b = id; - - int i; - int out_index = (k + c*b); - for(i = 0; i < w*h; ++i){ - int in_index = i + h*w*(k + b*c); - in_delta[in_index] += out_delta[out_index] / (w*h); - } -} - -extern "C" void forward_avgpool_layer_gpu(avgpool_layer layer, network_state state) -{ - size_t n = layer.c*layer.batch; - - forward_avgpool_layer_kernel<<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>>(n, layer.w, layer.h, layer.c, state.input, layer.output_gpu); - CHECK_CUDA(cudaPeekAtLastError()); -} - -extern "C" void backward_avgpool_layer_gpu(avgpool_layer layer, network_state state) -{ - size_t n = layer.c*layer.batch; - - backward_avgpool_layer_kernel<<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>>(n, layer.w, layer.h, layer.c, state.delta, layer.delta_gpu); - CHECK_CUDA(cudaPeekAtLastError()); -} +#include <cuda_runtime.h> +#include <curand.h> +#include <cublas_v2.h> + +#include "avgpool_layer.h" +#include "dark_cuda.h" + +__global__ void forward_avgpool_layer_kernel(int n, int w, int h, int c, float *input, float *output) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int k = id % c; + id /= c; + int b = id; + + int i; + int out_index = (k + c*b); + output[out_index] = 0; + for(i = 0; i < w*h; ++i){ + int in_index = i + h*w*(k + b*c); + output[out_index] += input[in_index]; + } + output[out_index] /= w*h; +} + +__global__ void backward_avgpool_layer_kernel(int n, int w, int h, int c, float *in_delta, float *out_delta) +{ + int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; + if(id >= n) return; + + int k = id % c; + id /= c; + int b = id; + + int i; + int out_index = (k + c*b); + for(i = 0; i < w*h; ++i){ + int in_index = i + h*w*(k + b*c); + in_delta[in_index] += out_delta[out_index] / (w*h); + } +} + +extern "C" void forward_avgpool_layer_gpu(avgpool_layer layer, network_state state) +{ + size_t n = layer.c*layer.batch; + + forward_avgpool_layer_kernel<<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>>(n, layer.w, layer.h, layer.c, state.input, layer.output_gpu); + CHECK_CUDA(cudaPeekAtLastError()); +} + +extern "C" void backward_avgpool_layer_gpu(avgpool_layer layer, network_state state) +{ + size_t n = layer.c*layer.batch; + + backward_avgpool_layer_kernel<<<cuda_gridsize(n), BLOCK, 0, get_cuda_stream() >>>(n, layer.w, layer.h, layer.c, state.delta, layer.delta_gpu); + CHECK_CUDA(cudaPeekAtLastError()); +} -- Gitblit v1.8.0