From 168af40fe9a3cc81c6ee16b3e81f154780c36bdb Mon Sep 17 00:00:00 2001
From: Scheaven <xuepengqiang>
Date: Thu, 03 Jun 2021 15:03:27 +0800
Subject: [PATCH] up new v4

---
 lib/detecter_tools/darknet/normalization_layer.c | 302 +++++++++++++++++++++++++-------------------------
 1 file changed, 151 insertions(+), 151 deletions(-)

diff --git a/lib/detecter_tools/darknet/normalization_layer.c b/lib/detecter_tools/darknet/normalization_layer.c
index ad0b52b..d6af621 100644
--- a/lib/detecter_tools/darknet/normalization_layer.c
+++ b/lib/detecter_tools/darknet/normalization_layer.c
@@ -1,151 +1,151 @@
-#include "normalization_layer.h"
-#include "blas.h"
-#include "utils.h"
-#include <stdio.h>
-
-layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa)
-{
-    fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size);
-    layer layer = { (LAYER_TYPE)0 };
-    layer.type = NORMALIZATION;
-    layer.batch = batch;
-    layer.h = layer.out_h = h;
-    layer.w = layer.out_w = w;
-    layer.c = layer.out_c = c;
-    layer.kappa = kappa;
-    layer.size = size;
-    layer.alpha = alpha;
-    layer.beta = beta;
-    layer.output = (float*)xcalloc(h * w * c * batch, sizeof(float));
-    layer.delta = (float*)xcalloc(h * w * c * batch, sizeof(float));
-    layer.squared = (float*)xcalloc(h * w * c * batch, sizeof(float));
-    layer.norms = (float*)xcalloc(h * w * c * batch, sizeof(float));
-    layer.inputs = w*h*c;
-    layer.outputs = layer.inputs;
-
-    layer.forward = forward_normalization_layer;
-    layer.backward = backward_normalization_layer;
-    #ifdef GPU
-    layer.forward_gpu = forward_normalization_layer_gpu;
-    layer.backward_gpu = backward_normalization_layer_gpu;
-
-    layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch);
-    layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch);
-    layer.squared_gpu = cuda_make_array(layer.squared, h * w * c * batch);
-    layer.norms_gpu = cuda_make_array(layer.norms, h * w * c * batch);
-    #endif
-    return layer;
-}
-
-void resize_normalization_layer(layer *layer, int w, int h)
-{
-    int c = layer->c;
-    int batch = layer->batch;
-    layer->h = h;
-    layer->w = w;
-    layer->out_h = h;
-    layer->out_w = w;
-    layer->inputs = w*h*c;
-    layer->outputs = layer->inputs;
-    layer->output = (float*)xrealloc(layer->output, h * w * c * batch * sizeof(float));
-    layer->delta = (float*)xrealloc(layer->delta, h * w * c * batch * sizeof(float));
-    layer->squared = (float*)xrealloc(layer->squared, h * w * c * batch * sizeof(float));
-    layer->norms = (float*)xrealloc(layer->norms, h * w * c * batch * sizeof(float));
-#ifdef GPU
-    cuda_free(layer->output_gpu);
-    cuda_free(layer->delta_gpu);
-    cuda_free(layer->squared_gpu);
-    cuda_free(layer->norms_gpu);
-    layer->output_gpu = cuda_make_array(layer->output, h * w * c * batch);
-    layer->delta_gpu = cuda_make_array(layer->delta, h * w * c * batch);
-    layer->squared_gpu = cuda_make_array(layer->squared, h * w * c * batch);
-    layer->norms_gpu = cuda_make_array(layer->norms, h * w * c * batch);
-#endif
-}
-
-void forward_normalization_layer(const layer layer, network_state state)
-{
-    int k,b;
-    int w = layer.w;
-    int h = layer.h;
-    int c = layer.c;
-    scal_cpu(w*h*c*layer.batch, 0, layer.squared, 1);
-
-    for(b = 0; b < layer.batch; ++b){
-        float *squared = layer.squared + w*h*c*b;
-        float *norms = layer.norms + w*h*c*b;
-        float *input = state.input + w*h*c*b;
-        pow_cpu(w*h*c, 2, input, 1, squared, 1);
-
-        const_cpu(w*h, layer.kappa, norms, 1);
-        for(k = 0; k < layer.size/2; ++k){
-            axpy_cpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1);
-        }
-
-        for(k = 1; k < layer.c; ++k){
-            copy_cpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1);
-            int prev = k - ((layer.size-1)/2) - 1;
-            int next = k + (layer.size/2);
-            if(prev >= 0) axpy_cpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1);
-            if(next < layer.c) axpy_cpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1);
-        }
-    }
-    pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, layer.output, 1);
-    mul_cpu(w*h*c*layer.batch, state.input, 1, layer.output, 1);
-}
-
-void backward_normalization_layer(const layer layer, network_state state)
-{
-    // TODO This is approximate ;-)
-    // Also this should add in to delta instead of overwritting.
-
-    int w = layer.w;
-    int h = layer.h;
-    int c = layer.c;
-    pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, state.delta, 1);
-    mul_cpu(w*h*c*layer.batch, layer.delta, 1, state.delta, 1);
-}
-
-#ifdef GPU
-void forward_normalization_layer_gpu(const layer layer, network_state state)
-{
-    int k,b;
-    int w = layer.w;
-    int h = layer.h;
-    int c = layer.c;
-    scal_ongpu(w*h*c*layer.batch, 0, layer.squared_gpu, 1);
-
-    for(b = 0; b < layer.batch; ++b){
-        float *squared = layer.squared_gpu + w*h*c*b;
-        float *norms = layer.norms_gpu + w*h*c*b;
-        float *input = state.input + w*h*c*b;
-        pow_ongpu(w*h*c, 2, input, 1, squared, 1);
-
-        const_ongpu(w*h, layer.kappa, norms, 1);
-        for(k = 0; k < layer.size/2; ++k){
-            axpy_ongpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1);
-        }
-
-        for(k = 1; k < layer.c; ++k){
-            copy_ongpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1);
-            int prev = k - ((layer.size-1)/2) - 1;
-            int next = k + (layer.size/2);
-            if(prev >= 0) axpy_ongpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1);
-            if(next < layer.c) axpy_ongpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1);
-        }
-    }
-    pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, layer.output_gpu, 1);
-    mul_ongpu(w*h*c*layer.batch, state.input, 1, layer.output_gpu, 1);
-}
-
-void backward_normalization_layer_gpu(const layer layer, network_state state)
-{
-    // TODO This is approximate ;-)
-
-    int w = layer.w;
-    int h = layer.h;
-    int c = layer.c;
-    pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, state.delta, 1);
-    mul_ongpu(w*h*c*layer.batch, layer.delta_gpu, 1, state.delta, 1);
-}
-#endif
+#include "normalization_layer.h"
+#include "blas.h"
+#include "utils.h"
+#include <stdio.h>
+
+layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa)
+{
+    fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size);
+    layer layer = { (LAYER_TYPE)0 };
+    layer.type = NORMALIZATION;
+    layer.batch = batch;
+    layer.h = layer.out_h = h;
+    layer.w = layer.out_w = w;
+    layer.c = layer.out_c = c;
+    layer.kappa = kappa;
+    layer.size = size;
+    layer.alpha = alpha;
+    layer.beta = beta;
+    layer.output = (float*)xcalloc(h * w * c * batch, sizeof(float));
+    layer.delta = (float*)xcalloc(h * w * c * batch, sizeof(float));
+    layer.squared = (float*)xcalloc(h * w * c * batch, sizeof(float));
+    layer.norms = (float*)xcalloc(h * w * c * batch, sizeof(float));
+    layer.inputs = w*h*c;
+    layer.outputs = layer.inputs;
+
+    layer.forward = forward_normalization_layer;
+    layer.backward = backward_normalization_layer;
+    #ifdef GPU
+    layer.forward_gpu = forward_normalization_layer_gpu;
+    layer.backward_gpu = backward_normalization_layer_gpu;
+
+    layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch);
+    layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch);
+    layer.squared_gpu = cuda_make_array(layer.squared, h * w * c * batch);
+    layer.norms_gpu = cuda_make_array(layer.norms, h * w * c * batch);
+    #endif
+    return layer;
+}
+
+void resize_normalization_layer(layer *layer, int w, int h)
+{
+    int c = layer->c;
+    int batch = layer->batch;
+    layer->h = h;
+    layer->w = w;
+    layer->out_h = h;
+    layer->out_w = w;
+    layer->inputs = w*h*c;
+    layer->outputs = layer->inputs;
+    layer->output = (float*)xrealloc(layer->output, h * w * c * batch * sizeof(float));
+    layer->delta = (float*)xrealloc(layer->delta, h * w * c * batch * sizeof(float));
+    layer->squared = (float*)xrealloc(layer->squared, h * w * c * batch * sizeof(float));
+    layer->norms = (float*)xrealloc(layer->norms, h * w * c * batch * sizeof(float));
+#ifdef GPU
+    cuda_free(layer->output_gpu);
+    cuda_free(layer->delta_gpu);
+    cuda_free(layer->squared_gpu);
+    cuda_free(layer->norms_gpu);
+    layer->output_gpu = cuda_make_array(layer->output, h * w * c * batch);
+    layer->delta_gpu = cuda_make_array(layer->delta, h * w * c * batch);
+    layer->squared_gpu = cuda_make_array(layer->squared, h * w * c * batch);
+    layer->norms_gpu = cuda_make_array(layer->norms, h * w * c * batch);
+#endif
+}
+
+void forward_normalization_layer(const layer layer, network_state state)
+{
+    int k,b;
+    int w = layer.w;
+    int h = layer.h;
+    int c = layer.c;
+    scal_cpu(w*h*c*layer.batch, 0, layer.squared, 1);
+
+    for(b = 0; b < layer.batch; ++b){
+        float *squared = layer.squared + w*h*c*b;
+        float *norms = layer.norms + w*h*c*b;
+        float *input = state.input + w*h*c*b;
+        pow_cpu(w*h*c, 2, input, 1, squared, 1);
+
+        const_cpu(w*h, layer.kappa, norms, 1);
+        for(k = 0; k < layer.size/2; ++k){
+            axpy_cpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1);
+        }
+
+        for(k = 1; k < layer.c; ++k){
+            copy_cpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1);
+            int prev = k - ((layer.size-1)/2) - 1;
+            int next = k + (layer.size/2);
+            if(prev >= 0) axpy_cpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1);
+            if(next < layer.c) axpy_cpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1);
+        }
+    }
+    pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, layer.output, 1);
+    mul_cpu(w*h*c*layer.batch, state.input, 1, layer.output, 1);
+}
+
+void backward_normalization_layer(const layer layer, network_state state)
+{
+    // TODO This is approximate ;-)
+    // Also this should add in to delta instead of overwritting.
+
+    int w = layer.w;
+    int h = layer.h;
+    int c = layer.c;
+    pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, state.delta, 1);
+    mul_cpu(w*h*c*layer.batch, layer.delta, 1, state.delta, 1);
+}
+
+#ifdef GPU
+void forward_normalization_layer_gpu(const layer layer, network_state state)
+{
+    int k,b;
+    int w = layer.w;
+    int h = layer.h;
+    int c = layer.c;
+    scal_ongpu(w*h*c*layer.batch, 0, layer.squared_gpu, 1);
+
+    for(b = 0; b < layer.batch; ++b){
+        float *squared = layer.squared_gpu + w*h*c*b;
+        float *norms = layer.norms_gpu + w*h*c*b;
+        float *input = state.input + w*h*c*b;
+        pow_ongpu(w*h*c, 2, input, 1, squared, 1);
+
+        const_ongpu(w*h, layer.kappa, norms, 1);
+        for(k = 0; k < layer.size/2; ++k){
+            axpy_ongpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1);
+        }
+
+        for(k = 1; k < layer.c; ++k){
+            copy_ongpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1);
+            int prev = k - ((layer.size-1)/2) - 1;
+            int next = k + (layer.size/2);
+            if(prev >= 0) axpy_ongpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1);
+            if(next < layer.c) axpy_ongpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1);
+        }
+    }
+    pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, layer.output_gpu, 1);
+    mul_ongpu(w*h*c*layer.batch, state.input, 1, layer.output_gpu, 1);
+}
+
+void backward_normalization_layer_gpu(const layer layer, network_state state)
+{
+    // TODO This is approximate ;-)
+
+    int w = layer.w;
+    int h = layer.h;
+    int c = layer.c;
+    pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, state.delta, 1);
+    mul_ongpu(w*h*c*layer.batch, layer.delta_gpu, 1, state.delta, 1);
+}
+#endif
--
Gitblit v1.8.0
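
Reviewer's note (not part of the patch): the file re-added above is darknet's Local Response Normalization (LRN) layer. As a rough sketch of what forward_normalization_layer computes at each spatial location, in my own notation (N(k) stands for the window of roughly `size` neighbouring channels that the prev/next updates maintain around channel k, clipped to the valid range [0, c-1]):

    % x_k: input activation of channel k, n_k: layer.norms, y_k: layer.output
    % kappa, alpha, beta: the parameters passed to make_normalization_layer
    \[
        n_k = \kappa + \alpha \sum_{j \in \mathcal{N}(k)} x_j^{2},
        \qquad
        y_k = x_k \cdot n_k^{-\beta}
    \]

The per-channel loop derives each n_k from n_{k-1} by adding one squared term and removing another, so the pass costs O(w*h*c) work rather than O(w*h*c*size). The backward functions, as their own TODO comments state, are only approximate: they treat the normalizer as a constant and overwrite state.delta instead of accumulating into it.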