~suntianyu/baseDetector.git

			@@ -1,151 +1,151 @@
			#include "normalization_layer.h"
			#include "blas.h"
			#include "utils.h"
			#include <stdio.h>

			/*
			 * Create a local response normalization layer.
			 *
			 *   batch              number of images per batch
			 *   w, h, c            input width, height and channel count; the output
			 *                      dimensions are set to the same values
			 *   size               stored on the layer; the forward pass uses it as the
			 *                      width of the window taken across channels
			 *   alpha, beta, kappa coefficients stored on the layer for the forward and
			 *                      backward passes
			 *
			 * Allocates the output, delta, squared and norms buffers, each holding
			 * h*w*c*batch floats (plus their GPU counterparts when GPU is defined),
			 * wires up the forward/backward callbacks and returns the layer by value.
			 */
			layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa)
			{
			    fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size);

			    /* Number of floats in every per-layer buffer. */
			    const int total = h * w * c * batch;

			    /* Named `l` rather than `layer` so the local does not shadow the type. */
			    layer l = { (LAYER_TYPE)0 };
			    l.type = NORMALIZATION;
			    l.batch = batch;
			    l.h = l.out_h = h;
			    l.w = l.out_w = w;
			    l.c = l.out_c = c;
			    l.kappa = kappa;
			    l.size = size;
			    l.alpha = alpha;
			    l.beta = beta;
			    l.output = (float*)xcalloc(total, sizeof(float));
			    l.delta = (float*)xcalloc(total, sizeof(float));
			    l.squared = (float*)xcalloc(total, sizeof(float));
			    l.norms = (float*)xcalloc(total, sizeof(float));
			    l.inputs = w*h*c;
			    l.outputs = l.inputs;

			    l.forward = forward_normalization_layer;
			    l.backward = backward_normalization_layer;
			#ifdef GPU
			    l.forward_gpu = forward_normalization_layer_gpu;
			    l.backward_gpu = backward_normalization_layer_gpu;

			    l.output_gpu = cuda_make_array(l.output, total);
			    l.delta_gpu = cuda_make_array(l.delta, total);
			    l.squared_gpu = cuda_make_array(l.squared, total);
			    l.norms_gpu = cuda_make_array(l.norms, total);
			#endif
			    return l;
			}

			/*
			 * Resize an existing normalization layer to a new spatial size.
			 *
			 *   layer  layer to update in place; its channel count and batch size are kept
			 *   w, h   new width and height, applied to both input and output dimensions
			 *
			 * Recomputes inputs/outputs as w*h*c and reallocates the output, delta,
			 * squared and norms buffers to h*w*c*batch floats. When GPU is defined the
			 * GPU buffers are freed and recreated from the resized host buffers.
			 */
			void resize_normalization_layer(layer *layer, int w, int h)
			{
			    int c = layer->c;
			    int batch = layer->batch;

			    /* Number of floats in every per-layer buffer after the resize. */
			    const int total = h * w * c * batch;

			    layer->h = h;
			    layer->w = w;
			    layer->out_h = h;
			    layer->out_w = w;
			    layer->inputs = w*h*c;
			    layer->outputs = layer->inputs;

			    /* NOTE(review): assigning straight back assumes xrealloc never returns
			     * NULL (i.e. it handles failure itself) - confirm in utils. */
			    layer->output = (float*)xrealloc(layer->output, total * sizeof(float));
			    layer->delta = (float*)xrealloc(layer->delta, total * sizeof(float));
			    layer->squared = (float*)xrealloc(layer->squared, total * sizeof(float));
			    layer->norms = (float*)xrealloc(layer->norms, total * sizeof(float));
			#ifdef GPU
			    cuda_free(layer->output_gpu);
			    cuda_free(layer->delta_gpu);
			    cuda_free(layer->squared_gpu);
			    cuda_free(layer->norms_gpu);
			    layer->output_gpu = cuda_make_array(layer->output, total);
			    layer->delta_gpu = cuda_make_array(layer->delta, total);
			    layer->squared_gpu = cuda_make_array(layer->squared, total);
			    layer->norms_gpu = cuda_make_array(layer->norms, total);
			#endif
			}

			/*
			 * CPU forward pass of the normalization layer.
			 *
			 *   layer  the normalization layer (passed by value)
			 *   state  network state; state.input is read as w*h*c*batch floats
			 *
			 * For every batch item the per-channel norms are built with a running
			 * window across channels: channel 0 starts at kappa and accumulates
			 * alpha * squared for the first size/2 channels; each later channel k
			 * copies channel k-1, drops the channel that left the window (prev) and
			 * adds the one that entered it (next). The result is written to
			 * layer.output.
			 *
			 * NOTE(review): the descriptions of the *_cpu helpers below assume the
			 * usual BLAS-style semantics (element-wise power, y += a*x, copy,
			 * element-wise multiply) - confirm against blas.h.
			 */
			void forward_normalization_layer(const layer layer, network_state state)
			{
			    int k,b;
			    int w = layer.w;
			    int h = layer.h;
			    int c = layer.c;
			    scal_cpu(w*h*c*layer.batch, 0, layer.squared, 1);

			    for(b = 0; b < layer.batch; ++b){
			        /* Start of this batch item's slice in each buffer. */
			        float *squared = layer.squared + w*h*c*b;
			        float *norms   = layer.norms + w*h*c*b;
			        float *input   = state.input + w*h*c*b;
			        pow_cpu(w*h*c, 2, input, 1, squared, 1);

			        /* Norms for channel 0: kappa plus the leading half-window. */
			        const_cpu(w*h, layer.kappa, norms, 1);
			        for(k = 0; k < layer.size/2; ++k){
			            axpy_cpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1);
			        }

			        /* Slide the window one channel at a time. */
			        for(k = 1; k < layer.c; ++k){
			            copy_cpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1);
			            int prev = k - ((layer.size-1)/2) - 1;
			            int next = k + (layer.size/2);
			            if(prev >= 0)      axpy_cpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1);
			            if(next < layer.c) axpy_cpu(w*h,  layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1);
			        }
			    }
			    /* output = norms^(-beta), then multiplied element-wise by the input. */
			    pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, layer.output, 1);
			    mul_cpu(w*h*c*layer.batch, state.input, 1, layer.output, 1);
			}

			/*
			 * CPU backward pass of the normalization layer.
			 *
			 *   layer  the normalization layer; layer.norms and layer.delta are read
			 *   state  network state; state.delta is written (w*h*c*batch floats)
			 *
			 * Writes norms^(-beta) into state.delta and multiplies it element-wise by
			 * layer.delta (assuming the usual semantics of pow_cpu / mul_cpu - confirm
			 * against blas.h).
			 */
			void backward_normalization_layer(const layer layer, network_state state)
			{
			    // TODO This is approximate ;-)
			    // Also this should add in to delta instead of overwriting.

			    int w = layer.w;
			    int h = layer.h;
			    int c = layer.c;
			    pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, state.delta, 1);
			    mul_cpu(w*h*c*layer.batch, layer.delta, 1, state.delta, 1);
			}

			#ifdef GPU
			/*
			 * GPU forward pass of the normalization layer.
			 *
			 *   layer  the normalization layer; the *_gpu buffers are used
			 *   state  network state; state.input is read as w*h*c*batch floats
			 *          (presumably a device pointer on this path - confirm with caller)
			 *
			 * Same sequence of steps as the CPU forward pass, using the *_ongpu
			 * helpers: per batch item, channel 0 norms start at kappa plus
			 * alpha * squared over the first size/2 channels, and each later channel
			 * k copies channel k-1, subtracts the channel leaving the window (prev)
			 * and adds the one entering it (next). The result goes to
			 * layer.output_gpu.
			 */
			void forward_normalization_layer_gpu(const layer layer, network_state state)
			{
			    int k,b;
			    int w = layer.w;
			    int h = layer.h;
			    int c = layer.c;
			    scal_ongpu(w*h*c*layer.batch, 0, layer.squared_gpu, 1);

			    for(b = 0; b < layer.batch; ++b){
			        /* Start of this batch item's slice in each buffer. */
			        float *squared = layer.squared_gpu + w*h*c*b;
			        float *norms   = layer.norms_gpu + w*h*c*b;
			        float *input   = state.input + w*h*c*b;
			        pow_ongpu(w*h*c, 2, input, 1, squared, 1);

			        /* Norms for channel 0: kappa plus the leading half-window. */
			        const_ongpu(w*h, layer.kappa, norms, 1);
			        for(k = 0; k < layer.size/2; ++k){
			            axpy_ongpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1);
			        }

			        /* Slide the window one channel at a time. */
			        for(k = 1; k < layer.c; ++k){
			            copy_ongpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1);
			            int prev = k - ((layer.size-1)/2) - 1;
			            int next = k + (layer.size/2);
			            if(prev >= 0)      axpy_ongpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1);
			            if(next < layer.c) axpy_ongpu(w*h,  layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1);
			        }
			    }
			    /* output = norms^(-beta), then multiplied element-wise by the input. */
			    pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, layer.output_gpu, 1);
			    mul_ongpu(w*h*c*layer.batch, state.input, 1, layer.output_gpu, 1);
			}

			/*
			 * GPU backward pass of the normalization layer.
			 *
			 *   layer  the normalization layer; layer.norms_gpu and layer.delta_gpu are read
			 *   state  network state; state.delta is written (w*h*c*batch floats)
			 *
			 * Writes norms^(-beta) into state.delta and multiplies it element-wise by
			 * layer.delta_gpu (assuming the usual semantics of pow_ongpu / mul_ongpu -
			 * confirm against blas.h). Like the CPU version this overwrites
			 * state.delta rather than accumulating into it.
			 */
			void backward_normalization_layer_gpu(const layer layer, network_state state)
			{
			    // TODO This is approximate ;-)

			    int w = layer.w;
			    int h = layer.h;
			    int c = layer.c;
			    pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, state.delta, 1);
			    mul_ongpu(w*h*c*layer.batch, layer.delta_gpu, 1, state.delta, 1);
			}
			#endif
			#include "normalization_layer.h"
			#include "blas.h"
			#include "utils.h"
			#include <stdio.h>

			/*
			 * Create a local response normalization layer.
			 *
			 * NOTE(review): this function is also defined earlier in this file; the
			 * file looks like both sides of a diff pasted together - keep only one.
			 *
			 *   batch              number of images per batch
			 *   w, h, c            input width, height and channel count; the output
			 *                      dimensions are set to the same values
			 *   size               stored on the layer; the forward pass uses it as the
			 *                      width of the window taken across channels
			 *   alpha, beta, kappa coefficients stored on the layer for the forward and
			 *                      backward passes
			 *
			 * Allocates the output, delta, squared and norms buffers, each holding
			 * h*w*c*batch floats (plus their GPU counterparts when GPU is defined),
			 * wires up the forward/backward callbacks and returns the layer by value.
			 */
			layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa)
			{
			    fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size);

			    /* Number of floats in every per-layer buffer. */
			    const int total = h * w * c * batch;

			    /* Named `l` rather than `layer` so the local does not shadow the type. */
			    layer l = { (LAYER_TYPE)0 };
			    l.type = NORMALIZATION;
			    l.batch = batch;
			    l.h = l.out_h = h;
			    l.w = l.out_w = w;
			    l.c = l.out_c = c;
			    l.kappa = kappa;
			    l.size = size;
			    l.alpha = alpha;
			    l.beta = beta;
			    l.output = (float*)xcalloc(total, sizeof(float));
			    l.delta = (float*)xcalloc(total, sizeof(float));
			    l.squared = (float*)xcalloc(total, sizeof(float));
			    l.norms = (float*)xcalloc(total, sizeof(float));
			    l.inputs = w*h*c;
			    l.outputs = l.inputs;

			    l.forward = forward_normalization_layer;
			    l.backward = backward_normalization_layer;
			#ifdef GPU
			    l.forward_gpu = forward_normalization_layer_gpu;
			    l.backward_gpu = backward_normalization_layer_gpu;

			    l.output_gpu = cuda_make_array(l.output, total);
			    l.delta_gpu = cuda_make_array(l.delta, total);
			    l.squared_gpu = cuda_make_array(l.squared, total);
			    l.norms_gpu = cuda_make_array(l.norms, total);
			#endif
			    return l;
			}

			/*
			 * Resize an existing normalization layer to a new spatial size.
			 *
			 * NOTE(review): this function is also defined earlier in this file; the
			 * file looks like both sides of a diff pasted together - keep only one.
			 *
			 *   layer  layer to update in place; its channel count and batch size are kept
			 *   w, h   new width and height, applied to both input and output dimensions
			 *
			 * Recomputes inputs/outputs as w*h*c and reallocates the output, delta,
			 * squared and norms buffers to h*w*c*batch floats. When GPU is defined the
			 * GPU buffers are freed and recreated from the resized host buffers.
			 */
			void resize_normalization_layer(layer *layer, int w, int h)
			{
			    int c = layer->c;
			    int batch = layer->batch;

			    /* Number of floats in every per-layer buffer after the resize. */
			    const int total = h * w * c * batch;

			    layer->h = h;
			    layer->w = w;
			    layer->out_h = h;
			    layer->out_w = w;
			    layer->inputs = w*h*c;
			    layer->outputs = layer->inputs;

			    /* NOTE(review): assigning straight back assumes xrealloc never returns
			     * NULL (i.e. it handles failure itself) - confirm in utils. */
			    layer->output = (float*)xrealloc(layer->output, total * sizeof(float));
			    layer->delta = (float*)xrealloc(layer->delta, total * sizeof(float));
			    layer->squared = (float*)xrealloc(layer->squared, total * sizeof(float));
			    layer->norms = (float*)xrealloc(layer->norms, total * sizeof(float));
			#ifdef GPU
			    cuda_free(layer->output_gpu);
			    cuda_free(layer->delta_gpu);
			    cuda_free(layer->squared_gpu);
			    cuda_free(layer->norms_gpu);
			    layer->output_gpu = cuda_make_array(layer->output, total);
			    layer->delta_gpu = cuda_make_array(layer->delta, total);
			    layer->squared_gpu = cuda_make_array(layer->squared, total);
			    layer->norms_gpu = cuda_make_array(layer->norms, total);
			#endif
			}

			/*
			 * CPU forward pass of the normalization layer.
			 *
			 * NOTE(review): this function is also defined earlier in this file; the
			 * file looks like both sides of a diff pasted together - keep only one.
			 *
			 *   layer  the normalization layer (passed by value)
			 *   state  network state; state.input is read as w*h*c*batch floats
			 *
			 * For every batch item the per-channel norms are built with a running
			 * window across channels: channel 0 starts at kappa and accumulates
			 * alpha * squared for the first size/2 channels; each later channel k
			 * copies channel k-1, drops the channel that left the window (prev) and
			 * adds the one that entered it (next). The result is written to
			 * layer.output.
			 *
			 * NOTE(review): the descriptions of the *_cpu helpers below assume the
			 * usual BLAS-style semantics (element-wise power, y += a*x, copy,
			 * element-wise multiply) - confirm against blas.h.
			 */
			void forward_normalization_layer(const layer layer, network_state state)
			{
			    int k,b;
			    int w = layer.w;
			    int h = layer.h;
			    int c = layer.c;
			    scal_cpu(w*h*c*layer.batch, 0, layer.squared, 1);

			    for(b = 0; b < layer.batch; ++b){
			        /* Start of this batch item's slice in each buffer. */
			        float *squared = layer.squared + w*h*c*b;
			        float *norms   = layer.norms + w*h*c*b;
			        float *input   = state.input + w*h*c*b;
			        pow_cpu(w*h*c, 2, input, 1, squared, 1);

			        /* Norms for channel 0: kappa plus the leading half-window. */
			        const_cpu(w*h, layer.kappa, norms, 1);
			        for(k = 0; k < layer.size/2; ++k){
			            axpy_cpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1);
			        }

			        /* Slide the window one channel at a time. */
			        for(k = 1; k < layer.c; ++k){
			            copy_cpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1);
			            int prev = k - ((layer.size-1)/2) - 1;
			            int next = k + (layer.size/2);
			            if(prev >= 0)      axpy_cpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1);
			            if(next < layer.c) axpy_cpu(w*h,  layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1);
			        }
			    }
			    /* output = norms^(-beta), then multiplied element-wise by the input. */
			    pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, layer.output, 1);
			    mul_cpu(w*h*c*layer.batch, state.input, 1, layer.output, 1);
			}

			/*
			 * CPU backward pass of the normalization layer.
			 *
			 * NOTE(review): this function is also defined earlier in this file; the
			 * file looks like both sides of a diff pasted together - keep only one.
			 *
			 *   layer  the normalization layer; layer.norms and layer.delta are read
			 *   state  network state; state.delta is written (w*h*c*batch floats)
			 *
			 * Writes norms^(-beta) into state.delta and multiplies it element-wise by
			 * layer.delta (assuming the usual semantics of pow_cpu / mul_cpu - confirm
			 * against blas.h).
			 */
			void backward_normalization_layer(const layer layer, network_state state)
			{
			    // TODO This is approximate ;-)
			    // Also this should add in to delta instead of overwriting.

			    int w = layer.w;
			    int h = layer.h;
			    int c = layer.c;
			    pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, state.delta, 1);
			    mul_cpu(w*h*c*layer.batch, layer.delta, 1, state.delta, 1);
			}

			#ifdef GPU
			/*
			 * GPU forward pass of the normalization layer.
			 *
			 * NOTE(review): this function is also defined earlier in this file; the
			 * file looks like both sides of a diff pasted together - keep only one.
			 *
			 *   layer  the normalization layer; the *_gpu buffers are used
			 *   state  network state; state.input is read as w*h*c*batch floats
			 *          (presumably a device pointer on this path - confirm with caller)
			 *
			 * Same sequence of steps as the CPU forward pass, using the *_ongpu
			 * helpers: per batch item, channel 0 norms start at kappa plus
			 * alpha * squared over the first size/2 channels, and each later channel
			 * k copies channel k-1, subtracts the channel leaving the window (prev)
			 * and adds the one entering it (next). The result goes to
			 * layer.output_gpu.
			 */
			void forward_normalization_layer_gpu(const layer layer, network_state state)
			{
			    int k,b;
			    int w = layer.w;
			    int h = layer.h;
			    int c = layer.c;
			    scal_ongpu(w*h*c*layer.batch, 0, layer.squared_gpu, 1);

			    for(b = 0; b < layer.batch; ++b){
			        /* Start of this batch item's slice in each buffer. */
			        float *squared = layer.squared_gpu + w*h*c*b;
			        float *norms   = layer.norms_gpu + w*h*c*b;
			        float *input   = state.input + w*h*c*b;
			        pow_ongpu(w*h*c, 2, input, 1, squared, 1);

			        /* Norms for channel 0: kappa plus the leading half-window. */
			        const_ongpu(w*h, layer.kappa, norms, 1);
			        for(k = 0; k < layer.size/2; ++k){
			            axpy_ongpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1);
			        }

			        /* Slide the window one channel at a time. */
			        for(k = 1; k < layer.c; ++k){
			            copy_ongpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1);
			            int prev = k - ((layer.size-1)/2) - 1;
			            int next = k + (layer.size/2);
			            if(prev >= 0)      axpy_ongpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1);
			            if(next < layer.c) axpy_ongpu(w*h,  layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1);
			        }
			    }
			    /* output = norms^(-beta), then multiplied element-wise by the input. */
			    pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, layer.output_gpu, 1);
			    mul_ongpu(w*h*c*layer.batch, state.input, 1, layer.output_gpu, 1);
			}

			/*
			 * GPU backward pass of the normalization layer.
			 *
			 * NOTE(review): this function is also defined earlier in this file; the
			 * file looks like both sides of a diff pasted together - keep only one.
			 *
			 *   layer  the normalization layer; layer.norms_gpu and layer.delta_gpu are read
			 *   state  network state; state.delta is written (w*h*c*batch floats)
			 *
			 * Writes norms^(-beta) into state.delta and multiplies it element-wise by
			 * layer.delta_gpu (assuming the usual semantics of pow_ongpu / mul_ongpu -
			 * confirm against blas.h). Like the CPU version this overwrites
			 * state.delta rather than accumulating into it.
			 */
			void backward_normalization_layer_gpu(const layer layer, network_state state)
			{
			    // TODO This is approximate ;-)

			    int w = layer.w;
			    int h = layer.h;
			    int c = layer.c;
			    pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, state.delta, 1);
			    mul_ongpu(w*h*c*layer.batch, layer.delta_gpu, 1, state.delta, 1);
			}
			#endif