From 168af40fe9a3cc81c6ee16b3e81f154780c36bdb Mon Sep 17 00:00:00 2001
From: Scheaven <xuepengqiang>
Date: Thu, 03 Jun 2021 15:03:27 +0800
Subject: [PATCH] up new v4

---
 lib/detecter_tools/darknet/normalization_layer.c | 302 +++++++++++++++++++++++++-------------------------
 1 file changed, 151 insertions(+), 151 deletions(-)

diff --git a/lib/detecter_tools/darknet/normalization_layer.c b/lib/detecter_tools/darknet/normalization_layer.c
index ad0b52b..d6af621 100644
--- a/lib/detecter_tools/darknet/normalization_layer.c
+++ b/lib/detecter_tools/darknet/normalization_layer.c
@@ -1,151 +1,151 @@
-#include "normalization_layer.h"
-#include "blas.h"
-#include "utils.h"
-#include <stdio.h>
-
-layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa)
-{
-    fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size);
-    layer layer = { (LAYER_TYPE)0 };
-    layer.type = NORMALIZATION;
-    layer.batch = batch;
-    layer.h = layer.out_h = h;
-    layer.w = layer.out_w = w;
-    layer.c = layer.out_c = c;
-    layer.kappa = kappa;
-    layer.size = size;
-    layer.alpha = alpha;
-    layer.beta = beta;
-    layer.output = (float*)xcalloc(h * w * c * batch, sizeof(float));
-    layer.delta = (float*)xcalloc(h * w * c * batch, sizeof(float));
-    layer.squared = (float*)xcalloc(h * w * c * batch, sizeof(float));
-    layer.norms = (float*)xcalloc(h * w * c * batch, sizeof(float));
-    layer.inputs = w*h*c;
-    layer.outputs = layer.inputs;
-
-    layer.forward = forward_normalization_layer;
-    layer.backward = backward_normalization_layer;
-    #ifdef GPU
-    layer.forward_gpu = forward_normalization_layer_gpu;
-    layer.backward_gpu = backward_normalization_layer_gpu;
-
-    layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch);
-    layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch);
-    layer.squared_gpu = cuda_make_array(layer.squared, h * w * c * batch);
-    layer.norms_gpu = cuda_make_array(layer.norms, h * w * c * batch);
-    #endif
-    return layer;
-}
-
-void resize_normalization_layer(layer *layer, int w, int h)
-{
-    int c = layer->c;
-    int batch = layer->batch;
-    layer->h = h;
-    layer->w = w;
-    layer->out_h = h;
-    layer->out_w = w;
-    layer->inputs = w*h*c;
-    layer->outputs = layer->inputs;
-    layer->output = (float*)xrealloc(layer->output, h * w * c * batch * sizeof(float));
-    layer->delta = (float*)xrealloc(layer->delta, h * w * c * batch * sizeof(float));
-    layer->squared = (float*)xrealloc(layer->squared, h * w * c * batch * sizeof(float));
-    layer->norms = (float*)xrealloc(layer->norms, h * w * c * batch * sizeof(float));
-#ifdef GPU
-    cuda_free(layer->output_gpu);
-    cuda_free(layer->delta_gpu);
-    cuda_free(layer->squared_gpu);
-    cuda_free(layer->norms_gpu);
-    layer->output_gpu = cuda_make_array(layer->output, h * w * c * batch);
-    layer->delta_gpu = cuda_make_array(layer->delta, h * w * c * batch);
-    layer->squared_gpu = cuda_make_array(layer->squared, h * w * c * batch);
-    layer->norms_gpu = cuda_make_array(layer->norms, h * w * c * batch);
-#endif
-}
-
-void forward_normalization_layer(const layer layer, network_state state)
-{
-    int k,b;
-    int w = layer.w;
-    int h = layer.h;
-    int c = layer.c;
-    scal_cpu(w*h*c*layer.batch, 0, layer.squared, 1);
-
-    for(b = 0; b < layer.batch; ++b){
-        float *squared = layer.squared + w*h*c*b;
-        float *norms = layer.norms + w*h*c*b;
-        float *input = state.input + w*h*c*b;
-        pow_cpu(w*h*c, 2, input, 1, squared, 1);
-
-        const_cpu(w*h, layer.kappa, norms, 1);
-        for(k = 0; k < layer.size/2; ++k){
-            axpy_cpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1);
-        }
-
-        for(k = 1; k < layer.c; ++k){
-            copy_cpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1);
-            int prev = k - ((layer.size-1)/2) - 1;
-            int next = k + (layer.size/2);
-            if(prev >= 0) axpy_cpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1);
-            if(next < layer.c) axpy_cpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1);
-        }
-    }
-    pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, layer.output, 1);
-    mul_cpu(w*h*c*layer.batch, state.input, 1, layer.output, 1);
-}
-
-void backward_normalization_layer(const layer layer, network_state state)
-{
-    // TODO This is approximate ;-)
-    // Also this should add in to delta instead of overwritting.
-
-    int w = layer.w;
-    int h = layer.h;
-    int c = layer.c;
-    pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, state.delta, 1);
-    mul_cpu(w*h*c*layer.batch, layer.delta, 1, state.delta, 1);
-}
-
-#ifdef GPU
-void forward_normalization_layer_gpu(const layer layer, network_state state)
-{
-    int k,b;
-    int w = layer.w;
-    int h = layer.h;
-    int c = layer.c;
-    scal_ongpu(w*h*c*layer.batch, 0, layer.squared_gpu, 1);
-
-    for(b = 0; b < layer.batch; ++b){
-        float *squared = layer.squared_gpu + w*h*c*b;
-        float *norms = layer.norms_gpu + w*h*c*b;
-        float *input = state.input + w*h*c*b;
-        pow_ongpu(w*h*c, 2, input, 1, squared, 1);
-
-        const_ongpu(w*h, layer.kappa, norms, 1);
-        for(k = 0; k < layer.size/2; ++k){
-            axpy_ongpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1);
-        }
-
-        for(k = 1; k < layer.c; ++k){
-            copy_ongpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1);
-            int prev = k - ((layer.size-1)/2) - 1;
-            int next = k + (layer.size/2);
-            if(prev >= 0) axpy_ongpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1);
-            if(next < layer.c) axpy_ongpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1);
-        }
-    }
-    pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, layer.output_gpu, 1);
-    mul_ongpu(w*h*c*layer.batch, state.input, 1, layer.output_gpu, 1);
-}
-
-void backward_normalization_layer_gpu(const layer layer, network_state state)
-{
-    // TODO This is approximate ;-)
-
-    int w = layer.w;
-    int h = layer.h;
-    int c = layer.c;
-    pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, state.delta, 1);
-    mul_ongpu(w*h*c*layer.batch, layer.delta_gpu, 1, state.delta, 1);
-}
-#endif
+#include "normalization_layer.h"
+#include "blas.h"
+#include "utils.h"
+#include <stdio.h>
+
+layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa)
+{
+    fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size);
+    layer layer = { (LAYER_TYPE)0 };
+    layer.type = NORMALIZATION;
+    layer.batch = batch;
+    layer.h = layer.out_h = h;
+    layer.w = layer.out_w = w;
+    layer.c = layer.out_c = c;
+    layer.kappa = kappa;
+    layer.size = size;
+    layer.alpha = alpha;
+    layer.beta = beta;
+    layer.output = (float*)xcalloc(h * w * c * batch, sizeof(float));
+    layer.delta = (float*)xcalloc(h * w * c * batch, sizeof(float));
+    layer.squared = (float*)xcalloc(h * w * c * batch, sizeof(float));
+    layer.norms = (float*)xcalloc(h * w * c * batch, sizeof(float));
+    layer.inputs = w*h*c;
+    layer.outputs = layer.inputs;
+
+    layer.forward = forward_normalization_layer;
+    layer.backward = backward_normalization_layer;
+    #ifdef GPU
+    layer.forward_gpu = forward_normalization_layer_gpu;
+    layer.backward_gpu = backward_normalization_layer_gpu;
+
+    layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch);
+    layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch);
+    layer.squared_gpu = cuda_make_array(layer.squared, h * w * c * batch);
+    layer.norms_gpu = cuda_make_array(layer.norms, h * w * c * batch);
+    #endif
+    return layer;
+}
+
+void resize_normalization_layer(layer *layer, int w, int h)
+{
+    int c = layer->c;
+    int batch = layer->batch;
+    layer->h = h;
+    layer->w = w;
+    layer->out_h = h;
+    layer->out_w = w;
+    layer->inputs = w*h*c;
+    layer->outputs = layer->inputs;
+    layer->output = (float*)xrealloc(layer->output, h * w * c * batch * sizeof(float));
+    layer->delta = (float*)xrealloc(layer->delta, h * w * c * batch * sizeof(float));
+    layer->squared = (float*)xrealloc(layer->squared, h * w * c * batch * sizeof(float));
+    layer->norms = (float*)xrealloc(layer->norms, h * w * c * batch * sizeof(float));
+#ifdef GPU
+    cuda_free(layer->output_gpu);
+    cuda_free(layer->delta_gpu);
+    cuda_free(layer->squared_gpu);
+    cuda_free(layer->norms_gpu);
+    layer->output_gpu = cuda_make_array(layer->output, h * w * c * batch);
+    layer->delta_gpu = cuda_make_array(layer->delta, h * w * c * batch);
+    layer->squared_gpu = cuda_make_array(layer->squared, h * w * c * batch);
+    layer->norms_gpu = cuda_make_array(layer->norms, h * w * c * batch);
+#endif
+}
+
+void forward_normalization_layer(const layer layer, network_state state)
+{
+    int k,b;
+    int w = layer.w;
+    int h = layer.h;
+    int c = layer.c;
+    scal_cpu(w*h*c*layer.batch, 0, layer.squared, 1);
+
+    for(b = 0; b < layer.batch; ++b){
+        float *squared = layer.squared + w*h*c*b;
+        float *norms = layer.norms + w*h*c*b;
+        float *input = state.input + w*h*c*b;
+        pow_cpu(w*h*c, 2, input, 1, squared, 1);
+
+        const_cpu(w*h, layer.kappa, norms, 1);
+        for(k = 0; k < layer.size/2; ++k){
+            axpy_cpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1);
+        }
+
+        for(k = 1; k < layer.c; ++k){
+            copy_cpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1);
+            int prev = k - ((layer.size-1)/2) - 1;
+            int next = k + (layer.size/2);
+            if(prev >= 0) axpy_cpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1);
+            if(next < layer.c) axpy_cpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1);
+        }
+    }
+    pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, layer.output, 1);
+    mul_cpu(w*h*c*layer.batch, state.input, 1, layer.output, 1);
+}
+
+void backward_normalization_layer(const layer layer, network_state state)
+{
+    // TODO This is approximate ;-)
+    // Also this should add in to delta instead of overwritting.
+
+    int w = layer.w;
+    int h = layer.h;
+    int c = layer.c;
+    pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, state.delta, 1);
+    mul_cpu(w*h*c*layer.batch, layer.delta, 1, state.delta, 1);
+}
+
+#ifdef GPU
+void forward_normalization_layer_gpu(const layer layer, network_state state)
+{
+    int k,b;
+    int w = layer.w;
+    int h = layer.h;
+    int c = layer.c;
+    scal_ongpu(w*h*c*layer.batch, 0, layer.squared_gpu, 1);
+
+    for(b = 0; b < layer.batch; ++b){
+        float *squared = layer.squared_gpu + w*h*c*b;
+        float *norms = layer.norms_gpu + w*h*c*b;
+        float *input = state.input + w*h*c*b;
+        pow_ongpu(w*h*c, 2, input, 1, squared, 1);
+
+        const_ongpu(w*h, layer.kappa, norms, 1);
+        for(k = 0; k < layer.size/2; ++k){
+            axpy_ongpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1);
+        }
+
+        for(k = 1; k < layer.c; ++k){
+            copy_ongpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1);
+            int prev = k - ((layer.size-1)/2) - 1;
+            int next = k + (layer.size/2);
+            if(prev >= 0) axpy_ongpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1);
+            if(next < layer.c) axpy_ongpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1);
+        }
+    }
+    pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, layer.output_gpu, 1);
+    mul_ongpu(w*h*c*layer.batch, state.input, 1, layer.output_gpu, 1);
+}
+
+void backward_normalization_layer_gpu(const layer layer, network_state state)
+{
+    // TODO This is approximate ;-)
+
+    int w = layer.w;
+    int h = layer.h;
+    int c = layer.c;
+    pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, state.delta, 1);
+    mul_ongpu(w*h*c*layer.batch, layer.delta_gpu, 1, state.delta, 1);
+}
+#endif
--
Gitblit v1.8.0
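
Reviewer's note (not part of the patch): the file re-added above is darknet's Local Response Normalization (LRN) layer. As a rough sketch of what forward_normalization_layer computes at each spatial location, in my own notation (N(k) stands for the window of roughly `size` neighbouring channels that the prev/next updates maintain around channel k, clipped to the valid range [0, c-1]):

    % x_k: input activation of channel k, n_k: layer.norms, y_k: layer.output
    % kappa, alpha, beta: the parameters passed to make_normalization_layer
    \[
        n_k = \kappa + \alpha \sum_{j \in \mathcal{N}(k)} x_j^{2},
        \qquad
        y_k = x_k \cdot n_k^{-\beta}
    \]

The per-channel loop derives each n_k from n_{k-1} by adding one squared term and removing another, so the pass costs O(w*h*c) work rather than O(w*h*c*size). The backward functions, as their own TODO comments state, are only approximate: they treat the normalizer as a constant and overwrite state.delta instead of accumulating into it.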