From 168af40fe9a3cc81c6ee16b3e81f154780c36bdb Mon Sep 17 00:00:00 2001 From: Scheaven <xuepengqiang> Date: 星期四, 03 六月 2021 15:03:27 +0800 Subject: [PATCH] up new v4 --- lib/detecter_tools/darknet/cost_layer.c | 296 +++++++++++++++++++++++++++++----------------------------- 1 files changed, 148 insertions(+), 148 deletions(-) diff --git a/lib/detecter_tools/darknet/cost_layer.c b/lib/detecter_tools/darknet/cost_layer.c index dcddb88..ed1cc13 100644 --- a/lib/detecter_tools/darknet/cost_layer.c +++ b/lib/detecter_tools/darknet/cost_layer.c @@ -1,148 +1,148 @@ -#include "cost_layer.h" -#include "utils.h" -#include "dark_cuda.h" -#include "blas.h" -#include <math.h> -#include <string.h> -#include <stdlib.h> -#include <stdio.h> - -COST_TYPE get_cost_type(char *s) -{ - if (strcmp(s, "sse")==0) return SSE; - if (strcmp(s, "masked")==0) return MASKED; - if (strcmp(s, "smooth")==0) return SMOOTH; - fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s); - return SSE; -} - -char *get_cost_string(COST_TYPE a) -{ - switch(a){ - case SSE: - return "sse"; - case MASKED: - return "masked"; - case SMOOTH: - return "smooth"; - default: - return "sse"; - } -} - -cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale) -{ - fprintf(stderr, "cost %4d\n", inputs); - cost_layer l = { (LAYER_TYPE)0 }; - l.type = COST; - - l.scale = scale; - l.batch = batch; - l.inputs = inputs; - l.outputs = inputs; - l.cost_type = cost_type; - l.delta = (float*)xcalloc(inputs * batch, sizeof(float)); - l.output = (float*)xcalloc(inputs * batch, sizeof(float)); - l.cost = (float*)xcalloc(1, sizeof(float)); - - l.forward = forward_cost_layer; - l.backward = backward_cost_layer; - #ifdef GPU - l.forward_gpu = forward_cost_layer_gpu; - l.backward_gpu = backward_cost_layer_gpu; - - l.delta_gpu = cuda_make_array(l.delta, inputs*batch); - l.output_gpu = cuda_make_array(l.output, inputs*batch); - #endif - return l; -} - -void resize_cost_layer(cost_layer *l, int inputs) -{ - l->inputs = inputs; - l->outputs = inputs; - l->delta = (float*)xrealloc(l->delta, inputs * l->batch * sizeof(float)); - l->output = (float*)xrealloc(l->output, inputs * l->batch * sizeof(float)); -#ifdef GPU - cuda_free(l->delta_gpu); - cuda_free(l->output_gpu); - l->delta_gpu = cuda_make_array(l->delta, inputs*l->batch); - l->output_gpu = cuda_make_array(l->output, inputs*l->batch); -#endif -} - -void forward_cost_layer(cost_layer l, network_state state) -{ - if (!state.truth) return; - if(l.cost_type == MASKED){ - int i; - for(i = 0; i < l.batch*l.inputs; ++i){ - if(state.truth[i] == SECRET_NUM) state.input[i] = SECRET_NUM; - } - } - if(l.cost_type == SMOOTH){ - smooth_l1_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output); - } else { - l2_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output); - } - l.cost[0] = sum_array(l.output, l.batch*l.inputs); -} - -void backward_cost_layer(const cost_layer l, network_state state) -{ - axpy_cpu(l.batch*l.inputs, l.scale, l.delta, 1, state.delta, 1); -} - -#ifdef GPU - -void pull_cost_layer(cost_layer l) -{ - cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); -} - -void push_cost_layer(cost_layer l) -{ - cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs); -} - -int float_abs_compare (const void * a, const void * b) -{ - float fa = *(const float*) a; - if(fa < 0) fa = -fa; - float fb = *(const float*) b; - if(fb < 0) fb = -fb; - return (fa > fb) - (fa < fb); -} - -void forward_cost_layer_gpu(cost_layer l, network_state state) -{ - if (!state.truth) return; - if (l.cost_type == MASKED) { - mask_ongpu(l.batch*l.inputs, state.input, SECRET_NUM, state.truth); - } - - if(l.cost_type == SMOOTH){ - smooth_l1_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu); - } else { - l2_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu); - } - - if(l.ratio){ - cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); - qsort(l.delta, l.batch*l.inputs, sizeof(float), float_abs_compare); - int n = (1-l.ratio) * l.batch*l.inputs; - float thresh = l.delta[n]; - thresh = 0; - printf("%f\n", thresh); - supp_ongpu(l.batch*l.inputs, thresh, l.delta_gpu, 1); - } - - cuda_pull_array(l.output_gpu, l.output, l.batch*l.inputs); - l.cost[0] = sum_array(l.output, l.batch*l.inputs); -} - -void backward_cost_layer_gpu(const cost_layer l, network_state state) -{ - axpy_ongpu(l.batch*l.inputs, l.scale, l.delta_gpu, 1, state.delta, 1); -} -#endif +#include "cost_layer.h" +#include "utils.h" +#include "dark_cuda.h" +#include "blas.h" +#include <math.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> + +COST_TYPE get_cost_type(char *s) +{ + if (strcmp(s, "sse")==0) return SSE; + if (strcmp(s, "masked")==0) return MASKED; + if (strcmp(s, "smooth")==0) return SMOOTH; + fprintf(stderr, "Couldn't find cost type %s, going with SSE\n", s); + return SSE; +} + +char *get_cost_string(COST_TYPE a) +{ + switch(a){ + case SSE: + return "sse"; + case MASKED: + return "masked"; + case SMOOTH: + return "smooth"; + default: + return "sse"; + } +} + +cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale) +{ + fprintf(stderr, "cost %4d\n", inputs); + cost_layer l = { (LAYER_TYPE)0 }; + l.type = COST; + + l.scale = scale; + l.batch = batch; + l.inputs = inputs; + l.outputs = inputs; + l.cost_type = cost_type; + l.delta = (float*)xcalloc(inputs * batch, sizeof(float)); + l.output = (float*)xcalloc(inputs * batch, sizeof(float)); + l.cost = (float*)xcalloc(1, sizeof(float)); + + l.forward = forward_cost_layer; + l.backward = backward_cost_layer; + #ifdef GPU + l.forward_gpu = forward_cost_layer_gpu; + l.backward_gpu = backward_cost_layer_gpu; + + l.delta_gpu = cuda_make_array(l.delta, inputs*batch); + l.output_gpu = cuda_make_array(l.output, inputs*batch); + #endif + return l; +} + +void resize_cost_layer(cost_layer *l, int inputs) +{ + l->inputs = inputs; + l->outputs = inputs; + l->delta = (float*)xrealloc(l->delta, inputs * l->batch * sizeof(float)); + l->output = (float*)xrealloc(l->output, inputs * l->batch * sizeof(float)); +#ifdef GPU + cuda_free(l->delta_gpu); + cuda_free(l->output_gpu); + l->delta_gpu = cuda_make_array(l->delta, inputs*l->batch); + l->output_gpu = cuda_make_array(l->output, inputs*l->batch); +#endif +} + +void forward_cost_layer(cost_layer l, network_state state) +{ + if (!state.truth) return; + if(l.cost_type == MASKED){ + int i; + for(i = 0; i < l.batch*l.inputs; ++i){ + if(state.truth[i] == SECRET_NUM) state.input[i] = SECRET_NUM; + } + } + if(l.cost_type == SMOOTH){ + smooth_l1_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output); + } else { + l2_cpu(l.batch*l.inputs, state.input, state.truth, l.delta, l.output); + } + l.cost[0] = sum_array(l.output, l.batch*l.inputs); +} + +void backward_cost_layer(const cost_layer l, network_state state) +{ + axpy_cpu(l.batch*l.inputs, l.scale, l.delta, 1, state.delta, 1); +} + +#ifdef GPU + +void pull_cost_layer(cost_layer l) +{ + cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); +} + +void push_cost_layer(cost_layer l) +{ + cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs); +} + +int float_abs_compare (const void * a, const void * b) +{ + float fa = *(const float*) a; + if(fa < 0) fa = -fa; + float fb = *(const float*) b; + if(fb < 0) fb = -fb; + return (fa > fb) - (fa < fb); +} + +void forward_cost_layer_gpu(cost_layer l, network_state state) +{ + if (!state.truth) return; + if (l.cost_type == MASKED) { + mask_ongpu(l.batch*l.inputs, state.input, SECRET_NUM, state.truth); + } + + if(l.cost_type == SMOOTH){ + smooth_l1_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu); + } else { + l2_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu); + } + + if(l.ratio){ + cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs); + qsort(l.delta, l.batch*l.inputs, sizeof(float), float_abs_compare); + int n = (1-l.ratio) * l.batch*l.inputs; + float thresh = l.delta[n]; + thresh = 0; + printf("%f\n", thresh); + supp_ongpu(l.batch*l.inputs, thresh, l.delta_gpu, 1); + } + + cuda_pull_array(l.output_gpu, l.output, l.batch*l.inputs); + l.cost[0] = sum_array(l.output, l.batch*l.inputs); +} + +void backward_cost_layer_gpu(const cost_layer l, network_state state) +{ + axpy_ongpu(l.batch*l.inputs, l.scale, l.delta_gpu, 1, state.delta, 1); +} +#endif -- Gitblit v1.8.0