From eadf8170a676cedc6ab4f1928567a1ed3cbee050 Mon Sep 17 00:00:00 2001
From: Scheaven <xuepengqiang>
Date: 星期二, 06 七月 2021 10:15:06 +0800
Subject: [PATCH] up cuda

---
 lib/detecter_tools/darknet/parser.c | 4492 +++++++++++++++++++++++++++++++----------------------------
 1 files changed, 2,352 insertions(+), 2,140 deletions(-)

diff --git a/lib/detecter_tools/darknet/parser.c b/lib/detecter_tools/darknet/parser.c
index 0572bef..8f8f584 100644
--- a/lib/detecter_tools/darknet/parser.c
+++ b/lib/detecter_tools/darknet/parser.c
@@ -1,2140 +1,2352 @@
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <stdint.h>
-
-#include "activation_layer.h"
-#include "activations.h"
-#include "assert.h"
-#include "avgpool_layer.h"
-#include "batchnorm_layer.h"
-#include "blas.h"
-#include "connected_layer.h"
-#include "convolutional_layer.h"
-#include "cost_layer.h"
-#include "crnn_layer.h"
-#include "crop_layer.h"
-#include "detection_layer.h"
-#include "dropout_layer.h"
-#include "gru_layer.h"
-#include "list.h"
-#include "local_layer.h"
-#include "lstm_layer.h"
-#include "conv_lstm_layer.h"
-#include "maxpool_layer.h"
-#include "normalization_layer.h"
-#include "option_list.h"
-#include "parser.h"
-#include "region_layer.h"
-#include "reorg_layer.h"
-#include "reorg_old_layer.h"
-#include "rnn_layer.h"
-#include "route_layer.h"
-#include "shortcut_layer.h"
-#include "scale_channels_layer.h"
-#include "sam_layer.h"
-#include "softmax_layer.h"
-#include "utils.h"
-#include "upsample_layer.h"
-#include "version.h"
-#include "yolo_layer.h"
-#include "gaussian_yolo_layer.h"
-
-typedef struct{
-    char *type;
-    list *options;
-}section;
-
-list *read_cfg(char *filename);
-
-LAYER_TYPE string_to_layer_type(char * type)
-{
-
-    if (strcmp(type, "[shortcut]")==0) return SHORTCUT;
-    if (strcmp(type, "[scale_channels]") == 0) return SCALE_CHANNELS;
-    if (strcmp(type, "[sam]") == 0) return SAM;
-    if (strcmp(type, "[crop]")==0) return CROP;
-    if (strcmp(type, "[cost]")==0) return COST;
-    if (strcmp(type, "[detection]")==0) return DETECTION;
-    if (strcmp(type, "[region]")==0) return REGION;
-    if (strcmp(type, "[yolo]") == 0) return YOLO;
-    if (strcmp(type, "[Gaussian_yolo]") == 0) return GAUSSIAN_YOLO;
-    if (strcmp(type, "[local]")==0) return LOCAL;
-    if (strcmp(type, "[conv]")==0
-            || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL;
-    if (strcmp(type, "[activation]")==0) return ACTIVE;
-    if (strcmp(type, "[net]")==0
-            || strcmp(type, "[network]")==0) return NETWORK;
-    if (strcmp(type, "[crnn]")==0) return CRNN;
-    if (strcmp(type, "[gru]")==0) return GRU;
-    if (strcmp(type, "[lstm]")==0) return LSTM;
-    if (strcmp(type, "[conv_lstm]") == 0) return CONV_LSTM;
-    if (strcmp(type, "[rnn]")==0) return RNN;
-    if (strcmp(type, "[conn]")==0
-            || strcmp(type, "[connected]")==0) return CONNECTED;
-    if (strcmp(type, "[max]")==0
-            || strcmp(type, "[maxpool]")==0) return MAXPOOL;
-    if (strcmp(type, "[local_avg]") == 0
-        || strcmp(type, "[local_avgpool]") == 0) return LOCAL_AVGPOOL;
-    if (strcmp(type, "[reorg3d]")==0) return REORG;
-    if (strcmp(type, "[reorg]") == 0) return REORG_OLD;
-    if (strcmp(type, "[avg]")==0
-            || strcmp(type, "[avgpool]")==0) return AVGPOOL;
-    if (strcmp(type, "[dropout]")==0) return DROPOUT;
-    if (strcmp(type, "[lrn]")==0
-            || strcmp(type, "[normalization]")==0) return NORMALIZATION;
-    if (strcmp(type, "[batchnorm]")==0) return BATCHNORM;
-    if (strcmp(type, "[soft]")==0
-            || strcmp(type, "[softmax]")==0) return SOFTMAX;
-    if (strcmp(type, "[route]")==0) return ROUTE;
-    if (strcmp(type, "[upsample]") == 0) return UPSAMPLE;
-    if (strcmp(type, "[empty]") == 0) return EMPTY;
-    return BLANK;
-}
-
-void free_section(section *s)
-{
-    free(s->type);
-    node *n = s->options->front;
-    while(n){
-        kvp *pair = (kvp *)n->val;
-        free(pair->key);
-        free(pair);
-        node *next = n->next;
-        free(n);
-        n = next;
-    }
-    free(s->options);
-    free(s);
-}
-
-void parse_data(char *data, float *a, int n)
-{
-    int i;
-    if(!data) return;
-    char *curr = data;
-    char *next = data;
-    int done = 0;
-    for(i = 0; i < n && !done; ++i){
-        while(*++next !='\0' && *next != ',');
-        if(*next == '\0') done = 1;
-        *next = '\0';
-        sscanf(curr, "%g", &a[i]);
-        curr = next+1;
-    }
-}
-
-typedef struct size_params{
-    int batch;
-    int inputs;
-    int h;
-    int w;
-    int c;
-    int index;
-    int time_steps;
-    int train;
-    network net;
-} size_params;
-
-local_layer parse_local(list *options, size_params params)
-{
-    int n = option_find_int(options, "filters",1);
-    int size = option_find_int(options, "size",1);
-    int stride = option_find_int(options, "stride",1);
-    int pad = option_find_int(options, "pad",0);
-    char *activation_s = option_find_str(options, "activation", "logistic");
-    ACTIVATION activation = get_activation(activation_s);
-
-    int batch,h,w,c;
-    h = params.h;
-    w = params.w;
-    c = params.c;
-    batch=params.batch;
-    if(!(h && w && c)) error("Layer before local layer must output image.");
-
-    local_layer layer = make_local_layer(batch,h,w,c,n,size,stride,pad,activation);
-
-    return layer;
-}
-
-convolutional_layer parse_convolutional(list *options, size_params params)
-{
-    int n = option_find_int(options, "filters",1);
-    int groups = option_find_int_quiet(options, "groups", 1);
-    int size = option_find_int(options, "size",1);
-    int stride = -1;
-    //int stride = option_find_int(options, "stride",1);
-    int stride_x = option_find_int_quiet(options, "stride_x", -1);
-    int stride_y = option_find_int_quiet(options, "stride_y", -1);
-    if (stride_x < 1 || stride_y < 1) {
-        stride = option_find_int(options, "stride", 1);
-        if (stride_x < 1) stride_x = stride;
-        if (stride_y < 1) stride_y = stride;
-    }
-    else {
-        stride = option_find_int_quiet(options, "stride", 1);
-    }
-    int dilation = option_find_int_quiet(options, "dilation", 1);
-    int antialiasing = option_find_int_quiet(options, "antialiasing", 0);
-    if (size == 1) dilation = 1;
-    int pad = option_find_int_quiet(options, "pad",0);
-    int padding = option_find_int_quiet(options, "padding",0);
-    if(pad) padding = size/2;
-
-    char *activation_s = option_find_str(options, "activation", "logistic");
-    ACTIVATION activation = get_activation(activation_s);
-
-    int assisted_excitation = option_find_float_quiet(options, "assisted_excitation", 0);
-
-    int share_index = option_find_int_quiet(options, "share_index", -1000000000);
-    convolutional_layer *share_layer = NULL;
-    if(share_index >= 0) share_layer = &params.net.layers[share_index];
-    else if(share_index != -1000000000) share_layer = &params.net.layers[params.index + share_index];
-
-    int batch,h,w,c;
-    h = params.h;
-    w = params.w;
-    c = params.c;
-    batch=params.batch;
-    if(!(h && w && c)) error("Layer before convolutional layer must output image.");
-    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
-    int cbn = option_find_int_quiet(options, "cbn", 0);
-    if (cbn) batch_normalize = 2;
-    int binary = option_find_int_quiet(options, "binary", 0);
-    int xnor = option_find_int_quiet(options, "xnor", 0);
-    int use_bin_output = option_find_int_quiet(options, "bin_output", 0);
-    int sway = option_find_int_quiet(options, "sway", 0);
-    int rotate = option_find_int_quiet(options, "rotate", 0);
-    int stretch = option_find_int_quiet(options, "stretch", 0);
-    int stretch_sway = option_find_int_quiet(options, "stretch_sway", 0);
-    if ((sway + rotate + stretch + stretch_sway) > 1) {
-        printf(" Error: should be used only 1 param: sway=1, rotate=1 or stretch=1 in the [convolutional] layer \n");
-        exit(0);
-    }
-    int deform = sway || rotate || stretch || stretch_sway;
-    if (deform && size == 1) {
-        printf(" Error: params (sway=1, rotate=1 or stretch=1) should be used only with size >=3 in the [convolutional] layer \n");
-        exit(0);
-    }
-
-    convolutional_layer layer = make_convolutional_layer(batch,1,h,w,c,n,groups,size,stride_x,stride_y,dilation,padding,activation, batch_normalize, binary, xnor, params.net.adam, use_bin_output, params.index, antialiasing, share_layer, assisted_excitation, deform, params.train);
-    layer.flipped = option_find_int_quiet(options, "flipped", 0);
-    layer.dot = option_find_float_quiet(options, "dot", 0);
-    layer.sway = sway;
-    layer.rotate = rotate;
-    layer.stretch = stretch;
-    layer.stretch_sway = stretch_sway;
-    layer.angle = option_find_float_quiet(options, "angle", 15);
-    layer.grad_centr = option_find_int_quiet(options, "grad_centr", 0);
-
-    if(params.net.adam){
-        layer.B1 = params.net.B1;
-        layer.B2 = params.net.B2;
-        layer.eps = params.net.eps;
-    }
-
-    return layer;
-}
-
-layer parse_crnn(list *options, size_params params)
-{
-    int size = option_find_int_quiet(options, "size", 3);
-    int stride = option_find_int_quiet(options, "stride", 1);
-    int dilation = option_find_int_quiet(options, "dilation", 1);
-    int pad = option_find_int_quiet(options, "pad", 0);
-    int padding = option_find_int_quiet(options, "padding", 0);
-    if (pad) padding = size / 2;
-
-    int output_filters = option_find_int(options, "output",1);
-    int hidden_filters = option_find_int(options, "hidden",1);
-    int groups = option_find_int_quiet(options, "groups", 1);
-    char *activation_s = option_find_str(options, "activation", "logistic");
-    ACTIVATION activation = get_activation(activation_s);
-    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
-    int xnor = option_find_int_quiet(options, "xnor", 0);
-
-    layer l = make_crnn_layer(params.batch, params.h, params.w, params.c, hidden_filters, output_filters, groups, params.time_steps, size, stride, dilation, padding, activation, batch_normalize, xnor, params.train);
-
-    l.shortcut = option_find_int_quiet(options, "shortcut", 0);
-
-    return l;
-}
-
-layer parse_rnn(list *options, size_params params)
-{
-    int output = option_find_int(options, "output",1);
-    int hidden = option_find_int(options, "hidden",1);
-    char *activation_s = option_find_str(options, "activation", "logistic");
-    ACTIVATION activation = get_activation(activation_s);
-    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
-    int logistic = option_find_int_quiet(options, "logistic", 0);
-
-    layer l = make_rnn_layer(params.batch, params.inputs, hidden, output, params.time_steps, activation, batch_normalize, logistic);
-
-    l.shortcut = option_find_int_quiet(options, "shortcut", 0);
-
-    return l;
-}
-
-layer parse_gru(list *options, size_params params)
-{
-    int output = option_find_int(options, "output",1);
-    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
-
-    layer l = make_gru_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize);
-
-    return l;
-}
-
-layer parse_lstm(list *options, size_params params)
-{
-    int output = option_find_int(options, "output",1);
-    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
-
-    layer l = make_lstm_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize);
-
-    return l;
-}
-
-layer parse_conv_lstm(list *options, size_params params)
-{
-    // a ConvLSTM with a larger transitional kernel should be able to capture faster motions
-    int size = option_find_int_quiet(options, "size", 3);
-    int stride = option_find_int_quiet(options, "stride", 1);
-    int dilation = option_find_int_quiet(options, "dilation", 1);
-    int pad = option_find_int_quiet(options, "pad", 0);
-    int padding = option_find_int_quiet(options, "padding", 0);
-    if (pad) padding = size / 2;
-
-    int output_filters = option_find_int(options, "output", 1);
-    int groups = option_find_int_quiet(options, "groups", 1);
-    char *activation_s = option_find_str(options, "activation", "LINEAR");
-    ACTIVATION activation = get_activation(activation_s);
-    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
-    int xnor = option_find_int_quiet(options, "xnor", 0);
-    int peephole = option_find_int_quiet(options, "peephole", 0);
-
-    layer l = make_conv_lstm_layer(params.batch, params.h, params.w, params.c, output_filters, groups, params.time_steps, size, stride, dilation, padding, activation, batch_normalize, peephole, xnor, params.train);
-
-    l.state_constrain = option_find_int_quiet(options, "state_constrain", params.time_steps * 32);
-    l.shortcut = option_find_int_quiet(options, "shortcut", 0);
-
-    return l;
-}
-
-connected_layer parse_connected(list *options, size_params params)
-{
-    int output = option_find_int(options, "output",1);
-    char *activation_s = option_find_str(options, "activation", "logistic");
-    ACTIVATION activation = get_activation(activation_s);
-    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
-
-    connected_layer layer = make_connected_layer(params.batch, 1, params.inputs, output, activation, batch_normalize);
-
-    return layer;
-}
-
-softmax_layer parse_softmax(list *options, size_params params)
-{
-	int groups = option_find_int_quiet(options, "groups", 1);
-	softmax_layer layer = make_softmax_layer(params.batch, params.inputs, groups);
-	layer.temperature = option_find_float_quiet(options, "temperature", 1);
-	char *tree_file = option_find_str(options, "tree", 0);
-	if (tree_file) layer.softmax_tree = read_tree(tree_file);
-	layer.w = params.w;
-	layer.h = params.h;
-	layer.c = params.c;
-	layer.spatial = option_find_float_quiet(options, "spatial", 0);
-	layer.noloss = option_find_int_quiet(options, "noloss", 0);
-	return layer;
-}
-
-int *parse_yolo_mask(char *a, int *num)
-{
-    int *mask = 0;
-    if (a) {
-        int len = strlen(a);
-        int n = 1;
-        int i;
-        for (i = 0; i < len; ++i) {
-            if (a[i] == ',') ++n;
-        }
-        mask = (int*)xcalloc(n, sizeof(int));
-        for (i = 0; i < n; ++i) {
-            int val = atoi(a);
-            mask[i] = val;
-            a = strchr(a, ',') + 1;
-        }
-        *num = n;
-    }
-    return mask;
-}
-
-float *get_classes_multipliers(char *cpc, const int classes)
-{
-    float *classes_multipliers = NULL;
-    if (cpc) {
-        int classes_counters = classes;
-        int *counters_per_class = parse_yolo_mask(cpc, &classes_counters);
-        if (classes_counters != classes) {
-            printf(" number of values in counters_per_class = %d doesn't match with classes = %d \n", classes_counters, classes);
-            exit(0);
-        }
-        float max_counter = 0;
-        int i;
-        for (i = 0; i < classes_counters; ++i) if (max_counter < counters_per_class[i]) max_counter = counters_per_class[i];
-        classes_multipliers = (float *)calloc(classes_counters, sizeof(float));
-        for (i = 0; i < classes_counters; ++i) classes_multipliers[i] = max_counter / counters_per_class[i];
-        free(counters_per_class);
-        printf(" classes_multipliers: ");
-        for (i = 0; i < classes_counters; ++i) printf("%.1f, ", classes_multipliers[i]);
-        printf("\n");
-    }
-    return classes_multipliers;
-}
-
-layer parse_yolo(list *options, size_params params)
-{
-    int classes = option_find_int(options, "classes", 20);
-    int total = option_find_int(options, "num", 1);
-    int num = total;
-    char *a = option_find_str(options, "mask", 0);
-    int *mask = parse_yolo_mask(a, &num);
-    int max_boxes = option_find_int_quiet(options, "max", 90);
-    layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes, max_boxes);
-    if (l.outputs != params.inputs) {
-        printf("Error: l.outputs == params.inputs \n");
-        printf("filters= in the [convolutional]-layer doesn't correspond to classes= or mask= in [yolo]-layer \n");
-        exit(EXIT_FAILURE);
-    }
-    //assert(l.outputs == params.inputs);
-
-    char *cpc = option_find_str(options, "counters_per_class", 0);
-    l.classes_multipliers = get_classes_multipliers(cpc, classes);
-
-    l.label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f);
-    l.scale_x_y = option_find_float_quiet(options, "scale_x_y", 1);
-    l.objectness_smooth = option_find_int_quiet(options, "objectness_smooth", 0);
-    l.max_delta = option_find_float_quiet(options, "max_delta", FLT_MAX);   // set 10
-    l.iou_normalizer = option_find_float_quiet(options, "iou_normalizer", 0.75);
-    l.cls_normalizer = option_find_float_quiet(options, "cls_normalizer", 1);
-    char *iou_loss = option_find_str_quiet(options, "iou_loss", "mse");   //  "iou");
-
-    if (strcmp(iou_loss, "mse") == 0) l.iou_loss = MSE;
-    else if (strcmp(iou_loss, "giou") == 0) l.iou_loss = GIOU;
-    else if (strcmp(iou_loss, "diou") == 0) l.iou_loss = DIOU;
-    else if (strcmp(iou_loss, "ciou") == 0) l.iou_loss = CIOU;
-    else l.iou_loss = IOU;
-    fprintf(stderr, "[yolo] params: iou loss: %s (%d), iou_norm: %2.2f, cls_norm: %2.2f, scale_x_y: %2.2f\n",
-        iou_loss, l.iou_loss, l.iou_normalizer, l.cls_normalizer, l.scale_x_y);
-
-    char *iou_thresh_kind_str = option_find_str_quiet(options, "iou_thresh_kind", "iou");
-    if (strcmp(iou_thresh_kind_str, "iou") == 0) l.iou_thresh_kind = IOU;
-    else if (strcmp(iou_thresh_kind_str, "giou") == 0) l.iou_thresh_kind = GIOU;
-    else if (strcmp(iou_thresh_kind_str, "diou") == 0) l.iou_thresh_kind = DIOU;
-    else if (strcmp(iou_thresh_kind_str, "ciou") == 0) l.iou_thresh_kind = CIOU;
-    else {
-        fprintf(stderr, " Wrong iou_thresh_kind = %s \n", iou_thresh_kind_str);
-        l.iou_thresh_kind = IOU;
-    }
-
-    l.beta_nms = option_find_float_quiet(options, "beta_nms", 0.6);
-    char *nms_kind = option_find_str_quiet(options, "nms_kind", "default");
-    if (strcmp(nms_kind, "default") == 0) l.nms_kind = DEFAULT_NMS;
-    else {
-        if (strcmp(nms_kind, "greedynms") == 0) l.nms_kind = GREEDY_NMS;
-        else if (strcmp(nms_kind, "diounms") == 0) l.nms_kind = DIOU_NMS;
-        else l.nms_kind = DEFAULT_NMS;
-        printf("nms_kind: %s (%d), beta = %f \n", nms_kind, l.nms_kind, l.beta_nms);
-    }
-
-    l.jitter = option_find_float(options, "jitter", .2);
-    l.resize = option_find_float_quiet(options, "resize", 1.0);
-    l.focal_loss = option_find_int_quiet(options, "focal_loss", 0);
-
-    l.ignore_thresh = option_find_float(options, "ignore_thresh", .5);
-    l.truth_thresh = option_find_float(options, "truth_thresh", 1);
-    l.iou_thresh = option_find_float_quiet(options, "iou_thresh", 1); // recommended to use iou_thresh=0.213 in [yolo]
-    l.random = option_find_float_quiet(options, "random", 0);
-
-    char *map_file = option_find_str(options, "map", 0);
-    if (map_file) l.map = read_map(map_file);
-
-    a = option_find_str(options, "anchors", 0);
-    if (a) {
-        int len = strlen(a);
-        int n = 1;
-        int i;
-        for (i = 0; i < len; ++i) {
-            if (a[i] == ',') ++n;
-        }
-        for (i = 0; i < n && i < total*2; ++i) {
-            float bias = atof(a);
-            l.biases[i] = bias;
-            a = strchr(a, ',') + 1;
-        }
-    }
-    return l;
-}
-
-
-int *parse_gaussian_yolo_mask(char *a, int *num) // Gaussian_YOLOv3
-{
-    int *mask = 0;
-    if (a) {
-        int len = strlen(a);
-        int n = 1;
-        int i;
-        for (i = 0; i < len; ++i) {
-            if (a[i] == ',') ++n;
-        }
-        mask = (int *)calloc(n, sizeof(int));
-        for (i = 0; i < n; ++i) {
-            int val = atoi(a);
-            mask[i] = val;
-            a = strchr(a, ',') + 1;
-        }
-        *num = n;
-    }
-    return mask;
-}
-
-
-layer parse_gaussian_yolo(list *options, size_params params) // Gaussian_YOLOv3
-{
-    int classes = option_find_int(options, "classes", 20);
-    int max_boxes = option_find_int_quiet(options, "max", 90);
-    int total = option_find_int(options, "num", 1);
-    int num = total;
-
-    char *a = option_find_str(options, "mask", 0);
-    int *mask = parse_gaussian_yolo_mask(a, &num);
-    layer l = make_gaussian_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes, max_boxes);
-    if (l.outputs != params.inputs) {
-        printf("Error: l.outputs == params.inputs \n");
-        printf("filters= in the [convolutional]-layer doesn't correspond to classes= or mask= in [Gaussian_yolo]-layer \n");
-        exit(EXIT_FAILURE);
-    }
-    //assert(l.outputs == params.inputs);
-
-    char *cpc = option_find_str(options, "counters_per_class", 0);
-    l.classes_multipliers = get_classes_multipliers(cpc, classes);
-
-    l.label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f);
-    l.scale_x_y = option_find_float_quiet(options, "scale_x_y", 1);
-    l.objectness_smooth = option_find_int_quiet(options, "objectness_smooth", 0);
-    l.max_delta = option_find_float_quiet(options, "max_delta", FLT_MAX);   // set 10
-    l.uc_normalizer = option_find_float_quiet(options, "uc_normalizer", 1.0);
-    l.iou_normalizer = option_find_float_quiet(options, "iou_normalizer", 0.75);
-    l.cls_normalizer = option_find_float_quiet(options, "cls_normalizer", 1.0);
-    char *iou_loss = option_find_str_quiet(options, "iou_loss", "mse");   //  "iou");
-
-    if (strcmp(iou_loss, "mse") == 0) l.iou_loss = MSE;
-    else if (strcmp(iou_loss, "giou") == 0) l.iou_loss = GIOU;
-    else if (strcmp(iou_loss, "diou") == 0) l.iou_loss = DIOU;
-    else if (strcmp(iou_loss, "ciou") == 0) l.iou_loss = CIOU;
-    else l.iou_loss = IOU;
-
-    char *iou_thresh_kind_str = option_find_str_quiet(options, "iou_thresh_kind", "iou");
-    if (strcmp(iou_thresh_kind_str, "iou") == 0) l.iou_thresh_kind = IOU;
-    else if (strcmp(iou_thresh_kind_str, "giou") == 0) l.iou_thresh_kind = GIOU;
-    else if (strcmp(iou_thresh_kind_str, "diou") == 0) l.iou_thresh_kind = DIOU;
-    else if (strcmp(iou_thresh_kind_str, "ciou") == 0) l.iou_thresh_kind = CIOU;
-    else {
-        fprintf(stderr, " Wrong iou_thresh_kind = %s \n", iou_thresh_kind_str);
-        l.iou_thresh_kind = IOU;
-    }
-
-    l.beta_nms = option_find_float_quiet(options, "beta_nms", 0.6);
-    char *nms_kind = option_find_str_quiet(options, "nms_kind", "default");
-    if (strcmp(nms_kind, "default") == 0) l.nms_kind = DEFAULT_NMS;
-    else {
-        if (strcmp(nms_kind, "greedynms") == 0) l.nms_kind = GREEDY_NMS;
-        else if (strcmp(nms_kind, "diounms") == 0) l.nms_kind = DIOU_NMS;
-        else if (strcmp(nms_kind, "cornersnms") == 0) l.nms_kind = CORNERS_NMS;
-        else l.nms_kind = DEFAULT_NMS;
-        printf("nms_kind: %s (%d), beta = %f \n", nms_kind, l.nms_kind, l.beta_nms);
-    }
-
-    char *yolo_point = option_find_str_quiet(options, "yolo_point", "center");
-    if (strcmp(yolo_point, "left_top") == 0) l.yolo_point = YOLO_LEFT_TOP;
-    else if (strcmp(yolo_point, "right_bottom") == 0) l.yolo_point = YOLO_RIGHT_BOTTOM;
-    else l.yolo_point = YOLO_CENTER;
-
-    fprintf(stderr, "[Gaussian_yolo] iou loss: %s (%d), iou_norm: %2.2f, cls_norm: %2.2f, scale: %2.2f, point: %d\n",
-        iou_loss, l.iou_loss, l.iou_normalizer, l.cls_normalizer, l.scale_x_y, l.yolo_point);
-
-    l.jitter = option_find_float(options, "jitter", .2);
-    l.resize = option_find_float_quiet(options, "resize", 1.0);
-
-    l.ignore_thresh = option_find_float(options, "ignore_thresh", .5);
-    l.truth_thresh = option_find_float(options, "truth_thresh", 1);
-    l.iou_thresh = option_find_float_quiet(options, "iou_thresh", 1); // recommended to use iou_thresh=0.213 in [yolo]
-    l.random = option_find_float_quiet(options, "random", 0);
-
-    char *map_file = option_find_str(options, "map", 0);
-    if (map_file) l.map = read_map(map_file);
-
-    a = option_find_str(options, "anchors", 0);
-    if (a) {
-        int len = strlen(a);
-        int n = 1;
-        int i;
-        for (i = 0; i < len; ++i) {
-            if (a[i] == ',') ++n;
-        }
-        for (i = 0; i < n; ++i) {
-            float bias = atof(a);
-            l.biases[i] = bias;
-            a = strchr(a, ',') + 1;
-        }
-    }
-    return l;
-}
-
-layer parse_region(list *options, size_params params)
-{
-    int coords = option_find_int(options, "coords", 4);
-    int classes = option_find_int(options, "classes", 20);
-    int num = option_find_int(options, "num", 1);
-    int max_boxes = option_find_int_quiet(options, "max", 90);
-
-    layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords, max_boxes);
-    if (l.outputs != params.inputs) {
-        printf("Error: l.outputs == params.inputs \n");
-        printf("filters= in the [convolutional]-layer doesn't correspond to classes= or num= in [region]-layer \n");
-        exit(EXIT_FAILURE);
-    }
-    //assert(l.outputs == params.inputs);
-
-    l.log = option_find_int_quiet(options, "log", 0);
-    l.sqrt = option_find_int_quiet(options, "sqrt", 0);
-
-    l.softmax = option_find_int(options, "softmax", 0);
-    l.focal_loss = option_find_int_quiet(options, "focal_loss", 0);
-    //l.max_boxes = option_find_int_quiet(options, "max",30);
-    l.jitter = option_find_float(options, "jitter", .2);
-    l.resize = option_find_float_quiet(options, "resize", 1.0);
-    l.rescore = option_find_int_quiet(options, "rescore",0);
-
-    l.thresh = option_find_float(options, "thresh", .5);
-    l.classfix = option_find_int_quiet(options, "classfix", 0);
-    l.absolute = option_find_int_quiet(options, "absolute", 0);
-    l.random = option_find_float_quiet(options, "random", 0);
-
-    l.coord_scale = option_find_float(options, "coord_scale", 1);
-    l.object_scale = option_find_float(options, "object_scale", 1);
-    l.noobject_scale = option_find_float(options, "noobject_scale", 1);
-    l.mask_scale = option_find_float(options, "mask_scale", 1);
-    l.class_scale = option_find_float(options, "class_scale", 1);
-    l.bias_match = option_find_int_quiet(options, "bias_match",0);
-
-    char *tree_file = option_find_str(options, "tree", 0);
-    if (tree_file) l.softmax_tree = read_tree(tree_file);
-    char *map_file = option_find_str(options, "map", 0);
-    if (map_file) l.map = read_map(map_file);
-
-    char *a = option_find_str(options, "anchors", 0);
-    if(a){
-        int len = strlen(a);
-        int n = 1;
-        int i;
-        for(i = 0; i < len; ++i){
-            if (a[i] == ',') ++n;
-        }
-        for(i = 0; i < n && i < num*2; ++i){
-            float bias = atof(a);
-            l.biases[i] = bias;
-            a = strchr(a, ',')+1;
-        }
-    }
-    return l;
-}
-detection_layer parse_detection(list *options, size_params params)
-{
-    int coords = option_find_int(options, "coords", 1);
-    int classes = option_find_int(options, "classes", 1);
-    int rescore = option_find_int(options, "rescore", 0);
-    int num = option_find_int(options, "num", 1);
-    int side = option_find_int(options, "side", 7);
-    detection_layer layer = make_detection_layer(params.batch, params.inputs, num, side, classes, coords, rescore);
-
-    layer.softmax = option_find_int(options, "softmax", 0);
-    layer.sqrt = option_find_int(options, "sqrt", 0);
-
-    layer.max_boxes = option_find_int_quiet(options, "max",30);
-    layer.coord_scale = option_find_float(options, "coord_scale", 1);
-    layer.forced = option_find_int(options, "forced", 0);
-    layer.object_scale = option_find_float(options, "object_scale", 1);
-    layer.noobject_scale = option_find_float(options, "noobject_scale", 1);
-    layer.class_scale = option_find_float(options, "class_scale", 1);
-    layer.jitter = option_find_float(options, "jitter", .2);
-    layer.resize = option_find_float_quiet(options, "resize", 1.0);
-    layer.random = option_find_float_quiet(options, "random", 0);
-    layer.reorg = option_find_int_quiet(options, "reorg", 0);
-    return layer;
-}
-
-cost_layer parse_cost(list *options, size_params params)
-{
-    char *type_s = option_find_str(options, "type", "sse");
-    COST_TYPE type = get_cost_type(type_s);
-    float scale = option_find_float_quiet(options, "scale",1);
-    cost_layer layer = make_cost_layer(params.batch, params.inputs, type, scale);
-    layer.ratio =  option_find_float_quiet(options, "ratio",0);
-    return layer;
-}
-
-crop_layer parse_crop(list *options, size_params params)
-{
-    int crop_height = option_find_int(options, "crop_height",1);
-    int crop_width = option_find_int(options, "crop_width",1);
-    int flip = option_find_int(options, "flip",0);
-    float angle = option_find_float(options, "angle",0);
-    float saturation = option_find_float(options, "saturation",1);
-    float exposure = option_find_float(options, "exposure",1);
-
-    int batch,h,w,c;
-    h = params.h;
-    w = params.w;
-    c = params.c;
-    batch=params.batch;
-    if(!(h && w && c)) error("Layer before crop layer must output image.");
-
-    int noadjust = option_find_int_quiet(options, "noadjust",0);
-
-    crop_layer l = make_crop_layer(batch,h,w,c,crop_height,crop_width,flip, angle, saturation, exposure);
-    l.shift = option_find_float(options, "shift", 0);
-    l.noadjust = noadjust;
-    return l;
-}
-
-layer parse_reorg(list *options, size_params params)
-{
-    int stride = option_find_int(options, "stride",1);
-    int reverse = option_find_int_quiet(options, "reverse",0);
-
-    int batch,h,w,c;
-    h = params.h;
-    w = params.w;
-    c = params.c;
-    batch=params.batch;
-    if(!(h && w && c)) error("Layer before reorg layer must output image.");
-
-    layer layer = make_reorg_layer(batch,w,h,c,stride,reverse);
-    return layer;
-}
-
-layer parse_reorg_old(list *options, size_params params)
-{
-    printf("\n reorg_old \n");
-    int stride = option_find_int(options, "stride", 1);
-    int reverse = option_find_int_quiet(options, "reverse", 0);
-
-    int batch, h, w, c;
-    h = params.h;
-    w = params.w;
-    c = params.c;
-    batch = params.batch;
-    if (!(h && w && c)) error("Layer before reorg layer must output image.");
-
-    layer layer = make_reorg_old_layer(batch, w, h, c, stride, reverse);
-    return layer;
-}
-
-maxpool_layer parse_local_avgpool(list *options, size_params params)
-{
-    int stride = option_find_int(options, "stride", 1);
-    int stride_x = option_find_int_quiet(options, "stride_x", stride);
-    int stride_y = option_find_int_quiet(options, "stride_y", stride);
-    int size = option_find_int(options, "size", stride);
-    int padding = option_find_int_quiet(options, "padding", size - 1);
-    int maxpool_depth = 0;
-    int out_channels = 1;
-    int antialiasing = 0;
-    const int avgpool = 1;
-
-    int batch, h, w, c;
-    h = params.h;
-    w = params.w;
-    c = params.c;
-    batch = params.batch;
-    if (!(h && w && c)) error("Layer before [local_avgpool] layer must output image.");
-
-    maxpool_layer layer = make_maxpool_layer(batch, h, w, c, size, stride_x, stride_y, padding, maxpool_depth, out_channels, antialiasing, avgpool, params.train);
-    return layer;
-}
-
-maxpool_layer parse_maxpool(list *options, size_params params)
-{
-    int stride = option_find_int(options, "stride",1);
-    int stride_x = option_find_int_quiet(options, "stride_x", stride);
-    int stride_y = option_find_int_quiet(options, "stride_y", stride);
-    int size = option_find_int(options, "size",stride);
-    int padding = option_find_int_quiet(options, "padding", size-1);
-    int maxpool_depth = option_find_int_quiet(options, "maxpool_depth", 0);
-    int out_channels = option_find_int_quiet(options, "out_channels", 1);
-    int antialiasing = option_find_int_quiet(options, "antialiasing", 0);
-    const int avgpool = 0;
-
-    int batch,h,w,c;
-    h = params.h;
-    w = params.w;
-    c = params.c;
-    batch=params.batch;
-    if(!(h && w && c)) error("Layer before [maxpool] layer must output image.");
-
-    maxpool_layer layer = make_maxpool_layer(batch, h, w, c, size, stride_x, stride_y, padding, maxpool_depth, out_channels, antialiasing, avgpool, params.train);
-    return layer;
-}
-
-avgpool_layer parse_avgpool(list *options, size_params params)
-{
-    int batch,w,h,c;
-    w = params.w;
-    h = params.h;
-    c = params.c;
-    batch=params.batch;
-    if(!(h && w && c)) error("Layer before avgpool layer must output image.");
-
-    avgpool_layer layer = make_avgpool_layer(batch,w,h,c);
-    return layer;
-}
-
-dropout_layer parse_dropout(list *options, size_params params)
-{
-    float probability = option_find_float(options, "probability", .2);
-    int dropblock = option_find_int_quiet(options, "dropblock", 0);
-    float dropblock_size_rel = option_find_float_quiet(options, "dropblock_size_rel", 0);
-    int dropblock_size_abs = option_find_float_quiet(options, "dropblock_size_abs", 0);
-    if (dropblock_size_abs > params.w || dropblock_size_abs > params.h) {
-        printf(" [dropout] - dropblock_size_abs = %d that is bigger than layer size %d x %d \n", dropblock_size_abs, params.w, params.h);
-        dropblock_size_abs = min_val_cmp(params.w, params.h);
-    }
-    if (dropblock && !dropblock_size_rel && !dropblock_size_abs) {
-        printf(" [dropout] - None of the parameters (dropblock_size_rel or dropblock_size_abs) are set, will be used: dropblock_size_abs = 7 \n");
-        dropblock_size_abs = 7;
-    }
-    if (dropblock_size_rel && dropblock_size_abs) {
-        printf(" [dropout] - Both parameters are set, only the parameter will be used: dropblock_size_abs = %d \n", dropblock_size_abs);
-        dropblock_size_rel = 0;
-    }
-    dropout_layer layer = make_dropout_layer(params.batch, params.inputs, probability, dropblock, dropblock_size_rel, dropblock_size_abs, params.w, params.h, params.c);
-    layer.out_w = params.w;
-    layer.out_h = params.h;
-    layer.out_c = params.c;
-    return layer;
-}
-
-layer parse_normalization(list *options, size_params params)
-{
-    float alpha = option_find_float(options, "alpha", .0001);
-    float beta =  option_find_float(options, "beta" , .75);
-    float kappa = option_find_float(options, "kappa", 1);
-    int size = option_find_int(options, "size", 5);
-    layer l = make_normalization_layer(params.batch, params.w, params.h, params.c, size, alpha, beta, kappa);
-    return l;
-}
-
-layer parse_batchnorm(list *options, size_params params)
-{
-    layer l = make_batchnorm_layer(params.batch, params.w, params.h, params.c, params.train);
-    return l;
-}
-
-layer parse_shortcut(list *options, size_params params, network net)
-{
-    char *activation_s = option_find_str(options, "activation", "linear");
-    ACTIVATION activation = get_activation(activation_s);
-
-    char *weights_type_str = option_find_str_quiet(options, "weights_type", "none");
-    WEIGHTS_TYPE_T weights_type = NO_WEIGHTS;
-    if(strcmp(weights_type_str, "per_feature") == 0 || strcmp(weights_type_str, "per_layer") == 0) weights_type = PER_FEATURE;
-    else if (strcmp(weights_type_str, "per_channel") == 0) weights_type = PER_CHANNEL;
-    else if (strcmp(weights_type_str, "none") != 0) {
-        printf("Error: Incorrect weights_type = %s \n Use one of: none, per_feature, per_channel \n", weights_type_str);
-        getchar();
-        exit(0);
-    }
-
-    char *weights_normalization_str = option_find_str_quiet(options, "weights_normalization", "none");
-    WEIGHTS_NORMALIZATION_T weights_normalization = NO_NORMALIZATION;
-    if (strcmp(weights_normalization_str, "relu") == 0 || strcmp(weights_normalization_str, "avg_relu") == 0) weights_normalization = RELU_NORMALIZATION;
-    else if (strcmp(weights_normalization_str, "softmax") == 0) weights_normalization = SOFTMAX_NORMALIZATION;
-    else if (strcmp(weights_type_str, "none") != 0) {
-        printf("Error: Incorrect weights_normalization = %s \n Use one of: none, relu, softmax \n", weights_normalization_str);
-        getchar();
-        exit(0);
-    }
-
-    char *l = option_find(options, "from");
-    int len = strlen(l);
-    if (!l) error("Route Layer must specify input layers: from = ...");
-    int n = 1;
-    int i;
-    for (i = 0; i < len; ++i) {
-        if (l[i] == ',') ++n;
-    }
-
-    int* layers = (int*)calloc(n, sizeof(int));
-    int* sizes = (int*)calloc(n, sizeof(int));
-    float **layers_output = (float **)calloc(n, sizeof(float *));
-    float **layers_delta = (float **)calloc(n, sizeof(float *));
-    float **layers_output_gpu = (float **)calloc(n, sizeof(float *));
-    float **layers_delta_gpu = (float **)calloc(n, sizeof(float *));
-
-    for (i = 0; i < n; ++i) {
-        int index = atoi(l);
-        l = strchr(l, ',') + 1;
-        if (index < 0) index = params.index + index;
-        layers[i] = index;
-        sizes[i] = params.net.layers[index].outputs;
-        layers_output[i] = params.net.layers[index].output;
-        layers_delta[i] = params.net.layers[index].delta;
-    }
-
-#ifdef GPU
-    for (i = 0; i < n; ++i) {
-        layers_output_gpu[i] = params.net.layers[layers[i]].output_gpu;
-        layers_delta_gpu[i] = params.net.layers[layers[i]].delta_gpu;
-    }
-#endif// GPU
-
-    layer s = make_shortcut_layer(params.batch, n, layers, sizes, params.w, params.h, params.c, layers_output, layers_delta,
-        layers_output_gpu, layers_delta_gpu, weights_type, weights_normalization, activation, params.train);
-
-    free(layers_output_gpu);
-    free(layers_delta_gpu);
-
-    for (i = 0; i < n; ++i) {
-        int index = layers[i];
-        assert(params.w == net.layers[index].out_w && params.h == net.layers[index].out_h);
-
-        if (params.w != net.layers[index].out_w || params.h != net.layers[index].out_h || params.c != net.layers[index].out_c)
-            fprintf(stderr, " (%4d x%4d x%4d) + (%4d x%4d x%4d) \n",
-                params.w, params.h, params.c, net.layers[index].out_w, net.layers[index].out_h, params.net.layers[index].out_c);
-    }
-
-    return s;
-}
-
-
-layer parse_scale_channels(list *options, size_params params, network net)
-{
-    char *l = option_find(options, "from");
-    int index = atoi(l);
-    if (index < 0) index = params.index + index;
-    int scale_wh = option_find_int_quiet(options, "scale_wh", 0);
-
-    int batch = params.batch;
-    layer from = net.layers[index];
-
-    layer s = make_scale_channels_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c, scale_wh);
-
-    char *activation_s = option_find_str_quiet(options, "activation", "linear");
-    ACTIVATION activation = get_activation(activation_s);
-    s.activation = activation;
-    if (activation == SWISH || activation == MISH) {
-        printf(" [scale_channels] layer doesn't support SWISH or MISH activations \n");
-    }
-    return s;
-}
-
-layer parse_sam(list *options, size_params params, network net)
-{
-    char *l = option_find(options, "from");
-    int index = atoi(l);
-    if (index < 0) index = params.index + index;
-
-    int batch = params.batch;
-    layer from = net.layers[index];
-
-    layer s = make_sam_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c);
-
-    char *activation_s = option_find_str_quiet(options, "activation", "linear");
-    ACTIVATION activation = get_activation(activation_s);
-    s.activation = activation;
-    if (activation == SWISH || activation == MISH) {
-        printf(" [sam] layer doesn't support SWISH or MISH activations \n");
-    }
-    return s;
-}
-
-
-layer parse_activation(list *options, size_params params)
-{
-    char *activation_s = option_find_str(options, "activation", "linear");
-    ACTIVATION activation = get_activation(activation_s);
-
-    layer l = make_activation_layer(params.batch, params.inputs, activation);
-
-    l.out_h = params.h;
-    l.out_w = params.w;
-    l.out_c = params.c;
-    l.h = params.h;
-    l.w = params.w;
-    l.c = params.c;
-
-    return l;
-}
-
-layer parse_upsample(list *options, size_params params, network net)
-{
-
-    int stride = option_find_int(options, "stride", 2);
-    layer l = make_upsample_layer(params.batch, params.w, params.h, params.c, stride);
-    l.scale = option_find_float_quiet(options, "scale", 1);
-    return l;
-}
-
-route_layer parse_route(list *options, size_params params)
-{
-    char *l = option_find(options, "layers");
-    if(!l) error("Route Layer must specify input layers");
-    int len = strlen(l);
-    int n = 1;
-    int i;
-    for(i = 0; i < len; ++i){
-        if (l[i] == ',') ++n;
-    }
-
-    int* layers = (int*)xcalloc(n, sizeof(int));
-    int* sizes = (int*)xcalloc(n, sizeof(int));
-    for(i = 0; i < n; ++i){
-        int index = atoi(l);
-        l = strchr(l, ',')+1;
-        if(index < 0) index = params.index + index;
-        layers[i] = index;
-        sizes[i] = params.net.layers[index].outputs;
-    }
-    int batch = params.batch;
-
-    int groups = option_find_int_quiet(options, "groups", 1);
-    int group_id = option_find_int_quiet(options, "group_id", 0);
-
-    route_layer layer = make_route_layer(batch, n, layers, sizes, groups, group_id);
-
-    convolutional_layer first = params.net.layers[layers[0]];
-    layer.out_w = first.out_w;
-    layer.out_h = first.out_h;
-    layer.out_c = first.out_c;
-    for(i = 1; i < n; ++i){
-        int index = layers[i];
-        convolutional_layer next = params.net.layers[index];
-        if(next.out_w == first.out_w && next.out_h == first.out_h){
-            layer.out_c += next.out_c;
-        }else{
-            fprintf(stderr, " The width and height of the input layers are different. \n");
-            layer.out_h = layer.out_w = layer.out_c = 0;
-        }
-    }
-    layer.out_c = layer.out_c / layer.groups;
-
-    layer.w = first.w;
-    layer.h = first.h;
-    layer.c = layer.out_c;
-
-    if (n > 3) fprintf(stderr, " \t    ");
-    else if (n > 1) fprintf(stderr, " \t            ");
-    else fprintf(stderr, " \t\t            ");
-
-    fprintf(stderr, "           ");
-    if (layer.groups > 1) fprintf(stderr, "%d/%d", layer.group_id, layer.groups);
-    else fprintf(stderr, "   ");
-    fprintf(stderr, " -> %4d x%4d x%4d \n", layer.out_w, layer.out_h, layer.out_c);
-
-    return layer;
-}
-
-learning_rate_policy get_policy(char *s)
-{
-    if (strcmp(s, "random")==0) return RANDOM;
-    if (strcmp(s, "poly")==0) return POLY;
-    if (strcmp(s, "constant")==0) return CONSTANT;
-    if (strcmp(s, "step")==0) return STEP;
-    if (strcmp(s, "exp")==0) return EXP;
-    if (strcmp(s, "sigmoid")==0) return SIG;
-    if (strcmp(s, "steps")==0) return STEPS;
-    if (strcmp(s, "sgdr")==0) return SGDR;
-    fprintf(stderr, "Couldn't find policy %s, going with constant\n", s);
-    return CONSTANT;
-}
-
-void parse_net_options(list *options, network *net)
-{
-    net->max_batches = option_find_int(options, "max_batches", 0);
-    net->batch = option_find_int(options, "batch",1);
-    net->learning_rate = option_find_float(options, "learning_rate", .001);
-    net->learning_rate_min = option_find_float_quiet(options, "learning_rate_min", .00001);
-    net->batches_per_cycle = option_find_int_quiet(options, "sgdr_cycle", net->max_batches);
-    net->batches_cycle_mult = option_find_int_quiet(options, "sgdr_mult", 2);
-    net->momentum = option_find_float(options, "momentum", .9);
-    net->decay = option_find_float(options, "decay", .0001);
-    int subdivs = option_find_int(options, "subdivisions",1);
-    net->time_steps = option_find_int_quiet(options, "time_steps",1);
-    net->track = option_find_int_quiet(options, "track", 0);
-    net->augment_speed = option_find_int_quiet(options, "augment_speed", 2);
-    net->init_sequential_subdivisions = net->sequential_subdivisions = option_find_int_quiet(options, "sequential_subdivisions", subdivs);
-    if (net->sequential_subdivisions > subdivs) net->init_sequential_subdivisions = net->sequential_subdivisions = subdivs;
-    net->try_fix_nan = option_find_int_quiet(options, "try_fix_nan", 0);
-    net->batch /= subdivs;
-    net->batch *= net->time_steps;
-    net->subdivisions = subdivs;
-
-    *net->seen = 0;
-    *net->cur_iteration = 0;
-    net->loss_scale = option_find_float_quiet(options, "loss_scale", 1);
-    net->dynamic_minibatch = option_find_int_quiet(options, "dynamic_minibatch", 0);
-    net->optimized_memory = option_find_int_quiet(options, "optimized_memory", 0);
-    net->workspace_size_limit = (size_t)1024*1024 * option_find_float_quiet(options, "workspace_size_limit_MB", 1024);  // 1024 MB by default
-
-    net->adam = option_find_int_quiet(options, "adam", 0);
-    if(net->adam){
-        net->B1 = option_find_float(options, "B1", .9);
-        net->B2 = option_find_float(options, "B2", .999);
-        net->eps = option_find_float(options, "eps", .000001);
-    }
-
-    net->h = option_find_int_quiet(options, "height",0);
-    net->w = option_find_int_quiet(options, "width",0);
-    net->c = option_find_int_quiet(options, "channels",0);
-    net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c);
-    net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2);
-    net->min_crop = option_find_int_quiet(options, "min_crop",net->w);
-    net->flip = option_find_int_quiet(options, "flip", 1);
-    net->blur = option_find_int_quiet(options, "blur", 0);
-    net->gaussian_noise = option_find_int_quiet(options, "gaussian_noise", 0);
-    net->mixup = option_find_int_quiet(options, "mixup", 0);
-    int cutmix = option_find_int_quiet(options, "cutmix", 0);
-    int mosaic = option_find_int_quiet(options, "mosaic", 0);
-    if (mosaic && cutmix) net->mixup = 4;
-    else if (cutmix) net->mixup = 2;
-    else if (mosaic) net->mixup = 3;
-    net->letter_box = option_find_int_quiet(options, "letter_box", 0);
-    net->label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f);
-    net->resize_step = option_find_float_quiet(options, "resize_step", 32);
-    net->attention = option_find_int_quiet(options, "attention", 0);
-    net->adversarial_lr = option_find_float_quiet(options, "adversarial_lr", 0);
-
-    net->angle = option_find_float_quiet(options, "angle", 0);
-    net->aspect = option_find_float_quiet(options, "aspect", 1);
-    net->saturation = option_find_float_quiet(options, "saturation", 1);
-    net->exposure = option_find_float_quiet(options, "exposure", 1);
-    net->hue = option_find_float_quiet(options, "hue", 0);
-    net->power = option_find_float_quiet(options, "power", 4);
-
-    if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied");
-
-    char *policy_s = option_find_str(options, "policy", "constant");
-    net->policy = get_policy(policy_s);
-    net->burn_in = option_find_int_quiet(options, "burn_in", 0);
-#ifdef GPU
-    if (net->gpu_index >= 0) {
-        char device_name[1024];
-        int compute_capability = get_gpu_compute_capability(net->gpu_index, device_name);
-#ifdef CUDNN_HALF
-        if (compute_capability >= 700) net->cudnn_half = 1;
-        else net->cudnn_half = 0;
-#endif// CUDNN_HALF
-        fprintf(stderr, " %d : compute_capability = %d, cudnn_half = %d, GPU: %s \n", net->gpu_index, compute_capability, net->cudnn_half, device_name);
-    }
-    else fprintf(stderr, " GPU isn't used \n");
-#endif// GPU
-    if(net->policy == STEP){
-        net->step = option_find_int(options, "step", 1);
-        net->scale = option_find_float(options, "scale", 1);
-    } else if (net->policy == STEPS || net->policy == SGDR){
-        char *l = option_find(options, "steps");
-        char *p = option_find(options, "scales");
-        char *s = option_find(options, "seq_scales");
-        if(net->policy == STEPS && (!l || !p)) error("STEPS policy must have steps and scales in cfg file");
-
-        if (l) {
-            int len = strlen(l);
-            int n = 1;
-            int i;
-            for (i = 0; i < len; ++i) {
-                if (l[i] == ',') ++n;
-            }
-            int* steps = (int*)xcalloc(n, sizeof(int));
-            float* scales = (float*)xcalloc(n, sizeof(float));
-            float* seq_scales = (float*)xcalloc(n, sizeof(float));
-            for (i = 0; i < n; ++i) {
-                float scale = 1.0;
-                if (p) {
-                    scale = atof(p);
-                    p = strchr(p, ',') + 1;
-                }
-                float sequence_scale = 1.0;
-                if (s) {
-                    sequence_scale = atof(s);
-                    s = strchr(s, ',') + 1;
-                }
-                int step = atoi(l);
-                l = strchr(l, ',') + 1;
-                steps[i] = step;
-                scales[i] = scale;
-                seq_scales[i] = sequence_scale;
-            }
-            net->scales = scales;
-            net->steps = steps;
-            net->seq_scales = seq_scales;
-            net->num_steps = n;
-        }
-    } else if (net->policy == EXP){
-        net->gamma = option_find_float(options, "gamma", 1);
-    } else if (net->policy == SIG){
-        net->gamma = option_find_float(options, "gamma", 1);
-        net->step = option_find_int(options, "step", 1);
-    } else if (net->policy == POLY || net->policy == RANDOM){
-        //net->power = option_find_float(options, "power", 1);
-    }
-
-}
-
-int is_network(section *s)
-{
-    return (strcmp(s->type, "[net]")==0
-            || strcmp(s->type, "[network]")==0);
-}
-
-void set_train_only_bn(network net)
-{
-    int train_only_bn = 0;
-    int i;
-    for (i = net.n - 1; i >= 0; --i) {
-        if (net.layers[i].train_only_bn) train_only_bn = net.layers[i].train_only_bn;  // set l.train_only_bn for all previous layers
-        if (train_only_bn) {
-            net.layers[i].train_only_bn = train_only_bn;
-
-            if (net.layers[i].type == CONV_LSTM) {
-                net.layers[i].wf->train_only_bn = train_only_bn;
-                net.layers[i].wi->train_only_bn = train_only_bn;
-                net.layers[i].wg->train_only_bn = train_only_bn;
-                net.layers[i].wo->train_only_bn = train_only_bn;
-                net.layers[i].uf->train_only_bn = train_only_bn;
-                net.layers[i].ui->train_only_bn = train_only_bn;
-                net.layers[i].ug->train_only_bn = train_only_bn;
-                net.layers[i].uo->train_only_bn = train_only_bn;
-                if (net.layers[i].peephole) {
-                    net.layers[i].vf->train_only_bn = train_only_bn;
-                    net.layers[i].vi->train_only_bn = train_only_bn;
-                    net.layers[i].vo->train_only_bn = train_only_bn;
-                }
-            }
-            else if (net.layers[i].type == CRNN) {
-                net.layers[i].input_layer->train_only_bn = train_only_bn;
-                net.layers[i].self_layer->train_only_bn = train_only_bn;
-                net.layers[i].output_layer->train_only_bn = train_only_bn;
-            }
-        }
-    }
-}
-
-network parse_network_cfg(char *filename)
-{
-    return parse_network_cfg_custom(filename, 0, 0);
-}
-
-network parse_network_cfg_custom(char *filename, int batch, int time_steps)
-{
-    list *sections = read_cfg(filename);
-    node *n = sections->front;
-    if(!n) error("Config file has no sections");
-    network net = make_network(sections->size - 1);
-    net.gpu_index = gpu_index;
-    size_params params;
-
-    if (batch > 0) params.train = 0;    // allocates memory for Detection only
-    else params.train = 1;              // allocates memory for Detection & Training
-
-    section *s = (section *)n->val;
-    list *options = s->options;
-    if(!is_network(s)) error("First section must be [net] or [network]");
-    parse_net_options(options, &net);
-
-#ifdef GPU
-    printf("net.optimized_memory = %d \n", net.optimized_memory);
-    if (net.optimized_memory >= 2 && params.train) {
-        pre_allocate_pinned_memory((size_t)1024 * 1024 * 1024 * 8);   // pre-allocate 8 GB CPU-RAM for pinned memory
-    }
-#endif  // GPU
-
-    params.h = net.h;
-    params.w = net.w;
-    params.c = net.c;
-    params.inputs = net.inputs;
-    if (batch > 0) net.batch = batch;
-    if (time_steps > 0) net.time_steps = time_steps;
-    if (net.batch < 1) net.batch = 1;
-    if (net.time_steps < 1) net.time_steps = 1;
-    if (net.batch < net.time_steps) net.batch = net.time_steps;
-    params.batch = net.batch;
-    params.time_steps = net.time_steps;
-    params.net = net;
-    printf("mini_batch = %d, batch = %d, time_steps = %d, train = %d \n", net.batch, net.batch * net.subdivisions, net.time_steps, params.train);
-
-    int avg_outputs = 0;
-    int avg_counter = 0;
-    float bflops = 0;
-    size_t workspace_size = 0;
-    size_t max_inputs = 0;
-    size_t max_outputs = 0;
-    int receptive_w = 1, receptive_h = 1;
-    int receptive_w_scale = 1, receptive_h_scale = 1;
-    const int show_receptive_field = option_find_float_quiet(options, "show_receptive_field", 0);
-
-    n = n->next;
-    int count = 0;
-    free_section(s);
-    fprintf(stderr, "   layer   filters  size/strd(dil)      input                output\n");
-    while(n){
-        params.index = count;
-        fprintf(stderr, "%4d ", count);
-        s = (section *)n->val;
-        options = s->options;
-        layer l = { (LAYER_TYPE)0 };
-        LAYER_TYPE lt = string_to_layer_type(s->type);
-        if(lt == CONVOLUTIONAL){
-            l = parse_convolutional(options, params);
-        }else if(lt == LOCAL){
-            l = parse_local(options, params);
-        }else if(lt == ACTIVE){
-            l = parse_activation(options, params);
-        }else if(lt == RNN){
-            l = parse_rnn(options, params);
-        }else if(lt == GRU){
-            l = parse_gru(options, params);
-        }else if(lt == LSTM){
-            l = parse_lstm(options, params);
-        }else if (lt == CONV_LSTM) {
-            l = parse_conv_lstm(options, params);
-        }else if(lt == CRNN){
-            l = parse_crnn(options, params);
-        }else if(lt == CONNECTED){
-            l = parse_connected(options, params);
-        }else if(lt == CROP){
-            l = parse_crop(options, params);
-        }else if(lt == COST){
-            l = parse_cost(options, params);
-            l.keep_delta_gpu = 1;
-        }else if(lt == REGION){
-            l = parse_region(options, params);
-            l.keep_delta_gpu = 1;
-        }else if (lt == YOLO) {
-            l = parse_yolo(options, params);
-            l.keep_delta_gpu = 1;
-        }else if (lt == GAUSSIAN_YOLO) {
-            l = parse_gaussian_yolo(options, params);
-            l.keep_delta_gpu = 1;
-        }else if(lt == DETECTION){
-            l = parse_detection(options, params);
-        }else if(lt == SOFTMAX){
-            l = parse_softmax(options, params);
-            net.hierarchy = l.softmax_tree;
-            l.keep_delta_gpu = 1;
-        }else if(lt == NORMALIZATION){
-            l = parse_normalization(options, params);
-        }else if(lt == BATCHNORM){
-            l = parse_batchnorm(options, params);
-        }else if(lt == MAXPOOL){
-            l = parse_maxpool(options, params);
-        }else if (lt == LOCAL_AVGPOOL) {
-            l = parse_local_avgpool(options, params);
-        }else if(lt == REORG){
-            l = parse_reorg(options, params);        }
-        else if (lt == REORG_OLD) {
-            l = parse_reorg_old(options, params);
-        }else if(lt == AVGPOOL){
-            l = parse_avgpool(options, params);
-        }else if(lt == ROUTE){
-            l = parse_route(options, params);
-            int k;
-            for (k = 0; k < l.n; ++k) {
-                net.layers[l.input_layers[k]].use_bin_output = 0;
-                net.layers[l.input_layers[k]].keep_delta_gpu = 1;
-            }
-        }else if (lt == UPSAMPLE) {
-            l = parse_upsample(options, params, net);
-        }else if(lt == SHORTCUT){
-            l = parse_shortcut(options, params, net);
-            net.layers[count - 1].use_bin_output = 0;
-            net.layers[l.index].use_bin_output = 0;
-            net.layers[l.index].keep_delta_gpu = 1;
-        }else if (lt == SCALE_CHANNELS) {
-            l = parse_scale_channels(options, params, net);
-            net.layers[count - 1].use_bin_output = 0;
-            net.layers[l.index].use_bin_output = 0;
-            net.layers[l.index].keep_delta_gpu = 1;
-        }
-        else if (lt == SAM) {
-            l = parse_sam(options, params, net);
-            net.layers[count - 1].use_bin_output = 0;
-            net.layers[l.index].use_bin_output = 0;
-            net.layers[l.index].keep_delta_gpu = 1;
-        }else if(lt == DROPOUT){
-            l = parse_dropout(options, params);
-            l.output = net.layers[count-1].output;
-            l.delta = net.layers[count-1].delta;
-#ifdef GPU
-            l.output_gpu = net.layers[count-1].output_gpu;
-            l.delta_gpu = net.layers[count-1].delta_gpu;
-            l.keep_delta_gpu = 1;
-#endif
-        }
-        else if (lt == EMPTY) {
-            layer empty_layer = {(LAYER_TYPE)0};
-            empty_layer.out_w = params.w;
-            empty_layer.out_h = params.h;
-            empty_layer.out_c = params.c;
-            l = empty_layer;
-            l.output = net.layers[count - 1].output;
-            l.delta = net.layers[count - 1].delta;
-#ifdef GPU
-            l.output_gpu = net.layers[count - 1].output_gpu;
-            l.delta_gpu = net.layers[count - 1].delta_gpu;
-#endif
-        }else{
-            fprintf(stderr, "Type not recognized: %s\n", s->type);
-        }
-
-        // calculate receptive field
-        if(show_receptive_field)
-        {
-            int dilation = max_val_cmp(1, l.dilation);
-            int stride = max_val_cmp(1, l.stride);
-            int size = max_val_cmp(1, l.size);
-
-            if (l.type == UPSAMPLE || (l.type == REORG))
-            {
-
-                l.receptive_w = receptive_w;
-                l.receptive_h = receptive_h;
-                l.receptive_w_scale = receptive_w_scale = receptive_w_scale / stride;
-                l.receptive_h_scale = receptive_h_scale = receptive_h_scale / stride;
-
-            }
-            else {
-                if (l.type == ROUTE) {
-                    receptive_w = receptive_h = receptive_w_scale = receptive_h_scale = 0;
-                    int k;
-                    for (k = 0; k < l.n; ++k) {
-                        layer route_l = net.layers[l.input_layers[k]];
-                        receptive_w = max_val_cmp(receptive_w, route_l.receptive_w);
-                        receptive_h = max_val_cmp(receptive_h, route_l.receptive_h);
-                        receptive_w_scale = max_val_cmp(receptive_w_scale, route_l.receptive_w_scale);
-                        receptive_h_scale = max_val_cmp(receptive_h_scale, route_l.receptive_h_scale);
-                    }
-                }
-                else
-                {
-                    int increase_receptive = size + (dilation - 1) * 2 - 1;// stride;
-                    increase_receptive = max_val_cmp(0, increase_receptive);
-
-                    receptive_w += increase_receptive * receptive_w_scale;
-                    receptive_h += increase_receptive * receptive_h_scale;
-                    receptive_w_scale *= stride;
-                    receptive_h_scale *= stride;
-                }
-
-                l.receptive_w = receptive_w;
-                l.receptive_h = receptive_h;
-                l.receptive_w_scale = receptive_w_scale;
-                l.receptive_h_scale = receptive_h_scale;
-            }
-            //printf(" size = %d, dilation = %d, stride = %d, receptive_w = %d, receptive_w_scale = %d - ", size, dilation, stride, receptive_w, receptive_w_scale);
-
-            int cur_receptive_w = receptive_w;
-            int cur_receptive_h = receptive_h;
-
-            fprintf(stderr, "%4d - receptive field: %d x %d \n", count, cur_receptive_w, cur_receptive_h);
-        }
-
-#ifdef GPU
-        // futher GPU-memory optimization: net.optimized_memory == 2
-        if (net.optimized_memory >= 2 && params.train && l.type != DROPOUT)
-        {
-            l.optimized_memory = net.optimized_memory;
-            if (l.output_gpu) {
-                cuda_free(l.output_gpu);
-                //l.output_gpu = cuda_make_array_pinned(l.output, l.batch*l.outputs); // l.steps
-                l.output_gpu = cuda_make_array_pinned_preallocated(NULL, l.batch*l.outputs); // l.steps
-            }
-            if (l.activation_input_gpu) {
-                cuda_free(l.activation_input_gpu);
-                l.activation_input_gpu = cuda_make_array_pinned_preallocated(NULL, l.batch*l.outputs); // l.steps
-            }
-
-            if (l.x_gpu) {
-                cuda_free(l.x_gpu);
-                l.x_gpu = cuda_make_array_pinned_preallocated(NULL, l.batch*l.outputs); // l.steps
-            }
-
-            // maximum optimization
-            if (net.optimized_memory >= 3 && l.type != DROPOUT) {
-                if (l.delta_gpu) {
-                    cuda_free(l.delta_gpu);
-                    //l.delta_gpu = cuda_make_array_pinned_preallocated(NULL, l.batch*l.outputs); // l.steps
-                    //printf("\n\n PINNED DELTA GPU = %d \n", l.batch*l.outputs);
-                }
-            }
-
-            if (l.type == CONVOLUTIONAL) {
-                set_specified_workspace_limit(&l, net.workspace_size_limit);   // workspace size limit 1 GB
-            }
-        }
-#endif // GPU
-
-        l.clip = option_find_float_quiet(options, "clip", 0);
-        l.dynamic_minibatch = net.dynamic_minibatch;
-        l.onlyforward = option_find_int_quiet(options, "onlyforward", 0);
-        l.dont_update = option_find_int_quiet(options, "dont_update", 0);
-        l.burnin_update = option_find_int_quiet(options, "burnin_update", 0);
-        l.stopbackward = option_find_int_quiet(options, "stopbackward", 0);
-        l.train_only_bn = option_find_int_quiet(options, "train_only_bn", 0);
-        l.dontload = option_find_int_quiet(options, "dontload", 0);
-        l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0);
-        l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1);
-        option_unused(options);
-        net.layers[count] = l;
-        if (l.workspace_size > workspace_size) workspace_size = l.workspace_size;
-        if (l.inputs > max_inputs) max_inputs = l.inputs;
-        if (l.outputs > max_outputs) max_outputs = l.outputs;
-        free_section(s);
-        n = n->next;
-        ++count;
-        if(n){
-            if (l.antialiasing) {
-                params.h = l.input_layer->out_h;
-                params.w = l.input_layer->out_w;
-                params.c = l.input_layer->out_c;
-                params.inputs = l.input_layer->outputs;
-            }
-            else {
-                params.h = l.out_h;
-                params.w = l.out_w;
-                params.c = l.out_c;
-                params.inputs = l.outputs;
-            }
-        }
-        if (l.bflops > 0) bflops += l.bflops;
-
-        if (l.w > 1 && l.h > 1) {
-            avg_outputs += l.outputs;
-            avg_counter++;
-        }
-    }
-    free_list(sections);
-
-#ifdef GPU
-    if (net.optimized_memory && params.train)
-    {
-        int k;
-        for (k = 0; k < net.n; ++k) {
-            layer l = net.layers[k];
-            // delta GPU-memory optimization: net.optimized_memory == 1
-            if (!l.keep_delta_gpu) {
-                const size_t delta_size = l.outputs*l.batch; // l.steps
-                if (net.max_delta_gpu_size < delta_size) {
-                    net.max_delta_gpu_size = delta_size;
-                    if (net.global_delta_gpu) cuda_free(net.global_delta_gpu);
-                    if (net.state_delta_gpu) cuda_free(net.state_delta_gpu);
-                    assert(net.max_delta_gpu_size > 0);
-                    net.global_delta_gpu = (float *)cuda_make_array(NULL, net.max_delta_gpu_size);
-                    net.state_delta_gpu = (float *)cuda_make_array(NULL, net.max_delta_gpu_size);
-                }
-                if (l.delta_gpu) {
-                    if (net.optimized_memory >= 3) {}
-                    else cuda_free(l.delta_gpu);
-                }
-                l.delta_gpu = net.global_delta_gpu;
-            }
-
-            // maximum optimization
-            if (net.optimized_memory >= 3 && l.type != DROPOUT) {
-                if (l.delta_gpu && l.keep_delta_gpu) {
-                    //cuda_free(l.delta_gpu);   // already called above
-                    l.delta_gpu = cuda_make_array_pinned_preallocated(NULL, l.batch*l.outputs); // l.steps
-                    //printf("\n\n PINNED DELTA GPU = %d \n", l.batch*l.outputs);
-                }
-            }
-
-            net.layers[k] = l;
-        }
-    }
-#endif
-
-    set_train_only_bn(net); // set l.train_only_bn for all required layers
-
-    net.outputs = get_network_output_size(net);
-    net.output = get_network_output(net);
-    avg_outputs = avg_outputs / avg_counter;
-    fprintf(stderr, "Total BFLOPS %5.3f \n", bflops);
-    fprintf(stderr, "avg_outputs = %d \n", avg_outputs);
-#ifdef GPU
-    get_cuda_stream();
-    get_cuda_memcpy_stream();
-    if (gpu_index >= 0)
-    {
-        int size = get_network_input_size(net) * net.batch;
-        net.input_state_gpu = cuda_make_array(0, size);
-        if (cudaSuccess == cudaHostAlloc(&net.input_pinned_cpu, size * sizeof(float), cudaHostRegisterMapped)) net.input_pinned_cpu_flag = 1;
-        else {
-            cudaGetLastError(); // reset CUDA-error
-            net.input_pinned_cpu = (float*)xcalloc(size, sizeof(float));
-        }
-
-        // pre-allocate memory for inference on Tensor Cores (fp16)
-        if (net.cudnn_half) {
-            *net.max_input16_size = max_inputs;
-            CHECK_CUDA(cudaMalloc((void **)net.input16_gpu, *net.max_input16_size * sizeof(short))); //sizeof(half)
-            *net.max_output16_size = max_outputs;
-            CHECK_CUDA(cudaMalloc((void **)net.output16_gpu, *net.max_output16_size * sizeof(short))); //sizeof(half)
-        }
-        if (workspace_size) {
-            fprintf(stderr, " Allocate additional workspace_size = %1.2f MB \n", (float)workspace_size/1000000);
-            net.workspace = cuda_make_array(0, workspace_size / sizeof(float) + 1);
-        }
-        else {
-            net.workspace = (float*)xcalloc(1, workspace_size);
-        }
-    }
-#else
-        if (workspace_size) {
-            net.workspace = (float*)xcalloc(1, workspace_size);
-        }
-#endif
-
-    LAYER_TYPE lt = net.layers[net.n - 1].type;
-    if ((net.w % 32 != 0 || net.h % 32 != 0) && (lt == YOLO || lt == REGION || lt == DETECTION)) {
-        printf("\n Warning: width=%d and height=%d in cfg-file must be divisible by 32 for default networks Yolo v1/v2/v3!!! \n\n",
-            net.w, net.h);
-    }
-    return net;
-}
-
-
-
-list *read_cfg(char *filename)
-{
-    FILE *file = fopen(filename, "r");
-    if(file == 0) file_error(filename);
-    char *line;
-    int nu = 0;
-    list *sections = make_list();
-    section *current = 0;
-    while((line=fgetl(file)) != 0){
-        ++ nu;
-        strip(line);
-        switch(line[0]){
-            case '[':
-                current = (section*)xmalloc(sizeof(section));
-                list_insert(sections, current);
-                current->options = make_list();
-                current->type = line;
-                break;
-            case '\0':
-            case '#':
-            case ';':
-                free(line);
-                break;
-            default:
-                if(!read_option(line, current->options)){
-                    fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line);
-                    free(line);
-                }
-                break;
-        }
-    }
-    fclose(file);
-    return sections;
-}
-
-void save_convolutional_weights_binary(layer l, FILE *fp)
-{
-#ifdef GPU
-    if(gpu_index >= 0){
-        pull_convolutional_layer(l);
-    }
-#endif
-    int size = (l.c/l.groups)*l.size*l.size;
-    binarize_weights(l.weights, l.n, size, l.binary_weights);
-    int i, j, k;
-    fwrite(l.biases, sizeof(float), l.n, fp);
-    if (l.batch_normalize){
-        fwrite(l.scales, sizeof(float), l.n, fp);
-        fwrite(l.rolling_mean, sizeof(float), l.n, fp);
-        fwrite(l.rolling_variance, sizeof(float), l.n, fp);
-    }
-    for(i = 0; i < l.n; ++i){
-        float mean = l.binary_weights[i*size];
-        if(mean < 0) mean = -mean;
-        fwrite(&mean, sizeof(float), 1, fp);
-        for(j = 0; j < size/8; ++j){
-            int index = i*size + j*8;
-            unsigned char c = 0;
-            for(k = 0; k < 8; ++k){
-                if (j*8 + k >= size) break;
-                if (l.binary_weights[index + k] > 0) c = (c | 1<<k);
-            }
-            fwrite(&c, sizeof(char), 1, fp);
-        }
-    }
-}
-
-void save_shortcut_weights(layer l, FILE *fp)
-{
-#ifdef GPU
-    if (gpu_index >= 0) {
-        pull_shortcut_layer(l);
-        printf("\n pull_shortcut_layer \n");
-    }
-#endif
-    int i;
-    for (i = 0; i < l.nweights; ++i) printf(" %f, ", l.weight_updates[i]);
-    printf(" l.nweights = %d - update \n", l.nweights);
-    for (i = 0; i < l.nweights; ++i) printf(" %f, ", l.weights[i]);
-    printf(" l.nweights = %d \n\n", l.nweights);
-
-    int num = l.nweights;
-    fwrite(l.weights, sizeof(float), num, fp);
-}
-
-void save_convolutional_weights(layer l, FILE *fp)
-{
-    if(l.binary){
-        //save_convolutional_weights_binary(l, fp);
-        //return;
-    }
-#ifdef GPU
-    if(gpu_index >= 0){
-        pull_convolutional_layer(l);
-    }
-#endif
-    int num = l.nweights;
-    fwrite(l.biases, sizeof(float), l.n, fp);
-    if (l.batch_normalize){
-        fwrite(l.scales, sizeof(float), l.n, fp);
-        fwrite(l.rolling_mean, sizeof(float), l.n, fp);
-        fwrite(l.rolling_variance, sizeof(float), l.n, fp);
-    }
-    fwrite(l.weights, sizeof(float), num, fp);
-    //if(l.adam){
-    //    fwrite(l.m, sizeof(float), num, fp);
-    //    fwrite(l.v, sizeof(float), num, fp);
-    //}
-}
-
-void save_batchnorm_weights(layer l, FILE *fp)
-{
-#ifdef GPU
-    if(gpu_index >= 0){
-        pull_batchnorm_layer(l);
-    }
-#endif
-    fwrite(l.biases, sizeof(float), l.c, fp);
-    fwrite(l.scales, sizeof(float), l.c, fp);
-    fwrite(l.rolling_mean, sizeof(float), l.c, fp);
-    fwrite(l.rolling_variance, sizeof(float), l.c, fp);
-}
-
-void save_connected_weights(layer l, FILE *fp)
-{
-#ifdef GPU
-    if(gpu_index >= 0){
-        pull_connected_layer(l);
-    }
-#endif
-    fwrite(l.biases, sizeof(float), l.outputs, fp);
-    fwrite(l.weights, sizeof(float), l.outputs*l.inputs, fp);
-    if (l.batch_normalize){
-        fwrite(l.scales, sizeof(float), l.outputs, fp);
-        fwrite(l.rolling_mean, sizeof(float), l.outputs, fp);
-        fwrite(l.rolling_variance, sizeof(float), l.outputs, fp);
-    }
-}
-
-void save_weights_upto(network net, char *filename, int cutoff)
-{
-#ifdef GPU
-    if(net.gpu_index >= 0){
-        cuda_set_device(net.gpu_index);
-    }
-#endif
-    fprintf(stderr, "Saving weights to %s\n", filename);
-    FILE *fp = fopen(filename, "wb");
-    if(!fp) file_error(filename);
-
-    int major = MAJOR_VERSION;
-    int minor = MINOR_VERSION;
-    int revision = PATCH_VERSION;
-    fwrite(&major, sizeof(int), 1, fp);
-    fwrite(&minor, sizeof(int), 1, fp);
-    fwrite(&revision, sizeof(int), 1, fp);
-    (*net.seen) = get_current_iteration(net) * net.batch * net.subdivisions; // remove this line, when you will save to weights-file both: seen & cur_iteration
-    fwrite(net.seen, sizeof(uint64_t), 1, fp);
-
-    int i;
-    for(i = 0; i < net.n && i < cutoff; ++i){
-        layer l = net.layers[i];
-        if (l.type == CONVOLUTIONAL && l.share_layer == NULL) {
-            save_convolutional_weights(l, fp);
-        } if (l.type == SHORTCUT && l.nweights > 0) {
-            save_shortcut_weights(l, fp);
-        } if(l.type == CONNECTED){
-            save_connected_weights(l, fp);
-        } if(l.type == BATCHNORM){
-            save_batchnorm_weights(l, fp);
-        } if(l.type == RNN){
-            save_connected_weights(*(l.input_layer), fp);
-            save_connected_weights(*(l.self_layer), fp);
-            save_connected_weights(*(l.output_layer), fp);
-        } if(l.type == GRU){
-            save_connected_weights(*(l.input_z_layer), fp);
-            save_connected_weights(*(l.input_r_layer), fp);
-            save_connected_weights(*(l.input_h_layer), fp);
-            save_connected_weights(*(l.state_z_layer), fp);
-            save_connected_weights(*(l.state_r_layer), fp);
-            save_connected_weights(*(l.state_h_layer), fp);
-        } if(l.type == LSTM){
-            save_connected_weights(*(l.wf), fp);
-            save_connected_weights(*(l.wi), fp);
-            save_connected_weights(*(l.wg), fp);
-            save_connected_weights(*(l.wo), fp);
-            save_connected_weights(*(l.uf), fp);
-            save_connected_weights(*(l.ui), fp);
-            save_connected_weights(*(l.ug), fp);
-            save_connected_weights(*(l.uo), fp);
-        } if (l.type == CONV_LSTM) {
-            if (l.peephole) {
-                save_convolutional_weights(*(l.vf), fp);
-                save_convolutional_weights(*(l.vi), fp);
-                save_convolutional_weights(*(l.vo), fp);
-            }
-            save_convolutional_weights(*(l.wf), fp);
-            save_convolutional_weights(*(l.wi), fp);
-            save_convolutional_weights(*(l.wg), fp);
-            save_convolutional_weights(*(l.wo), fp);
-            save_convolutional_weights(*(l.uf), fp);
-            save_convolutional_weights(*(l.ui), fp);
-            save_convolutional_weights(*(l.ug), fp);
-            save_convolutional_weights(*(l.uo), fp);
-        } if(l.type == CRNN){
-            save_convolutional_weights(*(l.input_layer), fp);
-            save_convolutional_weights(*(l.self_layer), fp);
-            save_convolutional_weights(*(l.output_layer), fp);
-        } if(l.type == LOCAL){
-#ifdef GPU
-            if(gpu_index >= 0){
-                pull_local_layer(l);
-            }
-#endif
-            int locations = l.out_w*l.out_h;
-            int size = l.size*l.size*l.c*l.n*locations;
-            fwrite(l.biases, sizeof(float), l.outputs, fp);
-            fwrite(l.weights, sizeof(float), size, fp);
-        }
-    }
-    fclose(fp);
-}
-void save_weights(network net, char *filename)
-{
-    save_weights_upto(net, filename, net.n);
-}
-
-void transpose_matrix(float *a, int rows, int cols)
-{
-    float* transpose = (float*)xcalloc(rows * cols, sizeof(float));
-    int x, y;
-    for(x = 0; x < rows; ++x){
-        for(y = 0; y < cols; ++y){
-            transpose[y*rows + x] = a[x*cols + y];
-        }
-    }
-    memcpy(a, transpose, rows*cols*sizeof(float));
-    free(transpose);
-}
-
-void load_connected_weights(layer l, FILE *fp, int transpose)
-{
-    fread(l.biases, sizeof(float), l.outputs, fp);
-    fread(l.weights, sizeof(float), l.outputs*l.inputs, fp);
-    if(transpose){
-        transpose_matrix(l.weights, l.inputs, l.outputs);
-    }
-    //printf("Biases: %f mean %f variance\n", mean_array(l.biases, l.outputs), variance_array(l.biases, l.outputs));
-    //printf("Weights: %f mean %f variance\n", mean_array(l.weights, l.outputs*l.inputs), variance_array(l.weights, l.outputs*l.inputs));
-    if (l.batch_normalize && (!l.dontloadscales)){
-        fread(l.scales, sizeof(float), l.outputs, fp);
-        fread(l.rolling_mean, sizeof(float), l.outputs, fp);
-        fread(l.rolling_variance, sizeof(float), l.outputs, fp);
-        //printf("Scales: %f mean %f variance\n", mean_array(l.scales, l.outputs), variance_array(l.scales, l.outputs));
-        //printf("rolling_mean: %f mean %f variance\n", mean_array(l.rolling_mean, l.outputs), variance_array(l.rolling_mean, l.outputs));
-        //printf("rolling_variance: %f mean %f variance\n", mean_array(l.rolling_variance, l.outputs), variance_array(l.rolling_variance, l.outputs));
-    }
-#ifdef GPU
-    if(gpu_index >= 0){
-        push_connected_layer(l);
-    }
-#endif
-}
-
-void load_batchnorm_weights(layer l, FILE *fp)
-{
-    fread(l.biases, sizeof(float), l.c, fp);
-    fread(l.scales, sizeof(float), l.c, fp);
-    fread(l.rolling_mean, sizeof(float), l.c, fp);
-    fread(l.rolling_variance, sizeof(float), l.c, fp);
-#ifdef GPU
-    if(gpu_index >= 0){
-        push_batchnorm_layer(l);
-    }
-#endif
-}
-
-void load_convolutional_weights_binary(layer l, FILE *fp)
-{
-    fread(l.biases, sizeof(float), l.n, fp);
-    if (l.batch_normalize && (!l.dontloadscales)){
-        fread(l.scales, sizeof(float), l.n, fp);
-        fread(l.rolling_mean, sizeof(float), l.n, fp);
-        fread(l.rolling_variance, sizeof(float), l.n, fp);
-    }
-    int size = (l.c / l.groups)*l.size*l.size;
-    int i, j, k;
-    for(i = 0; i < l.n; ++i){
-        float mean = 0;
-        fread(&mean, sizeof(float), 1, fp);
-        for(j = 0; j < size/8; ++j){
-            int index = i*size + j*8;
-            unsigned char c = 0;
-            fread(&c, sizeof(char), 1, fp);
-            for(k = 0; k < 8; ++k){
-                if (j*8 + k >= size) break;
-                l.weights[index + k] = (c & 1<<k) ? mean : -mean;
-            }
-        }
-    }
-#ifdef GPU
-    if(gpu_index >= 0){
-        push_convolutional_layer(l);
-    }
-#endif
-}
-
-void load_convolutional_weights(layer l, FILE *fp)
-{
-    if(l.binary){
-        //load_convolutional_weights_binary(l, fp);
-        //return;
-    }
-    int num = l.nweights;
-    int read_bytes;
-    read_bytes = fread(l.biases, sizeof(float), l.n, fp);
-    if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.biases - l.index = %d \n", l.index);
-    //fread(l.weights, sizeof(float), num, fp); // as in connected layer
-    if (l.batch_normalize && (!l.dontloadscales)){
-        read_bytes = fread(l.scales, sizeof(float), l.n, fp);
-        if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.scales - l.index = %d \n", l.index);
-        read_bytes = fread(l.rolling_mean, sizeof(float), l.n, fp);
-        if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.rolling_mean - l.index = %d \n", l.index);
-        read_bytes = fread(l.rolling_variance, sizeof(float), l.n, fp);
-        if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.rolling_variance - l.index = %d \n", l.index);
-        if(0){
-            int i;
-            for(i = 0; i < l.n; ++i){
-                printf("%g, ", l.rolling_mean[i]);
-            }
-            printf("\n");
-            for(i = 0; i < l.n; ++i){
-                printf("%g, ", l.rolling_variance[i]);
-            }
-            printf("\n");
-        }
-        if(0){
-            fill_cpu(l.n, 0, l.rolling_mean, 1);
-            fill_cpu(l.n, 0, l.rolling_variance, 1);
-        }
-    }
-    read_bytes = fread(l.weights, sizeof(float), num, fp);
-    if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.weights - l.index = %d \n", l.index);
-    //if(l.adam){
-    //    fread(l.m, sizeof(float), num, fp);
-    //    fread(l.v, sizeof(float), num, fp);
-    //}
-    //if(l.c == 3) scal_cpu(num, 1./256, l.weights, 1);
-    if (l.flipped) {
-        transpose_matrix(l.weights, (l.c/l.groups)*l.size*l.size, l.n);
-    }
-    //if (l.binary) binarize_weights(l.weights, l.n, (l.c/l.groups)*l.size*l.size, l.weights);
-#ifdef GPU
-    if(gpu_index >= 0){
-        push_convolutional_layer(l);
-    }
-#endif
-}
-
-void load_shortcut_weights(layer l, FILE *fp)
-{
-    int num = l.nweights;
-    int read_bytes;
-    read_bytes = fread(l.weights, sizeof(float), num, fp);
-    if (read_bytes > 0 && read_bytes < num) printf("\n Warning: Unexpected end of wights-file! l.weights - l.index = %d \n", l.index);
-    //for (int i = 0; i < l.nweights; ++i) printf(" %f, ", l.weights[i]);
-    //printf(" read_bytes = %d \n\n", read_bytes);
-#ifdef GPU
-    if (gpu_index >= 0) {
-        push_shortcut_layer(l);
-    }
-#endif
-}
-
-void load_weights_upto(network *net, char *filename, int cutoff)
-{
-#ifdef GPU
-    if(net->gpu_index >= 0){
-        cuda_set_device(net->gpu_index);
-    }
-#endif
-    fprintf(stderr, "Loading weights from %s...", filename);
-    fflush(stdout);
-    FILE *fp = fopen(filename, "rb");
-    if(!fp) file_error(filename);
-
-    int major;
-    int minor;
-    int revision;
-    fread(&major, sizeof(int), 1, fp);
-    fread(&minor, sizeof(int), 1, fp);
-    fread(&revision, sizeof(int), 1, fp);
-    if ((major * 10 + minor) >= 2) {
-        printf("\n seen 64");
-        uint64_t iseen = 0;
-        fread(&iseen, sizeof(uint64_t), 1, fp);
-        *net->seen = iseen;
-    }
-    else {
-        printf("\n seen 32");
-        uint32_t iseen = 0;
-        fread(&iseen, sizeof(uint32_t), 1, fp);
-        *net->seen = iseen;
-    }
-    *net->cur_iteration = get_current_batch(*net);
-    printf(", trained: %.0f K-images (%.0f Kilo-batches_64) \n", (float)(*net->seen / 1000), (float)(*net->seen / 64000));
-    int transpose = (major > 1000) || (minor > 1000);
-
-    int i;
-    for(i = 0; i < net->n && i < cutoff; ++i){
-        layer l = net->layers[i];
-        if (l.dontload) continue;
-        if(l.type == CONVOLUTIONAL && l.share_layer == NULL){
-            load_convolutional_weights(l, fp);
-        }
-        if (l.type == SHORTCUT && l.nweights > 0) {
-            load_shortcut_weights(l, fp);
-        }
-        if(l.type == CONNECTED){
-            load_connected_weights(l, fp, transpose);
-        }
-        if(l.type == BATCHNORM){
-            load_batchnorm_weights(l, fp);
-        }
-        if(l.type == CRNN){
-            load_convolutional_weights(*(l.input_layer), fp);
-            load_convolutional_weights(*(l.self_layer), fp);
-            load_convolutional_weights(*(l.output_layer), fp);
-        }
-        if(l.type == RNN){
-            load_connected_weights(*(l.input_layer), fp, transpose);
-            load_connected_weights(*(l.self_layer), fp, transpose);
-            load_connected_weights(*(l.output_layer), fp, transpose);
-        }
-        if(l.type == GRU){
-            load_connected_weights(*(l.input_z_layer), fp, transpose);
-            load_connected_weights(*(l.input_r_layer), fp, transpose);
-            load_connected_weights(*(l.input_h_layer), fp, transpose);
-            load_connected_weights(*(l.state_z_layer), fp, transpose);
-            load_connected_weights(*(l.state_r_layer), fp, transpose);
-            load_connected_weights(*(l.state_h_layer), fp, transpose);
-        }
-        if(l.type == LSTM){
-            load_connected_weights(*(l.wf), fp, transpose);
-            load_connected_weights(*(l.wi), fp, transpose);
-            load_connected_weights(*(l.wg), fp, transpose);
-            load_connected_weights(*(l.wo), fp, transpose);
-            load_connected_weights(*(l.uf), fp, transpose);
-            load_connected_weights(*(l.ui), fp, transpose);
-            load_connected_weights(*(l.ug), fp, transpose);
-            load_connected_weights(*(l.uo), fp, transpose);
-        }
-        if (l.type == CONV_LSTM) {
-            if (l.peephole) {
-                load_convolutional_weights(*(l.vf), fp);
-                load_convolutional_weights(*(l.vi), fp);
-                load_convolutional_weights(*(l.vo), fp);
-            }
-            load_convolutional_weights(*(l.wf), fp);
-            load_convolutional_weights(*(l.wi), fp);
-            load_convolutional_weights(*(l.wg), fp);
-            load_convolutional_weights(*(l.wo), fp);
-            load_convolutional_weights(*(l.uf), fp);
-            load_convolutional_weights(*(l.ui), fp);
-            load_convolutional_weights(*(l.ug), fp);
-            load_convolutional_weights(*(l.uo), fp);
-        }
-        if(l.type == LOCAL){
-            int locations = l.out_w*l.out_h;
-            int size = l.size*l.size*l.c*l.n*locations;
-            fread(l.biases, sizeof(float), l.outputs, fp);
-            fread(l.weights, sizeof(float), size, fp);
-#ifdef GPU
-            if(gpu_index >= 0){
-                push_local_layer(l);
-            }
-#endif
-        }
-        if (feof(fp)) break;
-    }
-    fprintf(stderr, "Done! Loaded %d layers from weights-file \n", i);
-    fclose(fp);
-}
-
-void load_weights(network *net, char *filename)
-{
-    load_weights_upto(net, filename, net->n);
-}
-
-// load network & force - set batch size
-network *load_network_custom(char *cfg, char *weights, int clear, int batch)
-{
-    printf(" Try to load cfg: %s, weights: %s, clear = %d \n", cfg, weights, clear);
-    network* net = (network*)xcalloc(1, sizeof(network));
-    *net = parse_network_cfg_custom(cfg, batch, 1);
-    if (weights && weights[0] != 0) {
-        printf(" Try to load weights: %s \n", weights);
-        load_weights(net, weights);
-    }
-    fuse_conv_batchnorm(*net);
-    if (clear) {
-        (*net->seen) = 0;
-        (*net->cur_iteration) = 0;
-    }
-    return net;
-}
-
-// load network & get batch size from cfg-file
-network *load_network(char *cfg, char *weights, int clear)
-{
-    printf(" Try to load cfg: %s, clear = %d \n", cfg, clear);
-    network* net = (network*)xcalloc(1, sizeof(network));
-    *net = parse_network_cfg(cfg);
-    if (weights && weights[0] != 0) {
-        printf(" Try to load weights: %s \n", weights);
-        load_weights(net, weights);
-    }
-    if (clear) {
-        (*net->seen) = 0;
-        (*net->cur_iteration) = 0;
-    }
-    return net;
-}
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#include "activation_layer.h"
+#include "activations.h"
+#include "assert.h"
+#include "avgpool_layer.h"
+#include "batchnorm_layer.h"
+#include "blas.h"
+#include "connected_layer.h"
+#include "convolutional_layer.h"
+#include "cost_layer.h"
+#include "crnn_layer.h"
+#include "crop_layer.h"
+#include "detection_layer.h"
+#include "dropout_layer.h"
+#include "gru_layer.h"
+#include "list.h"
+#include "local_layer.h"
+#include "lstm_layer.h"
+#include "conv_lstm_layer.h"
+#include "maxpool_layer.h"
+#include "normalization_layer.h"
+#include "option_list.h"
+#include "parser.h"
+#include "region_layer.h"
+#include "reorg_layer.h"
+#include "reorg_old_layer.h"
+#include "rnn_layer.h"
+#include "route_layer.h"
+#include "shortcut_layer.h"
+#include "scale_channels_layer.h"
+#include "sam_layer.h"
+#include "softmax_layer.h"
+#include "utils.h"
+#include "upsample_layer.h"
+#include "version.h"
+#include "yolo_layer.h"
+#include "gaussian_yolo_layer.h"
+#include "representation_layer.h"
+
+void empty_func(dropout_layer l, network_state state) {
+    //l.output_gpu = state.input;
+}
+
+typedef struct{
+    char *type;
+    list *options;
+}section;
+
+list *read_cfg(char *filename);
+
+LAYER_TYPE string_to_layer_type(char * type)
+{
+
+    if (strcmp(type, "[shortcut]")==0) return SHORTCUT;
+    if (strcmp(type, "[scale_channels]") == 0) return SCALE_CHANNELS;
+    if (strcmp(type, "[sam]") == 0) return SAM;
+    if (strcmp(type, "[crop]")==0) return CROP;
+    if (strcmp(type, "[cost]")==0) return COST;
+    if (strcmp(type, "[detection]")==0) return DETECTION;
+    if (strcmp(type, "[region]")==0) return REGION;
+    if (strcmp(type, "[yolo]") == 0) return YOLO;
+    if (strcmp(type, "[Gaussian_yolo]") == 0) return GAUSSIAN_YOLO;
+    if (strcmp(type, "[local]")==0) return LOCAL;
+    if (strcmp(type, "[conv]")==0
+            || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL;
+    if (strcmp(type, "[activation]")==0) return ACTIVE;
+    if (strcmp(type, "[net]")==0
+            || strcmp(type, "[network]")==0) return NETWORK;
+    if (strcmp(type, "[crnn]")==0) return CRNN;
+    if (strcmp(type, "[gru]")==0) return GRU;
+    if (strcmp(type, "[lstm]")==0) return LSTM;
+    if (strcmp(type, "[conv_lstm]") == 0) return CONV_LSTM;
+    if (strcmp(type, "[history]") == 0) return HISTORY;
+    if (strcmp(type, "[rnn]")==0) return RNN;
+    if (strcmp(type, "[conn]")==0
+            || strcmp(type, "[connected]")==0) return CONNECTED;
+    if (strcmp(type, "[max]")==0
+            || strcmp(type, "[maxpool]")==0) return MAXPOOL;
+    if (strcmp(type, "[local_avg]") == 0
+        || strcmp(type, "[local_avgpool]") == 0) return LOCAL_AVGPOOL;
+    if (strcmp(type, "[reorg3d]")==0) return REORG;
+    if (strcmp(type, "[reorg]") == 0) return REORG_OLD;
+    if (strcmp(type, "[avg]")==0
+            || strcmp(type, "[avgpool]")==0) return AVGPOOL;
+    if (strcmp(type, "[dropout]")==0) return DROPOUT;
+    if (strcmp(type, "[lrn]")==0
+            || strcmp(type, "[normalization]")==0) return NORMALIZATION;
+    if (strcmp(type, "[batchnorm]")==0) return BATCHNORM;
+    if (strcmp(type, "[soft]")==0
+            || strcmp(type, "[softmax]")==0) return SOFTMAX;
+    if (strcmp(type, "[contrastive]") == 0) return CONTRASTIVE;
+    if (strcmp(type, "[route]")==0) return ROUTE;
+    if (strcmp(type, "[upsample]") == 0) return UPSAMPLE;
+    if (strcmp(type, "[empty]") == 0
+        || strcmp(type, "[silence]") == 0) return EMPTY;
+    if (strcmp(type, "[implicit]") == 0) return IMPLICIT;
+    return BLANK;
+}
+
+void free_section(section *s)
+{
+    free(s->type);
+    node *n = s->options->front;
+    while(n){
+        kvp *pair = (kvp *)n->val;
+        free(pair->key);
+        free(pair);
+        node *next = n->next;
+        free(n);
+        n = next;
+    }
+    free(s->options);
+    free(s);
+}
+
+void parse_data(char *data, float *a, int n)
+{
+    int i;
+    if(!data) return;
+    char *curr = data;
+    char *next = data;
+    int done = 0;
+    for(i = 0; i < n && !done; ++i){
+        while(*++next !='\0' && *next != ',');
+        if(*next == '\0') done = 1;
+        *next = '\0';
+        sscanf(curr, "%g", &a[i]);
+        curr = next+1;
+    }
+}
+
+typedef struct size_params{
+    int batch;
+    int inputs;
+    int h;
+    int w;
+    int c;
+    int index;
+    int time_steps;
+    int train;
+    network net;
+} size_params;
+
+local_layer parse_local(list *options, size_params params)
+{
+    int n = option_find_int(options, "filters",1);
+    int size = option_find_int(options, "size",1);
+    int stride = option_find_int(options, "stride",1);
+    int pad = option_find_int(options, "pad",0);
+    char *activation_s = option_find_str(options, "activation", "logistic");
+    ACTIVATION activation = get_activation(activation_s);
+
+    int batch,h,w,c;
+    h = params.h;
+    w = params.w;
+    c = params.c;
+    batch=params.batch;
+    if(!(h && w && c)) error("Layer before local layer must output image.");
+
+    local_layer layer = make_local_layer(batch,h,w,c,n,size,stride,pad,activation);
+
+    return layer;
+}
+
+convolutional_layer parse_convolutional(list *options, size_params params)
+{
+    int n = option_find_int(options, "filters",1);
+    int groups = option_find_int_quiet(options, "groups", 1);
+    int size = option_find_int(options, "size",1);
+    int stride = -1;
+    //int stride = option_find_int(options, "stride",1);
+    int stride_x = option_find_int_quiet(options, "stride_x", -1);
+    int stride_y = option_find_int_quiet(options, "stride_y", -1);
+    if (stride_x < 1 || stride_y < 1) {
+        stride = option_find_int(options, "stride", 1);
+        if (stride_x < 1) stride_x = stride;
+        if (stride_y < 1) stride_y = stride;
+    }
+    else {
+        stride = option_find_int_quiet(options, "stride", 1);
+    }
+    int dilation = option_find_int_quiet(options, "dilation", 1);
+    int antialiasing = option_find_int_quiet(options, "antialiasing", 0);
+    if (size == 1) dilation = 1;
+    int pad = option_find_int_quiet(options, "pad",0);
+    int padding = option_find_int_quiet(options, "padding",0);
+    if(pad) padding = size/2;
+
+    char *activation_s = option_find_str(options, "activation", "logistic");
+    ACTIVATION activation = get_activation(activation_s);
+
+    int assisted_excitation = option_find_float_quiet(options, "assisted_excitation", 0);
+
+    int share_index = option_find_int_quiet(options, "share_index", -1000000000);
+    convolutional_layer *share_layer = NULL;
+    if(share_index >= 0) share_layer = &params.net.layers[share_index];
+    else if(share_index != -1000000000) share_layer = &params.net.layers[params.index + share_index];
+
+    int batch,h,w,c;
+    h = params.h;
+    w = params.w;
+    c = params.c;
+    batch=params.batch;
+    if(!(h && w && c)) error("Layer before convolutional layer must output image.");
+    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
+    int cbn = option_find_int_quiet(options, "cbn", 0);
+    if (cbn) batch_normalize = 2;
+    int binary = option_find_int_quiet(options, "binary", 0);
+    int xnor = option_find_int_quiet(options, "xnor", 0);
+    int use_bin_output = option_find_int_quiet(options, "bin_output", 0);
+    int sway = option_find_int_quiet(options, "sway", 0);
+    int rotate = option_find_int_quiet(options, "rotate", 0);
+    int stretch = option_find_int_quiet(options, "stretch", 0);
+    int stretch_sway = option_find_int_quiet(options, "stretch_sway", 0);
+    if ((sway + rotate + stretch + stretch_sway) > 1) {
+        printf(" Error: should be used only 1 param: sway=1, rotate=1 or stretch=1 in the [convolutional] layer \n");
+        exit(0);
+    }
+    int deform = sway || rotate || stretch || stretch_sway;
+    if (deform && size == 1) {
+        printf(" Error: params (sway=1, rotate=1 or stretch=1) should be used only with size >=3 in the [convolutional] layer \n");
+        exit(0);
+    }
+
+    convolutional_layer layer = make_convolutional_layer(batch,1,h,w,c,n,groups,size,stride_x,stride_y,dilation,padding,activation, batch_normalize, binary, xnor, params.net.adam, use_bin_output, params.index, antialiasing, share_layer, assisted_excitation, deform, params.train);
+    layer.flipped = option_find_int_quiet(options, "flipped", 0);
+    layer.dot = option_find_float_quiet(options, "dot", 0);
+    layer.sway = sway;
+    layer.rotate = rotate;
+    layer.stretch = stretch;
+    layer.stretch_sway = stretch_sway;
+    layer.angle = option_find_float_quiet(options, "angle", 15);
+    layer.grad_centr = option_find_int_quiet(options, "grad_centr", 0);
+    layer.reverse = option_find_float_quiet(options, "reverse", 0);
+    layer.coordconv = option_find_int_quiet(options, "coordconv", 0);
+
+    layer.stream = option_find_int_quiet(options, "stream", -1);
+    layer.wait_stream_id = option_find_int_quiet(options, "wait_stream", -1);
+
+    if(params.net.adam){
+        layer.B1 = params.net.B1;
+        layer.B2 = params.net.B2;
+        layer.eps = params.net.eps;
+    }
+
+    return layer;
+}
+
+layer parse_crnn(list *options, size_params params)
+{
+    int size = option_find_int_quiet(options, "size", 3);
+    int stride = option_find_int_quiet(options, "stride", 1);
+    int dilation = option_find_int_quiet(options, "dilation", 1);
+    int pad = option_find_int_quiet(options, "pad", 0);
+    int padding = option_find_int_quiet(options, "padding", 0);
+    if (pad) padding = size / 2;
+
+    int output_filters = option_find_int(options, "output",1);
+    int hidden_filters = option_find_int(options, "hidden",1);
+    int groups = option_find_int_quiet(options, "groups", 1);
+    char *activation_s = option_find_str(options, "activation", "logistic");
+    ACTIVATION activation = get_activation(activation_s);
+    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
+    int xnor = option_find_int_quiet(options, "xnor", 0);
+
+    layer l = make_crnn_layer(params.batch, params.h, params.w, params.c, hidden_filters, output_filters, groups, params.time_steps, size, stride, dilation, padding, activation, batch_normalize, xnor, params.train);
+
+    l.shortcut = option_find_int_quiet(options, "shortcut", 0);
+
+    return l;
+}
+
+layer parse_rnn(list *options, size_params params)
+{
+    int output = option_find_int(options, "output",1);
+    int hidden = option_find_int(options, "hidden",1);
+    char *activation_s = option_find_str(options, "activation", "logistic");
+    ACTIVATION activation = get_activation(activation_s);
+    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
+    int logistic = option_find_int_quiet(options, "logistic", 0);
+
+    layer l = make_rnn_layer(params.batch, params.inputs, hidden, output, params.time_steps, activation, batch_normalize, logistic);
+
+    l.shortcut = option_find_int_quiet(options, "shortcut", 0);
+
+    return l;
+}
+
+layer parse_gru(list *options, size_params params)
+{
+    int output = option_find_int(options, "output",1);
+    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
+
+    layer l = make_gru_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize);
+
+    return l;
+}
+
+layer parse_lstm(list *options, size_params params)
+{
+    int output = option_find_int(options, "output",1);
+    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
+
+    layer l = make_lstm_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize);
+
+    return l;
+}
+
+layer parse_conv_lstm(list *options, size_params params)
+{
+    // a ConvLSTM with a larger transitional kernel should be able to capture faster motions
+    int size = option_find_int_quiet(options, "size", 3);
+    int stride = option_find_int_quiet(options, "stride", 1);
+    int dilation = option_find_int_quiet(options, "dilation", 1);
+    int pad = option_find_int_quiet(options, "pad", 0);
+    int padding = option_find_int_quiet(options, "padding", 0);
+    if (pad) padding = size / 2;
+
+    int output_filters = option_find_int(options, "output", 1);
+    int groups = option_find_int_quiet(options, "groups", 1);
+    char *activation_s = option_find_str(options, "activation", "linear");
+    ACTIVATION activation = get_activation(activation_s);
+    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
+    int xnor = option_find_int_quiet(options, "xnor", 0);
+    int peephole = option_find_int_quiet(options, "peephole", 0);
+    int bottleneck = option_find_int_quiet(options, "bottleneck", 0);
+
+    layer l = make_conv_lstm_layer(params.batch, params.h, params.w, params.c, output_filters, groups, params.time_steps, size, stride, dilation, padding, activation, batch_normalize, peephole, xnor, bottleneck, params.train);
+
+    l.state_constrain = option_find_int_quiet(options, "state_constrain", params.time_steps * 32);
+    l.shortcut = option_find_int_quiet(options, "shortcut", 0);
+
+    char *lstm_activation_s = option_find_str(options, "lstm_activation", "tanh");
+    l.lstm_activation = get_activation(lstm_activation_s);
+    l.time_normalizer = option_find_float_quiet(options, "time_normalizer", 1.0);
+
+    return l;
+}
+
+layer parse_history(list *options, size_params params)
+{
+    int history_size = option_find_int(options, "history_size", 4);
+    layer l = make_history_layer(params.batch, params.h, params.w, params.c, history_size, params.time_steps, params.train);
+    return l;
+}
+
+connected_layer parse_connected(list *options, size_params params)
+{
+    int output = option_find_int(options, "output",1);
+    char *activation_s = option_find_str(options, "activation", "logistic");
+    ACTIVATION activation = get_activation(activation_s);
+    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
+
+    connected_layer layer = make_connected_layer(params.batch, 1, params.inputs, output, activation, batch_normalize);
+
+    return layer;
+}
+
+softmax_layer parse_softmax(list *options, size_params params)
+{
+	int groups = option_find_int_quiet(options, "groups", 1);
+	softmax_layer layer = make_softmax_layer(params.batch, params.inputs, groups);
+	layer.temperature = option_find_float_quiet(options, "temperature", 1);
+	char *tree_file = option_find_str(options, "tree", 0);
+	if (tree_file) layer.softmax_tree = read_tree(tree_file);
+	layer.w = params.w;
+	layer.h = params.h;
+	layer.c = params.c;
+	layer.spatial = option_find_float_quiet(options, "spatial", 0);
+	layer.noloss = option_find_int_quiet(options, "noloss", 0);
+	return layer;
+}
+
+contrastive_layer parse_contrastive(list *options, size_params params)
+{
+    int classes = option_find_int(options, "classes", 1000);
+    layer *yolo_layer = NULL;
+    int yolo_layer_id = option_find_int_quiet(options, "yolo_layer", 0);
+    if (yolo_layer_id < 0) yolo_layer_id = params.index + yolo_layer_id;
+    if(yolo_layer_id != 0) yolo_layer = params.net.layers + yolo_layer_id;
+    if (yolo_layer->type != YOLO) {
+        printf(" Error: [contrastive] layer should point to the [yolo] layer instead of %d layer! \n", yolo_layer_id);
+        getchar();
+        exit(0);
+    }
+
+    contrastive_layer layer = make_contrastive_layer(params.batch, params.w, params.h, params.c, classes, params.inputs, yolo_layer);
+    layer.temperature = option_find_float_quiet(options, "temperature", 1);
+    layer.steps = params.time_steps;
+    layer.cls_normalizer = option_find_float_quiet(options, "cls_normalizer", 1);
+    layer.max_delta = option_find_float_quiet(options, "max_delta", FLT_MAX);   // set 10
+    layer.contrastive_neg_max = option_find_int_quiet(options, "contrastive_neg_max", 3);
+    return layer;
+}
+
+int *parse_yolo_mask(char *a, int *num)
+{
+    int *mask = 0;
+    if (a) {
+        int len = strlen(a);
+        int n = 1;
+        int i;
+        for (i = 0; i < len; ++i) {
+            if (a[i] == '#') break;
+            if (a[i] == ',') ++n;
+        }
+        mask = (int*)xcalloc(n, sizeof(int));
+        for (i = 0; i < n; ++i) {
+            int val = atoi(a);
+            mask[i] = val;
+            a = strchr(a, ',') + 1;
+        }
+        *num = n;
+    }
+    return mask;
+}
+
+float *get_classes_multipliers(char *cpc, const int classes, const float max_delta)
+{
+    float *classes_multipliers = NULL;
+    if (cpc) {
+        int classes_counters = classes;
+        int *counters_per_class = parse_yolo_mask(cpc, &classes_counters);
+        if (classes_counters != classes) {
+            printf(" number of values in counters_per_class = %d doesn't match with classes = %d \n", classes_counters, classes);
+            exit(0);
+        }
+        float max_counter = 0;
+        int i;
+        for (i = 0; i < classes_counters; ++i) {
+            if (counters_per_class[i] < 1) counters_per_class[i] = 1;
+            if (max_counter < counters_per_class[i]) max_counter = counters_per_class[i];
+        }
+        classes_multipliers = (float *)calloc(classes_counters, sizeof(float));
+        for (i = 0; i < classes_counters; ++i) {
+            classes_multipliers[i] = max_counter / counters_per_class[i];
+            if(classes_multipliers[i] > max_delta) classes_multipliers[i] = max_delta;
+        }
+        free(counters_per_class);
+        printf(" classes_multipliers: ");
+        for (i = 0; i < classes_counters; ++i) printf("%.1f, ", classes_multipliers[i]);
+        printf("\n");
+    }
+    return classes_multipliers;
+}
+
+layer parse_yolo(list *options, size_params params)
+{
+    int classes = option_find_int(options, "classes", 20);
+    int total = option_find_int(options, "num", 1);
+    int num = total;
+    char *a = option_find_str(options, "mask", 0);
+    int *mask = parse_yolo_mask(a, &num);
+    int max_boxes = option_find_int_quiet(options, "max", 200);
+    layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes, max_boxes);
+    if (l.outputs != params.inputs) {
+        printf("Error: l.outputs == params.inputs \n");
+        printf("filters= in the [convolutional]-layer doesn't correspond to classes= or mask= in [yolo]-layer \n");
+        exit(EXIT_FAILURE);
+    }
+    //assert(l.outputs == params.inputs);
+
+    l.show_details = option_find_int_quiet(options, "show_details", 1);
+    l.max_delta = option_find_float_quiet(options, "max_delta", FLT_MAX);   // set 10
+    char *cpc = option_find_str(options, "counters_per_class", 0);
+    l.classes_multipliers = get_classes_multipliers(cpc, classes, l.max_delta);
+
+    l.label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f);
+    l.scale_x_y = option_find_float_quiet(options, "scale_x_y", 1);
+    l.objectness_smooth = option_find_int_quiet(options, "objectness_smooth", 0);
+    l.new_coords = option_find_int_quiet(options, "new_coords", 0);
+    l.iou_normalizer = option_find_float_quiet(options, "iou_normalizer", 0.75);
+    l.obj_normalizer = option_find_float_quiet(options, "obj_normalizer", 1);
+    l.cls_normalizer = option_find_float_quiet(options, "cls_normalizer", 1);
+    l.delta_normalizer = option_find_float_quiet(options, "delta_normalizer", 1);
+    char *iou_loss = option_find_str_quiet(options, "iou_loss", "mse");   //  "iou");
+
+    if (strcmp(iou_loss, "mse") == 0) l.iou_loss = MSE;
+    else if (strcmp(iou_loss, "giou") == 0) l.iou_loss = GIOU;
+    else if (strcmp(iou_loss, "diou") == 0) l.iou_loss = DIOU;
+    else if (strcmp(iou_loss, "ciou") == 0) l.iou_loss = CIOU;
+    else l.iou_loss = IOU;
+    fprintf(stderr, "[yolo] params: iou loss: %s (%d), iou_norm: %2.2f, obj_norm: %2.2f, cls_norm: %2.2f, delta_norm: %2.2f, scale_x_y: %2.2f\n",
+        iou_loss, l.iou_loss, l.iou_normalizer, l.obj_normalizer, l.cls_normalizer, l.delta_normalizer, l.scale_x_y);
+
+    char *iou_thresh_kind_str = option_find_str_quiet(options, "iou_thresh_kind", "iou");
+    if (strcmp(iou_thresh_kind_str, "iou") == 0) l.iou_thresh_kind = IOU;
+    else if (strcmp(iou_thresh_kind_str, "giou") == 0) l.iou_thresh_kind = GIOU;
+    else if (strcmp(iou_thresh_kind_str, "diou") == 0) l.iou_thresh_kind = DIOU;
+    else if (strcmp(iou_thresh_kind_str, "ciou") == 0) l.iou_thresh_kind = CIOU;
+    else {
+        fprintf(stderr, " Wrong iou_thresh_kind = %s \n", iou_thresh_kind_str);
+        l.iou_thresh_kind = IOU;
+    }
+
+    l.beta_nms = option_find_float_quiet(options, "beta_nms", 0.6);
+    char *nms_kind = option_find_str_quiet(options, "nms_kind", "default");
+    if (strcmp(nms_kind, "default") == 0) l.nms_kind = DEFAULT_NMS;
+    else {
+        if (strcmp(nms_kind, "greedynms") == 0) l.nms_kind = GREEDY_NMS;
+        else if (strcmp(nms_kind, "diounms") == 0) l.nms_kind = DIOU_NMS;
+        else l.nms_kind = DEFAULT_NMS;
+        printf("nms_kind: %s (%d), beta = %f \n", nms_kind, l.nms_kind, l.beta_nms);
+    }
+
+    l.jitter = option_find_float(options, "jitter", .2);
+    l.resize = option_find_float_quiet(options, "resize", 1.0);
+    l.focal_loss = option_find_int_quiet(options, "focal_loss", 0);
+
+    l.ignore_thresh = option_find_float(options, "ignore_thresh", .5);
+    l.truth_thresh = option_find_float(options, "truth_thresh", 1);
+    l.iou_thresh = option_find_float_quiet(options, "iou_thresh", 1); // recommended to use iou_thresh=0.213 in [yolo]
+    l.random = option_find_float_quiet(options, "random", 0);
+
+    l.track_history_size = option_find_int_quiet(options, "track_history_size", 5);
+    l.sim_thresh = option_find_int_quiet(options, "sim_thresh", 0.8);
+    l.dets_for_track = option_find_int_quiet(options, "dets_for_track", 1);
+    l.dets_for_show = option_find_int_quiet(options, "dets_for_show", 1);
+    l.track_ciou_norm = option_find_float_quiet(options, "track_ciou_norm", 0.01);
+    int embedding_layer_id = option_find_int_quiet(options, "embedding_layer", 999999);
+    if (embedding_layer_id < 0) embedding_layer_id = params.index + embedding_layer_id;
+    if (embedding_layer_id != 999999) {
+        printf(" embedding_layer_id = %d, ", embedding_layer_id);
+        layer le = params.net.layers[embedding_layer_id];
+        l.embedding_layer_id = embedding_layer_id;
+        l.embedding_output = (float*)xcalloc(le.batch * le.outputs, sizeof(float));
+        l.embedding_size = le.n / l.n;
+        printf(" embedding_size = %d \n", l.embedding_size);
+        if (le.n % l.n != 0) {
+            printf(" Warning: filters=%d number in embedding_layer=%d isn't divisable by number of anchors %d \n", le.n, embedding_layer_id, l.n);
+            getchar();
+        }
+    }
+
+    char *map_file = option_find_str(options, "map", 0);
+    if (map_file) l.map = read_map(map_file);
+
+    a = option_find_str(options, "anchors", 0);
+    if (a) {
+        int len = strlen(a);
+        int n = 1;
+        int i;
+        for (i = 0; i < len; ++i) {
+            if (a[i] == '#') break;
+            if (a[i] == ',') ++n;
+        }
+        for (i = 0; i < n && i < total*2; ++i) {
+            float bias = atof(a);
+            l.biases[i] = bias;
+            a = strchr(a, ',') + 1;
+        }
+    }
+    return l;
+}
+
+
+int *parse_gaussian_yolo_mask(char *a, int *num) // Gaussian_YOLOv3
+{
+    int *mask = 0;
+    if (a) {
+        int len = strlen(a);
+        int n = 1;
+        int i;
+        for (i = 0; i < len; ++i) {
+            if (a[i] == '#') break;
+            if (a[i] == ',') ++n;
+        }
+        mask = (int *)calloc(n, sizeof(int));
+        for (i = 0; i < n; ++i) {
+            int val = atoi(a);
+            mask[i] = val;
+            a = strchr(a, ',') + 1;
+        }
+        *num = n;
+    }
+    return mask;
+}
+
+
+layer parse_gaussian_yolo(list *options, size_params params) // Gaussian_YOLOv3
+{
+    int classes = option_find_int(options, "classes", 20);
+    int max_boxes = option_find_int_quiet(options, "max", 200);
+    int total = option_find_int(options, "num", 1);
+    int num = total;
+
+    char *a = option_find_str(options, "mask", 0);
+    int *mask = parse_gaussian_yolo_mask(a, &num);
+    layer l = make_gaussian_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes, max_boxes);
+    if (l.outputs != params.inputs) {
+        printf("Error: l.outputs == params.inputs \n");
+        printf("filters= in the [convolutional]-layer doesn't correspond to classes= or mask= in [Gaussian_yolo]-layer \n");
+        exit(EXIT_FAILURE);
+    }
+    //assert(l.outputs == params.inputs);
+    l.max_delta = option_find_float_quiet(options, "max_delta", FLT_MAX);   // set 10
+    char *cpc = option_find_str(options, "counters_per_class", 0);
+    l.classes_multipliers = get_classes_multipliers(cpc, classes, l.max_delta);
+
+    l.label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f);
+    l.scale_x_y = option_find_float_quiet(options, "scale_x_y", 1);
+    l.objectness_smooth = option_find_int_quiet(options, "objectness_smooth", 0);
+    l.uc_normalizer = option_find_float_quiet(options, "uc_normalizer", 1.0);
+    l.iou_normalizer = option_find_float_quiet(options, "iou_normalizer", 0.75);
+    l.obj_normalizer = option_find_float_quiet(options, "obj_normalizer", 1.0);
+    l.cls_normalizer = option_find_float_quiet(options, "cls_normalizer", 1);
+    l.delta_normalizer = option_find_float_quiet(options, "delta_normalizer", 1);
+    char *iou_loss = option_find_str_quiet(options, "iou_loss", "mse");   //  "iou");
+
+    if (strcmp(iou_loss, "mse") == 0) l.iou_loss = MSE;
+    else if (strcmp(iou_loss, "giou") == 0) l.iou_loss = GIOU;
+    else if (strcmp(iou_loss, "diou") == 0) l.iou_loss = DIOU;
+    else if (strcmp(iou_loss, "ciou") == 0) l.iou_loss = CIOU;
+    else l.iou_loss = IOU;
+
+    char *iou_thresh_kind_str = option_find_str_quiet(options, "iou_thresh_kind", "iou");
+    if (strcmp(iou_thresh_kind_str, "iou") == 0) l.iou_thresh_kind = IOU;
+    else if (strcmp(iou_thresh_kind_str, "giou") == 0) l.iou_thresh_kind = GIOU;
+    else if (strcmp(iou_thresh_kind_str, "diou") == 0) l.iou_thresh_kind = DIOU;
+    else if (strcmp(iou_thresh_kind_str, "ciou") == 0) l.iou_thresh_kind = CIOU;
+    else {
+        fprintf(stderr, " Wrong iou_thresh_kind = %s \n", iou_thresh_kind_str);
+        l.iou_thresh_kind = IOU;
+    }
+
+    l.beta_nms = option_find_float_quiet(options, "beta_nms", 0.6);
+    char *nms_kind = option_find_str_quiet(options, "nms_kind", "default");
+    if (strcmp(nms_kind, "default") == 0) l.nms_kind = DEFAULT_NMS;
+    else {
+        if (strcmp(nms_kind, "greedynms") == 0) l.nms_kind = GREEDY_NMS;
+        else if (strcmp(nms_kind, "diounms") == 0) l.nms_kind = DIOU_NMS;
+        else if (strcmp(nms_kind, "cornersnms") == 0) l.nms_kind = CORNERS_NMS;
+        else l.nms_kind = DEFAULT_NMS;
+        printf("nms_kind: %s (%d), beta = %f \n", nms_kind, l.nms_kind, l.beta_nms);
+    }
+
+    char *yolo_point = option_find_str_quiet(options, "yolo_point", "center");
+    if (strcmp(yolo_point, "left_top") == 0) l.yolo_point = YOLO_LEFT_TOP;
+    else if (strcmp(yolo_point, "right_bottom") == 0) l.yolo_point = YOLO_RIGHT_BOTTOM;
+    else l.yolo_point = YOLO_CENTER;
+
+    fprintf(stderr, "[Gaussian_yolo] iou loss: %s (%d), iou_norm: %2.2f, obj_norm: %2.2f, cls_norm: %2.2f, delta_norm: %2.2f, scale: %2.2f, point: %d\n",
+        iou_loss, l.iou_loss, l.iou_normalizer, l.obj_normalizer, l.cls_normalizer, l.delta_normalizer, l.scale_x_y, l.yolo_point);
+
+    l.jitter = option_find_float(options, "jitter", .2);
+    l.resize = option_find_float_quiet(options, "resize", 1.0);
+
+    l.ignore_thresh = option_find_float(options, "ignore_thresh", .5);
+    l.truth_thresh = option_find_float(options, "truth_thresh", 1);
+    l.iou_thresh = option_find_float_quiet(options, "iou_thresh", 1); // recommended to use iou_thresh=0.213 in [yolo]
+    l.random = option_find_float_quiet(options, "random", 0);
+
+    char *map_file = option_find_str(options, "map", 0);
+    if (map_file) l.map = read_map(map_file);
+
+    a = option_find_str(options, "anchors", 0);
+    if (a) {
+        int len = strlen(a);
+        int n = 1;
+        int i;
+        for (i = 0; i < len; ++i) {
+            if (a[i] == ',') ++n;
+        }
+        for (i = 0; i < n; ++i) {
+            float bias = atof(a);
+            l.biases[i] = bias;
+            a = strchr(a, ',') + 1;
+        }
+    }
+    return l;
+}
+
+layer parse_region(list *options, size_params params)
+{
+    int coords = option_find_int(options, "coords", 4);
+    int classes = option_find_int(options, "classes", 20);
+    int num = option_find_int(options, "num", 1);
+    int max_boxes = option_find_int_quiet(options, "max", 200);
+
+    layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords, max_boxes);
+    if (l.outputs != params.inputs) {
+        printf("Error: l.outputs == params.inputs \n");
+        printf("filters= in the [convolutional]-layer doesn't correspond to classes= or num= in [region]-layer \n");
+        exit(EXIT_FAILURE);
+    }
+    //assert(l.outputs == params.inputs);
+
+    l.log = option_find_int_quiet(options, "log", 0);
+    l.sqrt = option_find_int_quiet(options, "sqrt", 0);
+
+    l.softmax = option_find_int(options, "softmax", 0);
+    l.focal_loss = option_find_int_quiet(options, "focal_loss", 0);
+    //l.max_boxes = option_find_int_quiet(options, "max",30);
+    l.jitter = option_find_float(options, "jitter", .2);
+    l.resize = option_find_float_quiet(options, "resize", 1.0);
+    l.rescore = option_find_int_quiet(options, "rescore",0);
+
+    l.thresh = option_find_float(options, "thresh", .5);
+    l.classfix = option_find_int_quiet(options, "classfix", 0);
+    l.absolute = option_find_int_quiet(options, "absolute", 0);
+    l.random = option_find_float_quiet(options, "random", 0);
+
+    l.coord_scale = option_find_float(options, "coord_scale", 1);
+    l.object_scale = option_find_float(options, "object_scale", 1);
+    l.noobject_scale = option_find_float(options, "noobject_scale", 1);
+    l.mask_scale = option_find_float(options, "mask_scale", 1);
+    l.class_scale = option_find_float(options, "class_scale", 1);
+    l.bias_match = option_find_int_quiet(options, "bias_match",0);
+
+    char *tree_file = option_find_str(options, "tree", 0);
+    if (tree_file) l.softmax_tree = read_tree(tree_file);
+    char *map_file = option_find_str(options, "map", 0);
+    if (map_file) l.map = read_map(map_file);
+
+    char *a = option_find_str(options, "anchors", 0);
+    if(a){
+        int len = strlen(a);
+        int n = 1;
+        int i;
+        for(i = 0; i < len; ++i){
+            if (a[i] == ',') ++n;
+        }
+        for(i = 0; i < n && i < num*2; ++i){
+            float bias = atof(a);
+            l.biases[i] = bias;
+            a = strchr(a, ',')+1;
+        }
+    }
+    return l;
+}
+detection_layer parse_detection(list *options, size_params params)
+{
+    int coords = option_find_int(options, "coords", 1);
+    int classes = option_find_int(options, "classes", 1);
+    int rescore = option_find_int(options, "rescore", 0);
+    int num = option_find_int(options, "num", 1);
+    int side = option_find_int(options, "side", 7);
+    detection_layer layer = make_detection_layer(params.batch, params.inputs, num, side, classes, coords, rescore);
+
+    layer.softmax = option_find_int(options, "softmax", 0);
+    layer.sqrt = option_find_int(options, "sqrt", 0);
+
+    layer.max_boxes = option_find_int_quiet(options, "max",200);
+    layer.coord_scale = option_find_float(options, "coord_scale", 1);
+    layer.forced = option_find_int(options, "forced", 0);
+    layer.object_scale = option_find_float(options, "object_scale", 1);
+    layer.noobject_scale = option_find_float(options, "noobject_scale", 1);
+    layer.class_scale = option_find_float(options, "class_scale", 1);
+    layer.jitter = option_find_float(options, "jitter", .2);
+    layer.resize = option_find_float_quiet(options, "resize", 1.0);
+    layer.random = option_find_float_quiet(options, "random", 0);
+    layer.reorg = option_find_int_quiet(options, "reorg", 0);
+    return layer;
+}
+
+cost_layer parse_cost(list *options, size_params params)
+{
+    char *type_s = option_find_str(options, "type", "sse");
+    COST_TYPE type = get_cost_type(type_s);
+    float scale = option_find_float_quiet(options, "scale",1);
+    cost_layer layer = make_cost_layer(params.batch, params.inputs, type, scale);
+    layer.ratio =  option_find_float_quiet(options, "ratio",0);
+    return layer;
+}
+
+crop_layer parse_crop(list *options, size_params params)
+{
+    int crop_height = option_find_int(options, "crop_height",1);
+    int crop_width = option_find_int(options, "crop_width",1);
+    int flip = option_find_int(options, "flip",0);
+    float angle = option_find_float(options, "angle",0);
+    float saturation = option_find_float(options, "saturation",1);
+    float exposure = option_find_float(options, "exposure",1);
+
+    int batch,h,w,c;
+    h = params.h;
+    w = params.w;
+    c = params.c;
+    batch=params.batch;
+    if(!(h && w && c)) error("Layer before crop layer must output image.");
+
+    int noadjust = option_find_int_quiet(options, "noadjust",0);
+
+    crop_layer l = make_crop_layer(batch,h,w,c,crop_height,crop_width,flip, angle, saturation, exposure);
+    l.shift = option_find_float(options, "shift", 0);
+    l.noadjust = noadjust;
+    return l;
+}
+
+layer parse_reorg(list *options, size_params params)
+{
+    int stride = option_find_int(options, "stride",1);
+    int reverse = option_find_int_quiet(options, "reverse",0);
+
+    int batch,h,w,c;
+    h = params.h;
+    w = params.w;
+    c = params.c;
+    batch=params.batch;
+    if(!(h && w && c)) error("Layer before reorg layer must output image.");
+
+    layer layer = make_reorg_layer(batch,w,h,c,stride,reverse);
+    return layer;
+}
+
+layer parse_reorg_old(list *options, size_params params)
+{
+    printf("\n reorg_old \n");
+    int stride = option_find_int(options, "stride", 1);
+    int reverse = option_find_int_quiet(options, "reverse", 0);
+
+    int batch, h, w, c;
+    h = params.h;
+    w = params.w;
+    c = params.c;
+    batch = params.batch;
+    if (!(h && w && c)) error("Layer before reorg layer must output image.");
+
+    layer layer = make_reorg_old_layer(batch, w, h, c, stride, reverse);
+    return layer;
+}
+
+maxpool_layer parse_local_avgpool(list *options, size_params params)
+{
+    int stride = option_find_int(options, "stride", 1);
+    int stride_x = option_find_int_quiet(options, "stride_x", stride);
+    int stride_y = option_find_int_quiet(options, "stride_y", stride);
+    int size = option_find_int(options, "size", stride);
+    int padding = option_find_int_quiet(options, "padding", size - 1);
+    int maxpool_depth = 0;
+    int out_channels = 1;
+    int antialiasing = 0;
+    const int avgpool = 1;
+
+    int batch, h, w, c;
+    h = params.h;
+    w = params.w;
+    c = params.c;
+    batch = params.batch;
+    if (!(h && w && c)) error("Layer before [local_avgpool] layer must output image.");
+
+    maxpool_layer layer = make_maxpool_layer(batch, h, w, c, size, stride_x, stride_y, padding, maxpool_depth, out_channels, antialiasing, avgpool, params.train);
+    return layer;
+}
+
+maxpool_layer parse_maxpool(list *options, size_params params)
+{
+    int stride = option_find_int(options, "stride",1);
+    int stride_x = option_find_int_quiet(options, "stride_x", stride);
+    int stride_y = option_find_int_quiet(options, "stride_y", stride);
+    int size = option_find_int(options, "size",stride);
+    int padding = option_find_int_quiet(options, "padding", size-1);
+    int maxpool_depth = option_find_int_quiet(options, "maxpool_depth", 0);
+    int out_channels = option_find_int_quiet(options, "out_channels", 1);
+    int antialiasing = option_find_int_quiet(options, "antialiasing", 0);
+    const int avgpool = 0;
+
+    int batch,h,w,c;
+    h = params.h;
+    w = params.w;
+    c = params.c;
+    batch=params.batch;
+    if(!(h && w && c)) error("Layer before [maxpool] layer must output image.");
+
+    maxpool_layer layer = make_maxpool_layer(batch, h, w, c, size, stride_x, stride_y, padding, maxpool_depth, out_channels, antialiasing, avgpool, params.train);
+    layer.maxpool_zero_nonmax = option_find_int_quiet(options, "maxpool_zero_nonmax", 0);
+    return layer;
+}
+
+avgpool_layer parse_avgpool(list *options, size_params params)
+{
+    int batch,w,h,c;
+    w = params.w;
+    h = params.h;
+    c = params.c;
+    batch=params.batch;
+    if(!(h && w && c)) error("Layer before avgpool layer must output image.");
+
+    avgpool_layer layer = make_avgpool_layer(batch,w,h,c);
+    return layer;
+}
+
+dropout_layer parse_dropout(list *options, size_params params)
+{
+    float probability = option_find_float(options, "probability", .2);
+    int dropblock = option_find_int_quiet(options, "dropblock", 0);
+    float dropblock_size_rel = option_find_float_quiet(options, "dropblock_size_rel", 0);
+    int dropblock_size_abs = option_find_float_quiet(options, "dropblock_size_abs", 0);
+    if (dropblock_size_abs > params.w || dropblock_size_abs > params.h) {
+        printf(" [dropout] - dropblock_size_abs = %d that is bigger than layer size %d x %d \n", dropblock_size_abs, params.w, params.h);
+        dropblock_size_abs = min_val_cmp(params.w, params.h);
+    }
+    if (dropblock && !dropblock_size_rel && !dropblock_size_abs) {
+        printf(" [dropout] - None of the parameters (dropblock_size_rel or dropblock_size_abs) are set, will be used: dropblock_size_abs = 7 \n");
+        dropblock_size_abs = 7;
+    }
+    if (dropblock_size_rel && dropblock_size_abs) {
+        printf(" [dropout] - Both parameters are set, only the parameter will be used: dropblock_size_abs = %d \n", dropblock_size_abs);
+        dropblock_size_rel = 0;
+    }
+    dropout_layer layer = make_dropout_layer(params.batch, params.inputs, probability, dropblock, dropblock_size_rel, dropblock_size_abs, params.w, params.h, params.c);
+    layer.out_w = params.w;
+    layer.out_h = params.h;
+    layer.out_c = params.c;
+    return layer;
+}
+
+layer parse_normalization(list *options, size_params params)
+{
+    float alpha = option_find_float(options, "alpha", .0001);
+    float beta =  option_find_float(options, "beta" , .75);
+    float kappa = option_find_float(options, "kappa", 1);
+    int size = option_find_int(options, "size", 5);
+    layer l = make_normalization_layer(params.batch, params.w, params.h, params.c, size, alpha, beta, kappa);
+    return l;
+}
+
+layer parse_batchnorm(list *options, size_params params)
+{
+    layer l = make_batchnorm_layer(params.batch, params.w, params.h, params.c, params.train);
+    return l;
+}
+
+layer parse_shortcut(list *options, size_params params, network net)
+{
+    char *activation_s = option_find_str(options, "activation", "linear");
+    ACTIVATION activation = get_activation(activation_s);
+
+    char *weights_type_str = option_find_str_quiet(options, "weights_type", "none");
+    WEIGHTS_TYPE_T weights_type = NO_WEIGHTS;
+    if(strcmp(weights_type_str, "per_feature") == 0 || strcmp(weights_type_str, "per_layer") == 0) weights_type = PER_FEATURE;
+    else if (strcmp(weights_type_str, "per_channel") == 0) weights_type = PER_CHANNEL;
+    else if (strcmp(weights_type_str, "none") != 0) {
+        printf("Error: Incorrect weights_type = %s \n Use one of: none, per_feature, per_channel \n", weights_type_str);
+        getchar();
+        exit(0);
+    }
+
+    char *weights_normalization_str = option_find_str_quiet(options, "weights_normalization", "none");
+    WEIGHTS_NORMALIZATION_T weights_normalization = NO_NORMALIZATION;
+    if (strcmp(weights_normalization_str, "relu") == 0 || strcmp(weights_normalization_str, "avg_relu") == 0) weights_normalization = RELU_NORMALIZATION;
+    else if (strcmp(weights_normalization_str, "softmax") == 0) weights_normalization = SOFTMAX_NORMALIZATION;
+    else if (strcmp(weights_type_str, "none") != 0) {
+        printf("Error: Incorrect weights_normalization = %s \n Use one of: none, relu, softmax \n", weights_normalization_str);
+        getchar();
+        exit(0);
+    }
+
+    char *l = option_find(options, "from");
+    int len = strlen(l);
+    if (!l) error("Route Layer must specify input layers: from = ...");
+    int n = 1;
+    int i;
+    for (i = 0; i < len; ++i) {
+        if (l[i] == ',') ++n;
+    }
+
+    int* layers = (int*)calloc(n, sizeof(int));
+    int* sizes = (int*)calloc(n, sizeof(int));
+    float **layers_output = (float **)calloc(n, sizeof(float *));
+    float **layers_delta = (float **)calloc(n, sizeof(float *));
+    float **layers_output_gpu = (float **)calloc(n, sizeof(float *));
+    float **layers_delta_gpu = (float **)calloc(n, sizeof(float *));
+
+    for (i = 0; i < n; ++i) {
+        int index = atoi(l);
+        l = strchr(l, ',') + 1;
+        if (index < 0) index = params.index + index;
+        layers[i] = index;
+        sizes[i] = params.net.layers[index].outputs;
+        layers_output[i] = params.net.layers[index].output;
+        layers_delta[i] = params.net.layers[index].delta;
+    }
+
+#ifdef GPU
+    for (i = 0; i < n; ++i) {
+        layers_output_gpu[i] = params.net.layers[layers[i]].output_gpu;
+        layers_delta_gpu[i] = params.net.layers[layers[i]].delta_gpu;
+    }
+#endif// GPU
+
+    layer s = make_shortcut_layer(params.batch, n, layers, sizes, params.w, params.h, params.c, layers_output, layers_delta,
+        layers_output_gpu, layers_delta_gpu, weights_type, weights_normalization, activation, params.train);
+
+    free(layers_output_gpu);
+    free(layers_delta_gpu);
+
+    for (i = 0; i < n; ++i) {
+        int index = layers[i];
+        assert(params.w == net.layers[index].out_w && params.h == net.layers[index].out_h);
+
+        if (params.w != net.layers[index].out_w || params.h != net.layers[index].out_h || params.c != net.layers[index].out_c)
+            fprintf(stderr, " (%4d x%4d x%4d) + (%4d x%4d x%4d) \n",
+                params.w, params.h, params.c, net.layers[index].out_w, net.layers[index].out_h, params.net.layers[index].out_c);
+    }
+
+    return s;
+}
+
+
+layer parse_scale_channels(list *options, size_params params, network net)
+{
+    char *l = option_find(options, "from");
+    int index = atoi(l);
+    if (index < 0) index = params.index + index;
+    int scale_wh = option_find_int_quiet(options, "scale_wh", 0);
+
+    int batch = params.batch;
+    layer from = net.layers[index];
+
+    layer s = make_scale_channels_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c, scale_wh);
+
+    char *activation_s = option_find_str_quiet(options, "activation", "linear");
+    ACTIVATION activation = get_activation(activation_s);
+    s.activation = activation;
+    if (activation == SWISH || activation == MISH) {
+        printf(" [scale_channels] layer doesn't support SWISH or MISH activations \n");
+    }
+    return s;
+}
+
+layer parse_sam(list *options, size_params params, network net)
+{
+    char *l = option_find(options, "from");
+    int index = atoi(l);
+    if (index < 0) index = params.index + index;
+
+    int batch = params.batch;
+    layer from = net.layers[index];
+
+    layer s = make_sam_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c);
+
+    char *activation_s = option_find_str_quiet(options, "activation", "linear");
+    ACTIVATION activation = get_activation(activation_s);
+    s.activation = activation;
+    if (activation == SWISH || activation == MISH) {
+        printf(" [sam] layer doesn't support SWISH or MISH activations \n");
+    }
+    return s;
+}
+
+layer parse_implicit(list *options, size_params params, network net)
+{
+    float mean_init = option_find_float(options, "mean", 0.0);
+    float std_init = option_find_float(options, "std", 0.2);
+    int filters = option_find_int(options, "filters", 128);
+    int atoms = option_find_int_quiet(options, "atoms", 1);
+
+    layer s = make_implicit_layer(params.batch, params.index, mean_init, std_init, filters, atoms);
+
+    return s;
+}
+
+layer parse_activation(list *options, size_params params)
+{
+    char *activation_s = option_find_str(options, "activation", "linear");
+    ACTIVATION activation = get_activation(activation_s);
+
+    layer l = make_activation_layer(params.batch, params.inputs, activation);
+
+    l.out_h = params.h;
+    l.out_w = params.w;
+    l.out_c = params.c;
+    l.h = params.h;
+    l.w = params.w;
+    l.c = params.c;
+
+    return l;
+}
+
+layer parse_upsample(list *options, size_params params, network net)
+{
+
+    int stride = option_find_int(options, "stride", 2);
+    layer l = make_upsample_layer(params.batch, params.w, params.h, params.c, stride);
+    l.scale = option_find_float_quiet(options, "scale", 1);
+    return l;
+}
+
+route_layer parse_route(list *options, size_params params)
+{
+    char *l = option_find(options, "layers");
+    if(!l) error("Route Layer must specify input layers");
+    int len = strlen(l);
+    int n = 1;
+    int i;
+    for(i = 0; i < len; ++i){
+        if (l[i] == ',') ++n;
+    }
+
+    int* layers = (int*)xcalloc(n, sizeof(int));
+    int* sizes = (int*)xcalloc(n, sizeof(int));
+    for(i = 0; i < n; ++i){
+        int index = atoi(l);
+        l = strchr(l, ',')+1;
+        if(index < 0) index = params.index + index;
+        layers[i] = index;
+        sizes[i] = params.net.layers[index].outputs;
+    }
+    int batch = params.batch;
+
+    int groups = option_find_int_quiet(options, "groups", 1);
+    int group_id = option_find_int_quiet(options, "group_id", 0);
+
+    route_layer layer = make_route_layer(batch, n, layers, sizes, groups, group_id);
+
+    convolutional_layer first = params.net.layers[layers[0]];
+    layer.out_w = first.out_w;
+    layer.out_h = first.out_h;
+    layer.out_c = first.out_c;
+    for(i = 1; i < n; ++i){
+        int index = layers[i];
+        convolutional_layer next = params.net.layers[index];
+        if(next.out_w == first.out_w && next.out_h == first.out_h){
+            layer.out_c += next.out_c;
+        }else{
+            fprintf(stderr, " The width and height of the input layers are different. \n");
+            layer.out_h = layer.out_w = layer.out_c = 0;
+        }
+    }
+    layer.out_c = layer.out_c / layer.groups;
+
+    layer.w = first.w;
+    layer.h = first.h;
+    layer.c = layer.out_c;
+
+    layer.stream = option_find_int_quiet(options, "stream", -1);
+    layer.wait_stream_id = option_find_int_quiet(options, "wait_stream", -1);
+
+    if (n > 3) fprintf(stderr, " \t    ");
+    else if (n > 1) fprintf(stderr, " \t            ");
+    else fprintf(stderr, " \t\t            ");
+
+    fprintf(stderr, "           ");
+    if (layer.groups > 1) fprintf(stderr, "%d/%d", layer.group_id, layer.groups);
+    else fprintf(stderr, "   ");
+    fprintf(stderr, " -> %4d x%4d x%4d \n", layer.out_w, layer.out_h, layer.out_c);
+
+    return layer;
+}
+
+learning_rate_policy get_policy(char *s)
+{
+    if (strcmp(s, "random")==0) return RANDOM;
+    if (strcmp(s, "poly")==0) return POLY;
+    if (strcmp(s, "constant")==0) return CONSTANT;
+    if (strcmp(s, "step")==0) return STEP;
+    if (strcmp(s, "exp")==0) return EXP;
+    if (strcmp(s, "sigmoid")==0) return SIG;
+    if (strcmp(s, "steps")==0) return STEPS;
+    if (strcmp(s, "sgdr")==0) return SGDR;
+    fprintf(stderr, "Couldn't find policy %s, going with constant\n", s);
+    return CONSTANT;
+}
+
+void parse_net_options(list *options, network *net)
+{
+    net->max_batches = option_find_int(options, "max_batches", 0);
+    net->batch = option_find_int(options, "batch",1);
+    net->learning_rate = option_find_float(options, "learning_rate", .001);
+    net->learning_rate_min = option_find_float_quiet(options, "learning_rate_min", .00001);
+    net->batches_per_cycle = option_find_int_quiet(options, "sgdr_cycle", net->max_batches);
+    net->batches_cycle_mult = option_find_int_quiet(options, "sgdr_mult", 2);
+    net->momentum = option_find_float(options, "momentum", .9);
+    net->decay = option_find_float(options, "decay", .0001);
+    int subdivs = option_find_int(options, "subdivisions",1);
+    net->time_steps = option_find_int_quiet(options, "time_steps",1);
+    net->track = option_find_int_quiet(options, "track", 0);
+    net->augment_speed = option_find_int_quiet(options, "augment_speed", 2);
+    net->init_sequential_subdivisions = net->sequential_subdivisions = option_find_int_quiet(options, "sequential_subdivisions", subdivs);
+    if (net->sequential_subdivisions > subdivs) net->init_sequential_subdivisions = net->sequential_subdivisions = subdivs;
+    net->try_fix_nan = option_find_int_quiet(options, "try_fix_nan", 0);
+    net->batch /= subdivs;          // mini_batch
+    const int mini_batch = net->batch;
+    net->batch *= net->time_steps;  // mini_batch * time_steps
+    net->subdivisions = subdivs;    // number of mini_batches
+
+    net->weights_reject_freq = option_find_int_quiet(options, "weights_reject_freq", 0);
+    net->equidistant_point = option_find_int_quiet(options, "equidistant_point", 0);
+    net->badlabels_rejection_percentage = option_find_float_quiet(options, "badlabels_rejection_percentage", 0);
+    net->num_sigmas_reject_badlabels = option_find_float_quiet(options, "num_sigmas_reject_badlabels", 0);
+    net->ema_alpha = option_find_float_quiet(options, "ema_alpha", 0);
+    *net->badlabels_reject_threshold = 0;
+    *net->delta_rolling_max = 0;
+    *net->delta_rolling_avg = 0;
+    *net->delta_rolling_std = 0;
+    *net->seen = 0;
+    *net->cur_iteration = 0;
+    *net->cuda_graph_ready = 0;
+    net->use_cuda_graph = option_find_int_quiet(options, "use_cuda_graph", 0);
+    net->loss_scale = option_find_float_quiet(options, "loss_scale", 1);
+    net->dynamic_minibatch = option_find_int_quiet(options, "dynamic_minibatch", 0);
+    net->optimized_memory = option_find_int_quiet(options, "optimized_memory", 0);
+    net->workspace_size_limit = (size_t)1024*1024 * option_find_float_quiet(options, "workspace_size_limit_MB", 1024);  // 1024 MB by default
+
+
+    net->adam = option_find_int_quiet(options, "adam", 0);
+    if(net->adam){
+        net->B1 = option_find_float(options, "B1", .9);
+        net->B2 = option_find_float(options, "B2", .999);
+        net->eps = option_find_float(options, "eps", .000001);
+    }
+
+    net->h = option_find_int_quiet(options, "height",0);
+    net->w = option_find_int_quiet(options, "width",0);
+    net->c = option_find_int_quiet(options, "channels",0);
+    net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c);
+    net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2);
+    net->min_crop = option_find_int_quiet(options, "min_crop",net->w);
+    net->flip = option_find_int_quiet(options, "flip", 1);
+    net->blur = option_find_int_quiet(options, "blur", 0);
+    net->gaussian_noise = option_find_int_quiet(options, "gaussian_noise", 0);
+    net->mixup = option_find_int_quiet(options, "mixup", 0);
+    int cutmix = option_find_int_quiet(options, "cutmix", 0);
+    int mosaic = option_find_int_quiet(options, "mosaic", 0);
+    if (mosaic && cutmix) net->mixup = 4;
+    else if (cutmix) net->mixup = 2;
+    else if (mosaic) net->mixup = 3;
+    net->letter_box = option_find_int_quiet(options, "letter_box", 0);
+    net->mosaic_bound = option_find_int_quiet(options, "mosaic_bound", 0);
+    net->contrastive = option_find_int_quiet(options, "contrastive", 0);
+    net->contrastive_jit_flip = option_find_int_quiet(options, "contrastive_jit_flip", 0);
+    net->contrastive_color = option_find_int_quiet(options, "contrastive_color", 0);
+    net->unsupervised = option_find_int_quiet(options, "unsupervised", 0);
+    if (net->contrastive && mini_batch < 2) {
+        printf(" Error: mini_batch size (batch/subdivisions) should be higher than 1 for Contrastive loss \n");
+        exit(0);
+    }
+    net->label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f);
+    net->resize_step = option_find_float_quiet(options, "resize_step", 32);
+    net->attention = option_find_int_quiet(options, "attention", 0);
+    net->adversarial_lr = option_find_float_quiet(options, "adversarial_lr", 0);
+    net->max_chart_loss = option_find_float_quiet(options, "max_chart_loss", 20.0);
+
+    net->angle = option_find_float_quiet(options, "angle", 0);
+    net->aspect = option_find_float_quiet(options, "aspect", 1);
+    net->saturation = option_find_float_quiet(options, "saturation", 1);
+    net->exposure = option_find_float_quiet(options, "exposure", 1);
+    net->hue = option_find_float_quiet(options, "hue", 0);
+    net->power = option_find_float_quiet(options, "power", 4);
+
+    if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied");
+
+    char *policy_s = option_find_str(options, "policy", "constant");
+    net->policy = get_policy(policy_s);
+    net->burn_in = option_find_int_quiet(options, "burn_in", 0);
+#ifdef GPU
+    if (net->gpu_index >= 0) {
+        char device_name[1024];
+        int compute_capability = get_gpu_compute_capability(net->gpu_index, device_name);
+#ifdef CUDNN_HALF
+        if (compute_capability >= 700) net->cudnn_half = 1;
+        else net->cudnn_half = 0;
+#endif// CUDNN_HALF
+        fprintf(stderr, " %d : compute_capability = %d, cudnn_half = %d, GPU: %s \n", net->gpu_index, compute_capability, net->cudnn_half, device_name);
+    }
+    else fprintf(stderr, " GPU isn't used \n");
+#endif// GPU
+    if(net->policy == STEP){
+        net->step = option_find_int(options, "step", 1);
+        net->scale = option_find_float(options, "scale", 1);
+    } else if (net->policy == STEPS || net->policy == SGDR){
+        char *l = option_find(options, "steps");
+        char *p = option_find(options, "scales");
+        char *s = option_find(options, "seq_scales");
+        if(net->policy == STEPS && (!l || !p)) error("STEPS policy must have steps and scales in cfg file");
+
+        if (l) {
+            int len = strlen(l);
+            int n = 1;
+            int i;
+            for (i = 0; i < len; ++i) {
+                if (l[i] == '#') break;
+                if (l[i] == ',') ++n;
+            }
+            int* steps = (int*)xcalloc(n, sizeof(int));
+            float* scales = (float*)xcalloc(n, sizeof(float));
+            float* seq_scales = (float*)xcalloc(n, sizeof(float));
+            for (i = 0; i < n; ++i) {
+                float scale = 1.0;
+                if (p) {
+                    scale = atof(p);
+                    p = strchr(p, ',') + 1;
+                }
+                float sequence_scale = 1.0;
+                if (s) {
+                    sequence_scale = atof(s);
+                    s = strchr(s, ',') + 1;
+                }
+                int step = atoi(l);
+                l = strchr(l, ',') + 1;
+                steps[i] = step;
+                scales[i] = scale;
+                seq_scales[i] = sequence_scale;
+            }
+            net->scales = scales;
+            net->steps = steps;
+            net->seq_scales = seq_scales;
+            net->num_steps = n;
+        }
+    } else if (net->policy == EXP){
+        net->gamma = option_find_float(options, "gamma", 1);
+    } else if (net->policy == SIG){
+        net->gamma = option_find_float(options, "gamma", 1);
+        net->step = option_find_int(options, "step", 1);
+    } else if (net->policy == POLY || net->policy == RANDOM){
+        //net->power = option_find_float(options, "power", 1);
+    }
+
+}
+
+int is_network(section *s)
+{
+    return (strcmp(s->type, "[net]")==0
+            || strcmp(s->type, "[network]")==0);
+}
+
+void set_train_only_bn(network net)
+{
+    int train_only_bn = 0;
+    int i;
+    for (i = net.n - 1; i >= 0; --i) {
+        if (net.layers[i].train_only_bn) train_only_bn = net.layers[i].train_only_bn;  // set l.train_only_bn for all previous layers
+        if (train_only_bn) {
+            net.layers[i].train_only_bn = train_only_bn;
+
+            if (net.layers[i].type == CONV_LSTM) {
+                net.layers[i].wf->train_only_bn = train_only_bn;
+                net.layers[i].wi->train_only_bn = train_only_bn;
+                net.layers[i].wg->train_only_bn = train_only_bn;
+                net.layers[i].wo->train_only_bn = train_only_bn;
+                net.layers[i].uf->train_only_bn = train_only_bn;
+                net.layers[i].ui->train_only_bn = train_only_bn;
+                net.layers[i].ug->train_only_bn = train_only_bn;
+                net.layers[i].uo->train_only_bn = train_only_bn;
+                if (net.layers[i].peephole) {
+                    net.layers[i].vf->train_only_bn = train_only_bn;
+                    net.layers[i].vi->train_only_bn = train_only_bn;
+                    net.layers[i].vo->train_only_bn = train_only_bn;
+                }
+            }
+            else if (net.layers[i].type == CRNN) {
+                net.layers[i].input_layer->train_only_bn = train_only_bn;
+                net.layers[i].self_layer->train_only_bn = train_only_bn;
+                net.layers[i].output_layer->train_only_bn = train_only_bn;
+            }
+        }
+    }
+}
+
+network parse_network_cfg(char *filename)
+{
+    return parse_network_cfg_custom(filename, 0, 0);
+}
+
+network parse_network_cfg_custom(char *filename, int batch, int time_steps)
+{
+    list *sections = read_cfg(filename);
+    node *n = sections->front;
+    if(!n) error("Config file has no sections");
+    network net = make_network(sections->size - 1);
+    net.gpu_index = gpu_index;
+    size_params params;
+
+    if (batch > 0) params.train = 0;    // allocates memory for Detection only
+    else params.train = 1;              // allocates memory for Detection & Training
+
+    section *s = (section *)n->val;
+    list *options = s->options;
+    if(!is_network(s)) error("First section must be [net] or [network]");
+    parse_net_options(options, &net);
+
+#ifdef GPU
+    printf("net.optimized_memory = %d \n", net.optimized_memory);
+    if (net.optimized_memory >= 2 && params.train) {
+        pre_allocate_pinned_memory((size_t)1024 * 1024 * 1024 * 8);   // pre-allocate 8 GB CPU-RAM for pinned memory
+    }
+#endif  // GPU
+
+    params.h = net.h;
+    params.w = net.w;
+    params.c = net.c;
+    params.inputs = net.inputs;
+    if (batch > 0) net.batch = batch;
+    if (time_steps > 0) net.time_steps = time_steps;
+    if (net.batch < 1) net.batch = 1;
+    if (net.time_steps < 1) net.time_steps = 1;
+    if (net.batch < net.time_steps) net.batch = net.time_steps;
+    params.batch = net.batch;
+    params.time_steps = net.time_steps;
+    params.net = net;
+    printf("mini_batch = %d, batch = %d, time_steps = %d, train = %d \n", net.batch, net.batch * net.subdivisions, net.time_steps, params.train);
+
+    int avg_outputs = 0;
+    int avg_counter = 0;
+    float bflops = 0;
+    size_t workspace_size = 0;
+    size_t max_inputs = 0;
+    size_t max_outputs = 0;
+    int receptive_w = 1, receptive_h = 1;
+    int receptive_w_scale = 1, receptive_h_scale = 1;
+    const int show_receptive_field = option_find_float_quiet(options, "show_receptive_field", 0);
+
+    n = n->next;
+    int count = 0;
+    free_section(s);
+    fprintf(stderr, "   layer   filters  size/strd(dil)      input                output\n");
+    while(n){
+        params.index = count;
+        fprintf(stderr, "%4d ", count);
+        s = (section *)n->val;
+        options = s->options;
+        layer l = { (LAYER_TYPE)0 };
+        LAYER_TYPE lt = string_to_layer_type(s->type);
+        if(lt == CONVOLUTIONAL){
+            l = parse_convolutional(options, params);
+        }else if(lt == LOCAL){
+            l = parse_local(options, params);
+        }else if(lt == ACTIVE){
+            l = parse_activation(options, params);
+        }else if(lt == RNN){
+            l = parse_rnn(options, params);
+        }else if(lt == GRU){
+            l = parse_gru(options, params);
+        }else if(lt == LSTM){
+            l = parse_lstm(options, params);
+        }else if (lt == CONV_LSTM) {
+            l = parse_conv_lstm(options, params);
+        }else if (lt == HISTORY) {
+            l = parse_history(options, params);
+        }else if(lt == CRNN){
+            l = parse_crnn(options, params);
+        }else if(lt == CONNECTED){
+            l = parse_connected(options, params);
+        }else if(lt == CROP){
+            l = parse_crop(options, params);
+        }else if(lt == COST){
+            l = parse_cost(options, params);
+            l.keep_delta_gpu = 1;
+        }else if(lt == REGION){
+            l = parse_region(options, params);
+            l.keep_delta_gpu = 1;
+        }else if (lt == YOLO) {
+            l = parse_yolo(options, params);
+            l.keep_delta_gpu = 1;
+        }else if (lt == GAUSSIAN_YOLO) {
+            l = parse_gaussian_yolo(options, params);
+            l.keep_delta_gpu = 1;
+        }else if(lt == DETECTION){
+            l = parse_detection(options, params);
+        }else if(lt == SOFTMAX){
+            l = parse_softmax(options, params);
+            net.hierarchy = l.softmax_tree;
+            l.keep_delta_gpu = 1;
+        }else if (lt == CONTRASTIVE) {
+            l = parse_contrastive(options, params);
+            l.keep_delta_gpu = 1;
+        }else if(lt == NORMALIZATION){
+            l = parse_normalization(options, params);
+        }else if(lt == BATCHNORM){
+            l = parse_batchnorm(options, params);
+        }else if(lt == MAXPOOL){
+            l = parse_maxpool(options, params);
+        }else if (lt == LOCAL_AVGPOOL) {
+            l = parse_local_avgpool(options, params);
+        }else if(lt == REORG){
+            l = parse_reorg(options, params);        }
+        else if (lt == REORG_OLD) {
+            l = parse_reorg_old(options, params);
+        }else if(lt == AVGPOOL){
+            l = parse_avgpool(options, params);
+        }else if(lt == ROUTE){
+            l = parse_route(options, params);
+            int k;
+            for (k = 0; k < l.n; ++k) {
+                net.layers[l.input_layers[k]].use_bin_output = 0;
+                net.layers[l.input_layers[k]].keep_delta_gpu = 1;
+            }
+        }else if (lt == UPSAMPLE) {
+            l = parse_upsample(options, params, net);
+        }else if(lt == SHORTCUT){
+            l = parse_shortcut(options, params, net);
+            net.layers[count - 1].use_bin_output = 0;
+            net.layers[l.index].use_bin_output = 0;
+            net.layers[l.index].keep_delta_gpu = 1;
+        }else if (lt == SCALE_CHANNELS) {
+            l = parse_scale_channels(options, params, net);
+            net.layers[count - 1].use_bin_output = 0;
+            net.layers[l.index].use_bin_output = 0;
+            net.layers[l.index].keep_delta_gpu = 1;
+        }
+        else if (lt == SAM) {
+            l = parse_sam(options, params, net);
+            net.layers[count - 1].use_bin_output = 0;
+            net.layers[l.index].use_bin_output = 0;
+            net.layers[l.index].keep_delta_gpu = 1;
+        } else if (lt == IMPLICIT) {
+            l = parse_implicit(options, params, net);
+        }else if(lt == DROPOUT){
+            l = parse_dropout(options, params);
+            l.output = net.layers[count-1].output;
+            l.delta = net.layers[count-1].delta;
+#ifdef GPU
+            l.output_gpu = net.layers[count-1].output_gpu;
+            l.delta_gpu = net.layers[count-1].delta_gpu;
+            l.keep_delta_gpu = 1;
+#endif
+        }
+        else if (lt == EMPTY) {
+            layer empty_layer = {(LAYER_TYPE)0};
+            l = empty_layer;
+            l.type = EMPTY;
+            l.w = l.out_w = params.w;
+            l.h = l.out_h = params.h;
+            l.c = l.out_c = params.c;
+            l.batch = params.batch;
+            l.inputs = l.outputs = params.inputs;
+            l.output = net.layers[count - 1].output;
+            l.delta = net.layers[count - 1].delta;
+            l.forward = empty_func;
+            l.backward = empty_func;
+#ifdef GPU
+            l.output_gpu = net.layers[count - 1].output_gpu;
+            l.delta_gpu = net.layers[count - 1].delta_gpu;
+            l.keep_delta_gpu = 1;
+            l.forward_gpu = empty_func;
+            l.backward_gpu = empty_func;
+#endif
+            fprintf(stderr, "empty \n");
+        }else{
+            fprintf(stderr, "Type not recognized: %s\n", s->type);
+        }
+
+        // calculate receptive field
+        if(show_receptive_field)
+        {
+            int dilation = max_val_cmp(1, l.dilation);
+            int stride = max_val_cmp(1, l.stride);
+            int size = max_val_cmp(1, l.size);
+
+            if (l.type == UPSAMPLE || (l.type == REORG))
+            {
+
+                l.receptive_w = receptive_w;
+                l.receptive_h = receptive_h;
+                l.receptive_w_scale = receptive_w_scale = receptive_w_scale / stride;
+                l.receptive_h_scale = receptive_h_scale = receptive_h_scale / stride;
+
+            }
+            else {
+                if (l.type == ROUTE) {
+                    receptive_w = receptive_h = receptive_w_scale = receptive_h_scale = 0;
+                    int k;
+                    for (k = 0; k < l.n; ++k) {
+                        layer route_l = net.layers[l.input_layers[k]];
+                        receptive_w = max_val_cmp(receptive_w, route_l.receptive_w);
+                        receptive_h = max_val_cmp(receptive_h, route_l.receptive_h);
+                        receptive_w_scale = max_val_cmp(receptive_w_scale, route_l.receptive_w_scale);
+                        receptive_h_scale = max_val_cmp(receptive_h_scale, route_l.receptive_h_scale);
+                    }
+                }
+                else
+                {
+                    int increase_receptive = size + (dilation - 1) * 2 - 1;// stride;
+                    increase_receptive = max_val_cmp(0, increase_receptive);
+
+                    receptive_w += increase_receptive * receptive_w_scale;
+                    receptive_h += increase_receptive * receptive_h_scale;
+                    receptive_w_scale *= stride;
+                    receptive_h_scale *= stride;
+                }
+
+                l.receptive_w = receptive_w;
+                l.receptive_h = receptive_h;
+                l.receptive_w_scale = receptive_w_scale;
+                l.receptive_h_scale = receptive_h_scale;
+            }
+            //printf(" size = %d, dilation = %d, stride = %d, receptive_w = %d, receptive_w_scale = %d - ", size, dilation, stride, receptive_w, receptive_w_scale);
+
+            int cur_receptive_w = receptive_w;
+            int cur_receptive_h = receptive_h;
+
+            fprintf(stderr, "%4d - receptive field: %d x %d \n", count, cur_receptive_w, cur_receptive_h);
+        }
+
+#ifdef GPU
+        // futher GPU-memory optimization: net.optimized_memory == 2
+        l.optimized_memory = net.optimized_memory;
+        if (net.optimized_memory >= 2 && params.train && l.type != DROPOUT)
+        {
+            if (l.output_gpu) {
+                cuda_free(l.output_gpu);
+                //l.output_gpu = cuda_make_array_pinned(l.output, l.batch*l.outputs); // l.steps
+                l.output_gpu = cuda_make_array_pinned_preallocated(NULL, l.batch*l.outputs); // l.steps
+            }
+            if (l.activation_input_gpu) {
+                cuda_free(l.activation_input_gpu);
+                l.activation_input_gpu = cuda_make_array_pinned_preallocated(NULL, l.batch*l.outputs); // l.steps
+            }
+
+            if (l.x_gpu) {
+                cuda_free(l.x_gpu);
+                l.x_gpu = cuda_make_array_pinned_preallocated(NULL, l.batch*l.outputs); // l.steps
+            }
+
+            // maximum optimization
+            if (net.optimized_memory >= 3 && l.type != DROPOUT) {
+                if (l.delta_gpu) {
+                    cuda_free(l.delta_gpu);
+                    //l.delta_gpu = cuda_make_array_pinned_preallocated(NULL, l.batch*l.outputs); // l.steps
+                    //printf("\n\n PINNED DELTA GPU = %d \n", l.batch*l.outputs);
+                }
+            }
+
+            if (l.type == CONVOLUTIONAL) {
+                set_specified_workspace_limit(&l, net.workspace_size_limit);   // workspace size limit 1 GB
+            }
+        }
+#endif // GPU
+
+        l.clip = option_find_float_quiet(options, "clip", 0);
+        l.dynamic_minibatch = net.dynamic_minibatch;
+        l.onlyforward = option_find_int_quiet(options, "onlyforward", 0);
+        l.dont_update = option_find_int_quiet(options, "dont_update", 0);
+        l.burnin_update = option_find_int_quiet(options, "burnin_update", 0);
+        l.stopbackward = option_find_int_quiet(options, "stopbackward", 0);
+        l.train_only_bn = option_find_int_quiet(options, "train_only_bn", 0);
+        l.dontload = option_find_int_quiet(options, "dontload", 0);
+        l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0);
+        l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1);
+        option_unused(options);
+
+        net.layers[count] = l;
+        if (l.workspace_size > workspace_size) workspace_size = l.workspace_size;
+        if (l.inputs > max_inputs) max_inputs = l.inputs;
+        if (l.outputs > max_outputs) max_outputs = l.outputs;
+        free_section(s);
+        n = n->next;
+        ++count;
+        if(n){
+            if (l.antialiasing) {
+                params.h = l.input_layer->out_h;
+                params.w = l.input_layer->out_w;
+                params.c = l.input_layer->out_c;
+                params.inputs = l.input_layer->outputs;
+            }
+            else {
+                params.h = l.out_h;
+                params.w = l.out_w;
+                params.c = l.out_c;
+                params.inputs = l.outputs;
+            }
+        }
+        if (l.bflops > 0) bflops += l.bflops;
+
+        if (l.w > 1 && l.h > 1) {
+            avg_outputs += l.outputs;
+            avg_counter++;
+        }
+    }
+    free_list(sections);
+
+#ifdef GPU
+    if (net.optimized_memory && params.train)
+    {
+        int k;
+        for (k = 0; k < net.n; ++k) {
+            layer l = net.layers[k];
+            // delta GPU-memory optimization: net.optimized_memory == 1
+            if (!l.keep_delta_gpu) {
+                const size_t delta_size = l.outputs*l.batch; // l.steps
+                if (net.max_delta_gpu_size < delta_size) {
+                    net.max_delta_gpu_size = delta_size;
+                    if (net.global_delta_gpu) cuda_free(net.global_delta_gpu);
+                    if (net.state_delta_gpu) cuda_free(net.state_delta_gpu);
+                    assert(net.max_delta_gpu_size > 0);
+                    net.global_delta_gpu = (float *)cuda_make_array(NULL, net.max_delta_gpu_size);
+                    net.state_delta_gpu = (float *)cuda_make_array(NULL, net.max_delta_gpu_size);
+                }
+                if (l.delta_gpu) {
+                    if (net.optimized_memory >= 3) {}
+                    else cuda_free(l.delta_gpu);
+                }
+                l.delta_gpu = net.global_delta_gpu;
+            }
+
+            // maximum optimization
+            if (net.optimized_memory >= 3 && l.type != DROPOUT) {
+                if (l.delta_gpu && l.keep_delta_gpu) {
+                    //cuda_free(l.delta_gpu);   // already called above
+                    l.delta_gpu = cuda_make_array_pinned_preallocated(NULL, l.batch*l.outputs); // l.steps
+                    //printf("\n\n PINNED DELTA GPU = %d \n", l.batch*l.outputs);
+                }
+            }
+
+            net.layers[k] = l;
+        }
+    }
+#endif
+
+    set_train_only_bn(net); // set l.train_only_bn for all required layers
+
+    net.outputs = get_network_output_size(net);
+    net.output = get_network_output(net);
+    avg_outputs = avg_outputs / avg_counter;
+    fprintf(stderr, "Total BFLOPS %5.3f \n", bflops);
+    fprintf(stderr, "avg_outputs = %d \n", avg_outputs);
+#ifdef GPU
+    get_cuda_stream();
+    //get_cuda_memcpy_stream();
+    if (gpu_index >= 0)
+    {
+        int size = get_network_input_size(net) * net.batch;
+        net.input_state_gpu = cuda_make_array(0, size);
+        if (cudaSuccess == cudaHostAlloc(&net.input_pinned_cpu, size * sizeof(float), cudaHostRegisterMapped)) net.input_pinned_cpu_flag = 1;
+        else {
+            cudaGetLastError(); // reset CUDA-error
+            net.input_pinned_cpu = (float*)xcalloc(size, sizeof(float));
+        }
+
+        // pre-allocate memory for inference on Tensor Cores (fp16)
+        *net.max_input16_size = 0;
+        *net.max_output16_size = 0;
+        if (net.cudnn_half) {
+            *net.max_input16_size = max_inputs;
+            CHECK_CUDA(cudaMalloc((void **)net.input16_gpu, *net.max_input16_size * sizeof(short))); //sizeof(half)
+            *net.max_output16_size = max_outputs;
+            CHECK_CUDA(cudaMalloc((void **)net.output16_gpu, *net.max_output16_size * sizeof(short))); //sizeof(half)
+        }
+        if (workspace_size) {
+            fprintf(stderr, " Allocate additional workspace_size = %1.2f MB \n", (float)workspace_size/1000000);
+            net.workspace = cuda_make_array(0, workspace_size / sizeof(float) + 1);
+        }
+        else {
+            net.workspace = (float*)xcalloc(1, workspace_size);
+        }
+    }
+#else
+        if (workspace_size) {
+            net.workspace = (float*)xcalloc(1, workspace_size);
+        }
+#endif
+
+    LAYER_TYPE lt = net.layers[net.n - 1].type;
+    if ((net.w % 32 != 0 || net.h % 32 != 0) && (lt == YOLO || lt == REGION || lt == DETECTION)) {
+        printf("\n Warning: width=%d and height=%d in cfg-file must be divisible by 32 for default networks Yolo v1/v2/v3!!! \n\n",
+            net.w, net.h);
+    }
+    return net;
+}
+
+
+
+list *read_cfg(char *filename)
+{
+    FILE *file = fopen(filename, "r");
+    if(file == 0) file_error(filename);
+    char *line;
+    int nu = 0;
+    list *sections = make_list();
+    section *current = 0;
+    while((line=fgetl(file)) != 0){
+        ++ nu;
+        strip(line);
+        switch(line[0]){
+            case '[':
+                current = (section*)xmalloc(sizeof(section));
+                list_insert(sections, current);
+                current->options = make_list();
+                current->type = line;
+                break;
+            case '\0':
+            case '#':
+            case ';':
+                free(line);
+                break;
+            default:
+                if(!read_option(line, current->options)){
+                    fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line);
+                    free(line);
+                }
+                break;
+        }
+    }
+    fclose(file);
+    return sections;
+}
+
+void save_convolutional_weights_binary(layer l, FILE *fp)
+{
+#ifdef GPU
+    if(gpu_index >= 0){
+        pull_convolutional_layer(l);
+    }
+#endif
+    int size = (l.c/l.groups)*l.size*l.size;
+    binarize_weights(l.weights, l.n, size, l.binary_weights);
+    int i, j, k;
+    fwrite(l.biases, sizeof(float), l.n, fp);
+    if (l.batch_normalize){
+        fwrite(l.scales, sizeof(float), l.n, fp);
+        fwrite(l.rolling_mean, sizeof(float), l.n, fp);
+        fwrite(l.rolling_variance, sizeof(float), l.n, fp);
+    }
+    for(i = 0; i < l.n; ++i){
+        float mean = l.binary_weights[i*size];
+        if(mean < 0) mean = -mean;
+        fwrite(&mean, sizeof(float), 1, fp);
+        for(j = 0; j < size/8; ++j){
+            int index = i*size + j*8;
+            unsigned char c = 0;
+            for(k = 0; k < 8; ++k){
+                if (j*8 + k >= size) break;
+                if (l.binary_weights[index + k] > 0) c = (c | 1<<k);
+            }
+            fwrite(&c, sizeof(char), 1, fp);
+        }
+    }
+}
+
+void save_shortcut_weights(layer l, FILE *fp)
+{
+#ifdef GPU
+    if (gpu_index >= 0) {
+        pull_shortcut_layer(l);
+        printf("\n pull_shortcut_layer \n");
+    }
+#endif
+    int i;
+    //if(l.weight_updates) for (i = 0; i < l.nweights; ++i) printf(" %f, ", l.weight_updates[i]);
+    //printf(" l.nweights = %d - update \n", l.nweights);
+    for (i = 0; i < l.nweights; ++i) printf(" %f, ", l.weights[i]);
+    printf(" l.nweights = %d \n\n", l.nweights);
+
+    int num = l.nweights;
+    fwrite(l.weights, sizeof(float), num, fp);
+}
+
+void save_implicit_weights(layer l, FILE *fp)
+{
+#ifdef GPU
+    if (gpu_index >= 0) {
+        pull_implicit_layer(l);
+        //printf("\n pull_implicit_layer \n");
+    }
+#endif
+    int i;
+    //if(l.weight_updates) for (i = 0; i < l.nweights; ++i) printf(" %f, ", l.weight_updates[i]);
+    //printf(" l.nweights = %d - update \n", l.nweights);
+    //for (i = 0; i < l.nweights; ++i) printf(" %f, ", l.weights[i]);
+    //printf(" l.nweights = %d \n\n", l.nweights);
+
+    int num = l.nweights;
+    fwrite(l.weights, sizeof(float), num, fp);
+}
+
+void save_convolutional_weights(layer l, FILE *fp)
+{
+    if(l.binary){
+        //save_convolutional_weights_binary(l, fp);
+        //return;
+    }
+#ifdef GPU
+    if(gpu_index >= 0){
+        pull_convolutional_layer(l);
+    }
+#endif
+    int num = l.nweights;
+    fwrite(l.biases, sizeof(float), l.n, fp);
+    if (l.batch_normalize){
+        fwrite(l.scales, sizeof(float), l.n, fp);
+        fwrite(l.rolling_mean, sizeof(float), l.n, fp);
+        fwrite(l.rolling_variance, sizeof(float), l.n, fp);
+    }
+    fwrite(l.weights, sizeof(float), num, fp);
+    //if(l.adam){
+    //    fwrite(l.m, sizeof(float), num, fp);
+    //    fwrite(l.v, sizeof(float), num, fp);
+    //}
+}
+
+void save_convolutional_weights_ema(layer l, FILE *fp)
+{
+    if (l.binary) {
+        //save_convolutional_weights_binary(l, fp);
+        //return;
+    }
+#ifdef GPU
+    if (gpu_index >= 0) {
+        pull_convolutional_layer(l);
+    }
+#endif
+    int num = l.nweights;
+    fwrite(l.biases_ema, sizeof(float), l.n, fp);
+    if (l.batch_normalize) {
+        fwrite(l.scales_ema, sizeof(float), l.n, fp);
+        fwrite(l.rolling_mean, sizeof(float), l.n, fp);
+        fwrite(l.rolling_variance, sizeof(float), l.n, fp);
+    }
+    fwrite(l.weights_ema, sizeof(float), num, fp);
+    //if(l.adam){
+    //    fwrite(l.m, sizeof(float), num, fp);
+    //    fwrite(l.v, sizeof(float), num, fp);
+    //}
+}
+
+void save_batchnorm_weights(layer l, FILE *fp)
+{
+#ifdef GPU
+    if(gpu_index >= 0){
+        pull_batchnorm_layer(l);
+    }
+#endif
+    fwrite(l.biases, sizeof(float), l.c, fp);
+    fwrite(l.scales, sizeof(float), l.c, fp);
+    fwrite(l.rolling_mean, sizeof(float), l.c, fp);
+    fwrite(l.rolling_variance, sizeof(float), l.c, fp);
+}
+
+void save_connected_weights(layer l, FILE *fp)
+{
+#ifdef GPU
+    if(gpu_index >= 0){
+        pull_connected_layer(l);
+    }
+#endif
+    fwrite(l.biases, sizeof(float), l.outputs, fp);
+    fwrite(l.weights, sizeof(float), l.outputs*l.inputs, fp);
+    if (l.batch_normalize){
+        fwrite(l.scales, sizeof(float), l.outputs, fp);
+        fwrite(l.rolling_mean, sizeof(float), l.outputs, fp);
+        fwrite(l.rolling_variance, sizeof(float), l.outputs, fp);
+    }
+}
+
+void save_weights_upto(network net, char *filename, int cutoff, int save_ema)
+{
+#ifdef GPU
+    if(net.gpu_index >= 0){
+        cuda_set_device(net.gpu_index);
+    }
+#endif
+    fprintf(stderr, "Saving weights to %s\n", filename);
+    FILE *fp = fopen(filename, "wb");
+    if(!fp) file_error(filename);
+
+    int major = MAJOR_VERSION;
+    int minor = MINOR_VERSION;
+    int revision = PATCH_VERSION;
+    fwrite(&major, sizeof(int), 1, fp);
+    fwrite(&minor, sizeof(int), 1, fp);
+    fwrite(&revision, sizeof(int), 1, fp);
+    (*net.seen) = get_current_iteration(net) * net.batch * net.subdivisions; // remove this line, when you will save to weights-file both: seen & cur_iteration
+    fwrite(net.seen, sizeof(uint64_t), 1, fp);
+
+    int i;
+    for(i = 0; i < net.n && i < cutoff; ++i){
+        layer l = net.layers[i];
+        if (l.type == CONVOLUTIONAL && l.share_layer == NULL) {
+            if (save_ema) {
+                save_convolutional_weights_ema(l, fp);
+            }
+            else {
+                save_convolutional_weights(l, fp);
+            }
+        } if (l.type == SHORTCUT && l.nweights > 0) {
+            save_shortcut_weights(l, fp);
+        } if (l.type == IMPLICIT) {
+            save_implicit_weights(l, fp);
+        } if(l.type == CONNECTED){
+            save_connected_weights(l, fp);
+        } if(l.type == BATCHNORM){
+            save_batchnorm_weights(l, fp);
+        } if(l.type == RNN){
+            save_connected_weights(*(l.input_layer), fp);
+            save_connected_weights(*(l.self_layer), fp);
+            save_connected_weights(*(l.output_layer), fp);
+        } if(l.type == GRU){
+            save_connected_weights(*(l.input_z_layer), fp);
+            save_connected_weights(*(l.input_r_layer), fp);
+            save_connected_weights(*(l.input_h_layer), fp);
+            save_connected_weights(*(l.state_z_layer), fp);
+            save_connected_weights(*(l.state_r_layer), fp);
+            save_connected_weights(*(l.state_h_layer), fp);
+        } if(l.type == LSTM){
+            save_connected_weights(*(l.wf), fp);
+            save_connected_weights(*(l.wi), fp);
+            save_connected_weights(*(l.wg), fp);
+            save_connected_weights(*(l.wo), fp);
+            save_connected_weights(*(l.uf), fp);
+            save_connected_weights(*(l.ui), fp);
+            save_connected_weights(*(l.ug), fp);
+            save_connected_weights(*(l.uo), fp);
+        } if (l.type == CONV_LSTM) {
+            if (l.peephole) {
+                save_convolutional_weights(*(l.vf), fp);
+                save_convolutional_weights(*(l.vi), fp);
+                save_convolutional_weights(*(l.vo), fp);
+            }
+            save_convolutional_weights(*(l.wf), fp);
+            if (!l.bottleneck) {
+                save_convolutional_weights(*(l.wi), fp);
+                save_convolutional_weights(*(l.wg), fp);
+                save_convolutional_weights(*(l.wo), fp);
+            }
+            save_convolutional_weights(*(l.uf), fp);
+            save_convolutional_weights(*(l.ui), fp);
+            save_convolutional_weights(*(l.ug), fp);
+            save_convolutional_weights(*(l.uo), fp);
+        } if(l.type == CRNN){
+            save_convolutional_weights(*(l.input_layer), fp);
+            save_convolutional_weights(*(l.self_layer), fp);
+            save_convolutional_weights(*(l.output_layer), fp);
+        } if(l.type == LOCAL){
+#ifdef GPU
+            if(gpu_index >= 0){
+                pull_local_layer(l);
+            }
+#endif
+            int locations = l.out_w*l.out_h;
+            int size = l.size*l.size*l.c*l.n*locations;
+            fwrite(l.biases, sizeof(float), l.outputs, fp);
+            fwrite(l.weights, sizeof(float), size, fp);
+        }
+    }
+    fclose(fp);
+}
+void save_weights(network net, char *filename)
+{
+    save_weights_upto(net, filename, net.n, 0);
+}
+
+void transpose_matrix(float *a, int rows, int cols)
+{
+    float* transpose = (float*)xcalloc(rows * cols, sizeof(float));
+    int x, y;
+    for(x = 0; x < rows; ++x){
+        for(y = 0; y < cols; ++y){
+            transpose[y*rows + x] = a[x*cols + y];
+        }
+    }
+    memcpy(a, transpose, rows*cols*sizeof(float));
+    free(transpose);
+}
+
+void load_connected_weights(layer l, FILE *fp, int transpose)
+{
+    fread(l.biases, sizeof(float), l.outputs, fp);
+    fread(l.weights, sizeof(float), l.outputs*l.inputs, fp);
+    if(transpose){
+        transpose_matrix(l.weights, l.inputs, l.outputs);
+    }
+    //printf("Biases: %f mean %f variance\n", mean_array(l.biases, l.outputs), variance_array(l.biases, l.outputs));
+    //printf("Weights: %f mean %f variance\n", mean_array(l.weights, l.outputs*l.inputs), variance_array(l.weights, l.outputs*l.inputs));
+    if (l.batch_normalize && (!l.dontloadscales)){
+        fread(l.scales, sizeof(float), l.outputs, fp);
+        fread(l.rolling_mean, sizeof(float), l.outputs, fp);
+        fread(l.rolling_variance, sizeof(float), l.outputs, fp);
+        //printf("Scales: %f mean %f variance\n", mean_array(l.scales, l.outputs), variance_array(l.scales, l.outputs));
+        //printf("rolling_mean: %f mean %f variance\n", mean_array(l.rolling_mean, l.outputs), variance_array(l.rolling_mean, l.outputs));
+        //printf("rolling_variance: %f mean %f variance\n", mean_array(l.rolling_variance, l.outputs), variance_array(l.rolling_variance, l.outputs));
+    }
+#ifdef GPU
+    if(gpu_index >= 0){
+        push_connected_layer(l);
+    }
+#endif
+}
+
+void load_batchnorm_weights(layer l, FILE *fp)
+{
+    fread(l.biases, sizeof(float), l.c, fp);
+    fread(l.scales, sizeof(float), l.c, fp);
+    fread(l.rolling_mean, sizeof(float), l.c, fp);
+    fread(l.rolling_variance, sizeof(float), l.c, fp);
+#ifdef GPU
+    if(gpu_index >= 0){
+        push_batchnorm_layer(l);
+    }
+#endif
+}
+
+void load_convolutional_weights_binary(layer l, FILE *fp)
+{
+    fread(l.biases, sizeof(float), l.n, fp);
+    if (l.batch_normalize && (!l.dontloadscales)){
+        fread(l.scales, sizeof(float), l.n, fp);
+        fread(l.rolling_mean, sizeof(float), l.n, fp);
+        fread(l.rolling_variance, sizeof(float), l.n, fp);
+    }
+    int size = (l.c / l.groups)*l.size*l.size;
+    int i, j, k;
+    for(i = 0; i < l.n; ++i){
+        float mean = 0;
+        fread(&mean, sizeof(float), 1, fp);
+        for(j = 0; j < size/8; ++j){
+            int index = i*size + j*8;
+            unsigned char c = 0;
+            fread(&c, sizeof(char), 1, fp);
+            for(k = 0; k < 8; ++k){
+                if (j*8 + k >= size) break;
+                l.weights[index + k] = (c & 1<<k) ? mean : -mean;
+            }
+        }
+    }
+#ifdef GPU
+    if(gpu_index >= 0){
+        push_convolutional_layer(l);
+    }
+#endif
+}
+
+void load_convolutional_weights(layer l, FILE *fp)
+{
+    if(l.binary){
+        //load_convolutional_weights_binary(l, fp);
+        //return;
+    }
+    int num = l.nweights;
+    int read_bytes;
+    read_bytes = fread(l.biases, sizeof(float), l.n, fp);
+    if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.biases - l.index = %d \n", l.index);
+    //fread(l.weights, sizeof(float), num, fp); // as in connected layer
+    if (l.batch_normalize && (!l.dontloadscales)){
+        read_bytes = fread(l.scales, sizeof(float), l.n, fp);
+        if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.scales - l.index = %d \n", l.index);
+        read_bytes = fread(l.rolling_mean, sizeof(float), l.n, fp);
+        if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.rolling_mean - l.index = %d \n", l.index);
+        read_bytes = fread(l.rolling_variance, sizeof(float), l.n, fp);
+        if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.rolling_variance - l.index = %d \n", l.index);
+        if(0){
+            int i;
+            for(i = 0; i < l.n; ++i){
+                printf("%g, ", l.rolling_mean[i]);
+            }
+            printf("\n");
+            for(i = 0; i < l.n; ++i){
+                printf("%g, ", l.rolling_variance[i]);
+            }
+            printf("\n");
+        }
+        if(0){
+            fill_cpu(l.n, 0, l.rolling_mean, 1);
+            fill_cpu(l.n, 0, l.rolling_variance, 1);
+        }
+    }
+    read_bytes = fread(l.weights, sizeof(float), num, fp);
+    if (read_bytes > 0 && read_bytes < l.n) printf("\n Warning: Unexpected end of wights-file! l.weights - l.index = %d \n", l.index);
+    //if(l.adam){
+    //    fread(l.m, sizeof(float), num, fp);
+    //    fread(l.v, sizeof(float), num, fp);
+    //}
+    //if(l.c == 3) scal_cpu(num, 1./256, l.weights, 1);
+    if (l.flipped) {
+        transpose_matrix(l.weights, (l.c/l.groups)*l.size*l.size, l.n);
+    }
+    //if (l.binary) binarize_weights(l.weights, l.n, (l.c/l.groups)*l.size*l.size, l.weights);
+#ifdef GPU
+    if(gpu_index >= 0){
+        push_convolutional_layer(l);
+    }
+#endif
+}
+
+void load_shortcut_weights(layer l, FILE *fp)
+{
+    int num = l.nweights;
+    int read_bytes;
+    read_bytes = fread(l.weights, sizeof(float), num, fp);
+    if (read_bytes > 0 && read_bytes < num) printf("\n Warning: Unexpected end of wights-file! l.weights - l.index = %d \n", l.index);
+    //for (int i = 0; i < l.nweights; ++i) printf(" %f, ", l.weights[i]);
+    //printf(" read_bytes = %d \n\n", read_bytes);
+#ifdef GPU
+    if (gpu_index >= 0) {
+        push_shortcut_layer(l);
+    }
+#endif
+}
+
+void load_implicit_weights(layer l, FILE *fp)
+{
+    int num = l.nweights;
+    int read_bytes;
+    read_bytes = fread(l.weights, sizeof(float), num, fp);
+    if (read_bytes > 0 && read_bytes < num) printf("\n Warning: Unexpected end of wights-file! l.weights - l.index = %d \n", l.index);
+    //for (int i = 0; i < l.nweights; ++i) printf(" %f, ", l.weights[i]);
+    //printf(" read_bytes = %d \n\n", read_bytes);
+#ifdef GPU
+    if (gpu_index >= 0) {
+        push_implicit_layer(l);
+    }
+#endif
+}
+
+void load_weights_upto(network *net, char *filename, int cutoff)
+{
+#ifdef GPU
+    if(net->gpu_index >= 0){
+        cuda_set_device(net->gpu_index);
+    }
+#endif
+    fprintf(stderr, "Loading weights from %s...", filename);
+    fflush(stdout);
+    FILE *fp = fopen(filename, "rb");
+    if(!fp) file_error(filename);
+
+    int major;
+    int minor;
+    int revision;
+    fread(&major, sizeof(int), 1, fp);
+    fread(&minor, sizeof(int), 1, fp);
+    fread(&revision, sizeof(int), 1, fp);
+    if ((major * 10 + minor) >= 2) {
+        printf("\n seen 64");
+        uint64_t iseen = 0;
+        fread(&iseen, sizeof(uint64_t), 1, fp);
+        *net->seen = iseen;
+    }
+    else {
+        printf("\n seen 32");
+        uint32_t iseen = 0;
+        fread(&iseen, sizeof(uint32_t), 1, fp);
+        *net->seen = iseen;
+    }
+    *net->cur_iteration = get_current_batch(*net);
+    printf(", trained: %.0f K-images (%.0f Kilo-batches_64) \n", (float)(*net->seen / 1000), (float)(*net->seen / 64000));
+    int transpose = (major > 1000) || (minor > 1000);
+
+    int i;
+    for(i = 0; i < net->n && i < cutoff; ++i){
+        layer l = net->layers[i];
+        if (l.dontload) continue;
+        if(l.type == CONVOLUTIONAL && l.share_layer == NULL){
+            load_convolutional_weights(l, fp);
+        }
+        if (l.type == SHORTCUT && l.nweights > 0) {
+            load_shortcut_weights(l, fp);
+        }
+        if (l.type == IMPLICIT) {
+            load_implicit_weights(l, fp);
+        }
+        if(l.type == CONNECTED){
+            load_connected_weights(l, fp, transpose);
+        }
+        if(l.type == BATCHNORM){
+            load_batchnorm_weights(l, fp);
+        }
+        if(l.type == CRNN){
+            load_convolutional_weights(*(l.input_layer), fp);
+            load_convolutional_weights(*(l.self_layer), fp);
+            load_convolutional_weights(*(l.output_layer), fp);
+        }
+        if(l.type == RNN){
+            load_connected_weights(*(l.input_layer), fp, transpose);
+            load_connected_weights(*(l.self_layer), fp, transpose);
+            load_connected_weights(*(l.output_layer), fp, transpose);
+        }
+        if(l.type == GRU){
+            load_connected_weights(*(l.input_z_layer), fp, transpose);
+            load_connected_weights(*(l.input_r_layer), fp, transpose);
+            load_connected_weights(*(l.input_h_layer), fp, transpose);
+            load_connected_weights(*(l.state_z_layer), fp, transpose);
+            load_connected_weights(*(l.state_r_layer), fp, transpose);
+            load_connected_weights(*(l.state_h_layer), fp, transpose);
+        }
+        if(l.type == LSTM){
+            load_connected_weights(*(l.wf), fp, transpose);
+            load_connected_weights(*(l.wi), fp, transpose);
+            load_connected_weights(*(l.wg), fp, transpose);
+            load_connected_weights(*(l.wo), fp, transpose);
+            load_connected_weights(*(l.uf), fp, transpose);
+            load_connected_weights(*(l.ui), fp, transpose);
+            load_connected_weights(*(l.ug), fp, transpose);
+            load_connected_weights(*(l.uo), fp, transpose);
+        }
+        if (l.type == CONV_LSTM) {
+            if (l.peephole) {
+                load_convolutional_weights(*(l.vf), fp);
+                load_convolutional_weights(*(l.vi), fp);
+                load_convolutional_weights(*(l.vo), fp);
+            }
+            load_convolutional_weights(*(l.wf), fp);
+            if (!l.bottleneck) {
+                load_convolutional_weights(*(l.wi), fp);
+                load_convolutional_weights(*(l.wg), fp);
+                load_convolutional_weights(*(l.wo), fp);
+            }
+            load_convolutional_weights(*(l.uf), fp);
+            load_convolutional_weights(*(l.ui), fp);
+            load_convolutional_weights(*(l.ug), fp);
+            load_convolutional_weights(*(l.uo), fp);
+        }
+        if(l.type == LOCAL){
+            int locations = l.out_w*l.out_h;
+            int size = l.size*l.size*l.c*l.n*locations;
+            fread(l.biases, sizeof(float), l.outputs, fp);
+            fread(l.weights, sizeof(float), size, fp);
+#ifdef GPU
+            if(gpu_index >= 0){
+                push_local_layer(l);
+            }
+#endif
+        }
+        if (feof(fp)) break;
+    }
+    fprintf(stderr, "Done! Loaded %d layers from weights-file \n", i);
+    fclose(fp);
+}
+
+void load_weights(network *net, char *filename)
+{
+    load_weights_upto(net, filename, net->n);
+}
+
+// load network & force - set batch size
+network *load_network_custom(char *cfg, char *weights, int clear, int batch)
+{
+    printf(" Try to load cfg: %s, weights: %s, clear = %d \n", cfg, weights, clear);
+    network* net = (network*)xcalloc(1, sizeof(network));
+    *net = parse_network_cfg_custom(cfg, batch, 1);
+    if (weights && weights[0] != 0) {
+        printf(" Try to load weights: %s \n", weights);
+        load_weights(net, weights);
+    }
+    fuse_conv_batchnorm(*net);
+    if (clear) {
+        (*net->seen) = 0;
+        (*net->cur_iteration) = 0;
+    }
+    return net;
+}
+
+// load network & get batch size from cfg-file
+network *load_network(char *cfg, char *weights, int clear)
+{
+    printf(" Try to load cfg: %s, clear = %d \n", cfg, clear);
+    network* net = (network*)xcalloc(1, sizeof(network));
+    *net = parse_network_cfg(cfg);
+    if (weights && weights[0] != 0) {
+        printf(" Try to load weights: %s \n", weights);
+        load_weights(net, weights);
+    }
+    if (clear) {
+        (*net->seen) = 0;
+        (*net->cur_iteration) = 0;
+    }
+    return net;
+}

--
Gitblit v1.8.0