From 168af40fe9a3cc81c6ee16b3e81f154780c36bdb Mon Sep 17 00:00:00 2001
From: Scheaven <xuepengqiang>
Date: Thu, 03 Jun 2021 15:03:27 +0800
Subject: [PATCH] up new v4

---
 lib/detecter_tools/darknet/im2col.h |  176 +++++++++++++++++++++++++++++-----------------------------
 1 file changed, 88 insertions(+), 88 deletions(-)

diff --git a/lib/detecter_tools/darknet/im2col.h b/lib/detecter_tools/darknet/im2col.h
index c696095..65dd6ec 100644
--- a/lib/detecter_tools/darknet/im2col.h
+++ b/lib/detecter_tools/darknet/im2col.h
@@ -1,88 +1,88 @@
-#ifndef IM2COL_H
-#define IM2COL_H
-
-#include <stddef.h>
-#include <stdint.h>
-#include "darknet.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-void im2col_cpu(float* data_im,
-        int channels, int height, int width,
-        int ksize, int stride, int pad, float* data_col);
-float im2col_get_pixel(float* im, int height, int width, int channels,
-    int row, int col, int channel, int pad);
-
-void im2col_cpu_ext(const float* data_im, const int channels,
-    const int height, const int width, const int kernel_h, const int kernel_w,
-    const int pad_h, const int pad_w,
-    const int stride_h, const int stride_w,
-    const int dilation_h, const int dilation_w,
-    float* data_col);
-
-#ifdef GPU
-
-void im2col_ongpu(float *im,
-         int channels, int height, int width,
-         int ksize, int stride, int pad,float *data_col);
-
-void im2col_gpu_ext(const float* data_im, const int channels,
-    const int height, const int width, const int kernel_h, const int kernel_w,
-    const int pad_h, const int pad_w,
-    const int stride_h, const int stride_w,
-    const int dilation_h, const int dilation_w,
-    float* data_col);
-
-void im2col_align_ongpu(float *im,
-    int channels, int height, int width,
-    int ksize, int stride, int pad, float *data_col, int bit_align);
-
-void im2col_align_bin_ongpu(float *im,
-    int channels, int height, int width,
-    int ksize, int stride, int pad, float *data_col, int bit_align);
-
-void float_to_bit_gpu(float *src, unsigned char *dst, size_t size);
-
-void transpose_bin_gpu(unsigned char *A, unsigned char *B, const int n, const int m,
-    const int lda, const int ldb, const int block_size);
-
-void transpose_uint32_gpu(uint32_t *src, uint32_t *dst, int src_h, int src_w, int src_align, int dst_align);
-
-void transpose_uint32_gpu_2(uint32_t *src, uint32_t *dst, int src_h, int src_w, int src_align, int dst_align);
-
-void repack_input_gpu(float *input, float *re_packed_input, int w, int h, int c);
-
-void repack_input_gpu_2(float *input, float *re_packed_input, int w, int h, int c);
-
-void repack_input_gpu_bin(float *input, uint32_t *re_packed_input_bin, int w, int h, int c);
-
-void fill_int8_gpu(unsigned char *src, unsigned char val, size_t size);
-
-// shared_memory + partial coalescing = GOOD
-void gemm_nn_custom_bin_mean_transposed_gpu(int M, int N, int K,
-    unsigned char *A, int lda,
-    unsigned char *B, int ldb,
-    float *C, int ldc, float *mean_arr, float *bias, int leaky_activation,
-    float *shortcut_in_gpu, float *shortcut_out_gpu);
-
-// sequentially - BAD
-void gemm_nn_custom_bin_mean_transposed_sequentially_gpu(int M, int N, int K,
-    unsigned char *A, int lda,
-    unsigned char *B, int ldb,
-    float *C, int ldc, float *mean_arr);
-
-void convolve_gpu(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, int size, int pad);
-
-void convolve_bin_gpu(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, int size, int pad,
-    int new_lda, float *mean_arr_gpu);
-
-//void convolve_bin_cpu(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, int size, int pad, int new_lda, float *mean_arr_gpu);
-
-//void convolve_cpu(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, int size, int pad);
-
-#endif
-#ifdef __cplusplus
-}
-#endif
-#endif
+#ifndef IM2COL_H
+#define IM2COL_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include "darknet.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+void im2col_cpu(float* data_im,
+        int channels, int height, int width,
+        int ksize, int stride, int pad, float* data_col);
+float im2col_get_pixel(float* im, int height, int width, int channels,
+    int row, int col, int channel, int pad);
+
+void im2col_cpu_ext(const float* data_im, const int channels,
+    const int height, const int width, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w,
+    const int stride_h, const int stride_w,
+    const int dilation_h, const int dilation_w,
+    float* data_col);
+
+#ifdef GPU
+
+void im2col_ongpu(float *im,
+         int channels, int height, int width,
+         int ksize, int stride, int pad,float *data_col);
+
+void im2col_gpu_ext(const float* data_im, const int channels,
+    const int height, const int width, const int kernel_h, const int kernel_w,
+    const int pad_h, const int pad_w,
+    const int stride_h, const int stride_w,
+    const int dilation_h, const int dilation_w,
+    float* data_col);
+
+void im2col_align_ongpu(float *im,
+    int channels, int height, int width,
+    int ksize, int stride, int pad, float *data_col, int bit_align);
+
+void im2col_align_bin_ongpu(float *im,
+    int channels, int height, int width,
+    int ksize, int stride, int pad, float *data_col, int bit_align);
+
+void float_to_bit_gpu(float *src, unsigned char *dst, size_t size);
+
+void transpose_bin_gpu(unsigned char *A, unsigned char *B, const int n, const int m,
+    const int lda, const int ldb, const int block_size);
+
+void transpose_uint32_gpu(uint32_t *src, uint32_t *dst, int src_h, int src_w, int src_align, int dst_align);
+
+void transpose_uint32_gpu_2(uint32_t *src, uint32_t *dst, int src_h, int src_w, int src_align, int dst_align);
+
+void repack_input_gpu(float *input, float *re_packed_input, int w, int h, int c);
+
+void repack_input_gpu_2(float *input, float *re_packed_input, int w, int h, int c);
+
+void repack_input_gpu_bin(float *input, uint32_t *re_packed_input_bin, int w, int h, int c);
+
+void fill_int8_gpu(unsigned char *src, unsigned char val, size_t size);
+
+// Uses shared memory with partial coalescing - rated GOOD (preferred variant)
+void gemm_nn_custom_bin_mean_transposed_gpu(int M, int N, int K,
+    unsigned char *A, int lda,
+    unsigned char *B, int ldb,
+    float *C, int ldc, float *mean_arr, float *bias, int leaky_activation,
+    float *shortcut_in_gpu, float *shortcut_out_gpu);
+
+// Sequential variant - rated BAD; prefer gemm_nn_custom_bin_mean_transposed_gpu
+void gemm_nn_custom_bin_mean_transposed_sequentially_gpu(int M, int N, int K,
+    unsigned char *A, int lda,
+    unsigned char *B, int ldb,
+    float *C, int ldc, float *mean_arr);
+
+void convolve_gpu(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, int size, int pad);
+
+void convolve_bin_gpu(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, int size, int pad,
+    int new_lda, float *mean_arr_gpu);
+
+//void convolve_bin_cpu(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, int size, int pad, int new_lda, float *mean_arr_gpu);
+
+//void convolve_cpu(float *input, float *weights, float *output, int in_w, int in_h, int in_c, int n, int size, int pad);
+
+#endif
+#ifdef __cplusplus
+}
+#endif
+#endif

--
Gitblit v1.8.0