From 168af40fe9a3cc81c6ee16b3e81f154780c36bdb Mon Sep 17 00:00:00 2001 From: Scheaven <xuepengqiang> Date: 星期四, 03 六月 2021 15:03:27 +0800 Subject: [PATCH] up new v4 --- lib/detecter_tools/darknet/cpu_gemm.c | 192 ++++++++++++++++++++++++------------------------ 1 files changed, 96 insertions(+), 96 deletions(-) diff --git a/lib/detecter_tools/darknet/cpu_gemm.c b/lib/detecter_tools/darknet/cpu_gemm.c index 1919907..ca1a8e4 100644 --- a/lib/detecter_tools/darknet/cpu_gemm.c +++ b/lib/detecter_tools/darknet/cpu_gemm.c @@ -1,96 +1,96 @@ -//#include "mini_blas.h" -#ifdef __cplusplus -#define PUT_IN_REGISTER -#else -#define PUT_IN_REGISTER register -#endif - -void cpu_gemm_nn(int TA, int TB, int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float BETA, - float *C, int ldc) -{ - int i,j,k; - for(i = 0; i < M; ++i){ - for(k = 0; k < K; ++k){ - PUT_IN_REGISTER float A_PART = ALPHA * A[i * lda + k]; - for(j = 0; j < N; ++j){ - C[i*ldc+j] += A_PART*B[k*ldb+j]; - } - } - } -} - -void cpu_gemm_nt(int TA, int TB, int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float BETA, - float *C, int ldc) -{ - int i,j,k; - for(i = 0; i < M; ++i){ - for(j = 0; j < N; ++j){ - PUT_IN_REGISTER float sum = 0; - for(k = 0; k < K; ++k){ - sum += ALPHA*A[i*lda+k]*B[k+j*ldb]; - } - C[i*ldc+j] += sum; - } - } -} - -void cpu_gemm_tn(int TA, int TB, int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float BETA, - float *C, int ldc) -{ - int i,j,k; - for(i = 0; i < M; ++i){ - for(k = 0; k < K; ++k){ - PUT_IN_REGISTER float A_PART = ALPHA * A[k * lda + i]; - for(j = 0; j < N; ++j){ - C[i*ldc+j] += A_PART*B[k*ldb+j]; - } - } - } -} -void cpu_gemm_tt(int TA, int TB, int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float BETA, - float *C, int ldc) -{ - int i,j,k; - for(i = 0; i < M; ++i){ - for(j = 0; j < N; ++j){ - for(k = 0; k < K; ++k){ - C[i*ldc+j] += ALPHA*A[i+k*lda]*B[k+j*ldb]; - } - } - } -} - - -void cpu_gemm(int TA, int TB, int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float BETA, - float *C, int ldc) -{ - int i, j; - for(i = 0; i < M; ++i){ - for(j = 0; j < N; ++j){ - C[i*ldc + j] *= BETA; - } - } - if(!TA && !TB) - cpu_gemm_nn( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); - else if(TA && !TB) - cpu_gemm_tn( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); - else if(!TA && TB) - cpu_gemm_nt( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); - else - cpu_gemm_tt( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); -} +//#include "mini_blas.h" +#ifdef __cplusplus +#define PUT_IN_REGISTER +#else +#define PUT_IN_REGISTER register +#endif + +void cpu_gemm_nn(int TA, int TB, int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float BETA, + float *C, int ldc) +{ + int i,j,k; + for(i = 0; i < M; ++i){ + for(k = 0; k < K; ++k){ + PUT_IN_REGISTER float A_PART = ALPHA * A[i * lda + k]; + for(j = 0; j < N; ++j){ + C[i*ldc+j] += A_PART*B[k*ldb+j]; + } + } + } +} + +void cpu_gemm_nt(int TA, int TB, int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float BETA, + float *C, int ldc) +{ + int i,j,k; + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + PUT_IN_REGISTER float sum = 0; + for(k = 0; k < K; ++k){ + sum += ALPHA*A[i*lda+k]*B[k+j*ldb]; + } + C[i*ldc+j] += sum; + } + } +} + +void cpu_gemm_tn(int TA, int TB, int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float BETA, + float *C, int ldc) +{ + int i,j,k; + for(i = 0; i < M; ++i){ + for(k = 0; k < K; ++k){ + PUT_IN_REGISTER float A_PART = ALPHA * A[k * lda + i]; + for(j = 0; j < N; ++j){ + C[i*ldc+j] += A_PART*B[k*ldb+j]; + } + } + } +} +void cpu_gemm_tt(int TA, int TB, int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float BETA, + float *C, int ldc) +{ + int i,j,k; + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + for(k = 0; k < K; ++k){ + C[i*ldc+j] += ALPHA*A[i+k*lda]*B[k+j*ldb]; + } + } + } +} + + +void cpu_gemm(int TA, int TB, int M, int N, int K, float ALPHA, + float *A, int lda, + float *B, int ldb, + float BETA, + float *C, int ldc) +{ + int i, j; + for(i = 0; i < M; ++i){ + for(j = 0; j < N; ++j){ + C[i*ldc + j] *= BETA; + } + } + if(!TA && !TB) + cpu_gemm_nn( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); + else if(TA && !TB) + cpu_gemm_tn( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); + else if(!TA && TB) + cpu_gemm_nt( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); + else + cpu_gemm_tt( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); +} -- Gitblit v1.8.0