| | |
| | | #ifndef DARKCUDA_H
|
| | | #define DARKCUDA_H
|
| | | #include "darknet.h"
|
| | |
|
#ifdef __cplusplus
extern "C" {
#endif

// Global settings shared with the rest of the project. They are only declared
// here; the definitions live in another translation unit (not visible in this
// header). Both are declared before the `#ifdef GPU` section, so they are
// visible to CPU-only builds as well.
// NOTE(review): by name, cuda_debug_sync looks like a "synchronize after each
// CUDA call for debugging" switch and gpu_index like the selected device
// ordinal -- confirm against the definitions.
extern int cuda_debug_sync;
extern int gpu_index;

#ifdef __cplusplus
}
#endif // __cplusplus
|
| | |
|
| | | #ifdef GPU
|
| | |
|
// Compile-time constants for the GPU code paths.
// NOTE(review): the intended use of each value is inferred from its name only;
// confirm at the use sites.
#define BLOCK 512              // presumably the default threads-per-block for kernel launches
#define FULL_MASK 0xffffffff   // all 32 bits set; presumably the lane mask for *_sync warp intrinsics
#define WARP_SIZE 32           // presumably the number of threads in a warp
#define BLOCK_TRANSPOSE32 256  // presumably the block size used by a 32-wide transpose kernel
|
| | |
|
| | | #include <cuda.h>
|
| | | #include <cuda_runtime.h>
|
| | | #include <curand.h>
|
| | | #include <cublas_v2.h>
|
| | | #include <cuda_runtime_api.h>
|
| | | //#include <driver_types.h>
|
| | |
|
| | | #ifdef CUDNN
|
| | | #include <cudnn.h>
|
| | | #endif // CUDNN
|
| | |
|
// Fallbacks for the call-site macros consumed by CHECK_CUDA / CHECK_CUDNN.
// Those macros paste __FILE__, __FUNCTION__, __DATE__ and __TIME__ next to
// string literals, so each of them must expand to a string literal or to
// nothing for the adjacent-literal concatenation to compile.
//
// __DATE__, __TIME__, __LINE__ and __FILE__ are standard predefined macros, so
// their fallbacks only take effect on a toolchain that does not provide them.
// __FUNCTION__ is different: per the GCC documentation it is an identifier, not
// a preprocessor macro, so on GCC-style compilers the guard below fires and
// __FUNCTION__ expands to nothing -- the function name is then simply omitted
// from the reported location string.

#ifndef __DATE__
#define __DATE__
#endif

#ifndef __TIME__
#define __TIME__
#endif

#ifndef __FUNCTION__
#define __FUNCTION__
#endif

#ifndef __LINE__
#define __LINE__ 0
#endif

#ifndef __FILE__
#define __FILE__
#endif
|
| | |
|
| | | #ifdef __cplusplus
|
| | | extern "C" {
|
| | | #endif // __cplusplus
|
| | | void check_error(cudaError_t status);
|
| | | void check_error_extended(cudaError_t status, const char *file, int line, const char *date_time);
|
| | | #define CHECK_CUDA(X) check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ );
|
| | |
|
| | | cublasHandle_t blas_handle();
|
| | | void free_pinned_memory();
|
| | | void pre_allocate_pinned_memory(size_t size);
|
| | | float *cuda_make_array_pinned_preallocated(float *x, size_t n);
|
| | | float *cuda_make_array_pinned(float *x, size_t n);
|
| | | float *cuda_make_array(float *x, size_t n);
|
| | | void **cuda_make_array_pointers(void **x, size_t n);
|
| | | int *cuda_make_int_array(size_t n);
|
| | | int *cuda_make_int_array_new_api(int *x, size_t n);
|
| | | void cuda_push_array(float *x_gpu, float *x, size_t n);
|
| | | //LIB_API void cuda_pull_array(float *x_gpu, float *x, size_t n);
|
| | | //LIB_API void cuda_set_device(int n);
|
| | | int cuda_get_device();
|
| | | void cuda_free_host(float *x_cpu);
|
| | | void cuda_free(float *x_gpu);
|
| | | void cuda_random(float *x_gpu, size_t n);
|
| | | float cuda_compare(float *x_gpu, float *x, size_t n, char *s);
|
| | | dim3 cuda_gridsize(size_t n);
|
| | | cudaStream_t get_cuda_stream();
|
| | | cudaStream_t get_cuda_memcpy_stream();
|
| | | int get_number_of_blocks(int array_size, int block_size);
|
| | | int get_gpu_compute_capability(int i, char *device_name);
|
| | | void show_cuda_cudnn_info();
|
| | |
|
| | | #ifdef CUDNN
|
| | | cudnnHandle_t cudnn_handle();
|
| | | enum {cudnn_fastest, cudnn_smallest, cudnn_specify};
|
| | |
|
| | | void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line, const char *date_time);
|
| | | #define CHECK_CUDNN(X) cudnn_check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ );
|
| | | #endif
|
| | |
|
| | | #ifdef __cplusplus
|
| | | }
|
| | | #endif // __cplusplus
|
| | |
|
| | | #else // GPU
|
| | | //LIB_API void cuda_set_device(int n);
|
| | | #endif // GPU
|
| | | #endif // DARKCUDA_H
|
| | | #ifndef DARKCUDA_H |
| | | #define DARKCUDA_H |
| | | #include "darknet.h" |
| | | |
#ifdef __cplusplus
extern "C" {
#endif

// Project-wide globals, declared here and defined in another translation unit
// (not visible in this header). They sit outside the `#ifdef GPU` section, so
// CPU-only builds see them too.
// NOTE(review): cuda_debug_sync presumably toggles extra synchronisation for
// debugging and gpu_index presumably holds the active device ordinal --
// confirm against the definitions.
extern int cuda_debug_sync;
extern int gpu_index;

#ifdef __cplusplus
}
#endif // __cplusplus
| | | |
| | | #ifdef GPU |
| | | |
// Compile-time constants for the GPU code paths.
// NOTE(review): purposes are inferred from the names; confirm at the use sites.
#define BLOCK 512              // presumably the default threads-per-block for kernel launches
#define FULL_MASK 0xffffffff   // all 32 bits set; presumably the lane mask for *_sync warp intrinsics
#define WARP_SIZE 32           // presumably the number of threads in a warp
#define BLOCK_TRANSPOSE32 256  // presumably the block size used by a 32-wide transpose kernel
| | | |
| | | #include <cuda.h> |
| | | #include <cuda_runtime.h> |
| | | #include <curand.h> |
| | | #include <cublas_v2.h> |
| | | #include <cuda_runtime_api.h> |
| | | //#include <driver_types.h> |
| | | |
| | | #ifdef CUDNN |
| | | #include <cudnn.h> |
| | | #endif // CUDNN |
| | | |
// Fallbacks for the call-site macros used by CHECK_CUDA / CHECK_CUBLAS /
// CHECK_CUDNN. Those macros place __FILE__, __FUNCTION__, __DATE__ and
// __TIME__ next to string literals, so each must expand to a string literal or
// to nothing for the concatenation to compile.
//
// __DATE__, __TIME__, __LINE__ and __FILE__ are standard predefined macros, so
// their fallbacks only matter on a toolchain that lacks them. __FUNCTION__ is
// the interesting case: per the GCC documentation it is an identifier rather
// than a macro, so on GCC-style compilers the guard fires, __FUNCTION__ expands
// to nothing, and the function name is left out of the location string.

#ifndef __DATE__
#define __DATE__
#endif

#ifndef __TIME__
#define __TIME__
#endif

#ifndef __FUNCTION__
#define __FUNCTION__
#endif

#ifndef __LINE__
#define __LINE__ 0
#endif

#ifndef __FILE__
#define __FILE__
#endif
| | | |
| | | #ifdef __cplusplus |
| | | extern "C" { |
| | | #endif // __cplusplus |
| | | void check_error(cudaError_t status); |
| | | void check_error_extended(cudaError_t status, const char *file, int line, const char *date_time); |
| | | void cublas_check_error_extended(cublasStatus_t status, const char *file, int line, const char *date_time); |
| | | #define CHECK_CUDA(X) check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ ); |
| | | #define CHECK_CUBLAS(X) cublas_check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ ); |
| | | |
| | | cublasHandle_t blas_handle(); |
| | | void free_pinned_memory(); |
| | | void pre_allocate_pinned_memory(size_t size); |
| | | float *cuda_make_array_pinned_preallocated(float *x, size_t n); |
| | | float *cuda_make_array_pinned(float *x, size_t n); |
| | | float *cuda_make_array(float *x, size_t n); |
| | | void **cuda_make_array_pointers(void **x, size_t n); |
| | | int *cuda_make_int_array(size_t n); |
| | | int *cuda_make_int_array_new_api(int *x, size_t n); |
| | | void cuda_push_array(float *x_gpu, float *x, size_t n); |
| | | //LIB_API void cuda_pull_array(float *x_gpu, float *x, size_t n); |
| | | //LIB_API void cuda_set_device(int n); |
| | | int cuda_get_device(); |
| | | void cuda_free_host(float *x_cpu); |
| | | void cuda_free(float *x_gpu); |
| | | void cuda_random(float *x_gpu, size_t n); |
| | | float cuda_compare(float *x_gpu, float *x, size_t n, char *s); |
| | | dim3 cuda_gridsize(size_t n); |
| | | cudaStream_t get_cuda_stream(); |
| | | //cudaStream_t get_cuda_memcpy_stream(); |
| | | int get_number_of_blocks(int array_size, int block_size); |
| | | int get_gpu_compute_capability(int i, char *device_name); |
| | | void show_cuda_cudnn_info(); |
| | | |
| | | cudaStream_t switch_stream(int i); |
| | | void wait_stream(int i); |
| | | void reset_wait_stream_events(); |
| | | |
| | | #ifdef CUDNN |
| | | cudnnHandle_t cudnn_handle(); |
| | | enum {cudnn_fastest, cudnn_smallest, cudnn_specify}; |
| | | |
| | | void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line, const char *date_time); |
| | | #define CHECK_CUDNN(X) cudnn_check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ ); |
| | | #endif |
| | | |
| | | #ifdef __cplusplus |
| | | } |
| | | #endif // __cplusplus |
| | | |
| | | #else // GPU |
| | | //LIB_API void cuda_set_device(int n); |
| | | #endif // GPU |
| | | #endif // DARKCUDA_H |