#ifndef DARKCUDA_H #define DARKCUDA_H #include "darknet.h" #ifdef __cplusplus extern "C" { #endif extern int cuda_debug_sync; extern int gpu_index; #ifdef __cplusplus } #endif // __cplusplus #ifdef GPU #define BLOCK 512 #define FULL_MASK 0xffffffff #define WARP_SIZE 32 #define BLOCK_TRANSPOSE32 256 #include #include #include #include #include //#include #ifdef CUDNN #include #endif // CUDNN #ifndef __DATE__ #define __DATE__ #endif #ifndef __TIME__ #define __TIME__ #endif #ifndef __FUNCTION__ #define __FUNCTION__ #endif #ifndef __LINE__ #define __LINE__ 0 #endif #ifndef __FILE__ #define __FILE__ #endif #ifdef __cplusplus extern "C" { #endif // __cplusplus void check_error(cudaError_t status); void check_error_extended(cudaError_t status, const char *file, int line, const char *date_time); void cublas_check_error_extended(cublasStatus_t status, const char *file, int line, const char *date_time); #define CHECK_CUDA(X) check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ ); #define CHECK_CUBLAS(X) cublas_check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ ); cublasHandle_t blas_handle(); void free_pinned_memory(); void pre_allocate_pinned_memory(size_t size); float *cuda_make_array_pinned_preallocated(float *x, size_t n); float *cuda_make_array_pinned(float *x, size_t n); float *cuda_make_array(float *x, size_t n); void **cuda_make_array_pointers(void **x, size_t n); int *cuda_make_int_array(size_t n); int *cuda_make_int_array_new_api(int *x, size_t n); void cuda_push_array(float *x_gpu, float *x, size_t n); //LIB_API void cuda_pull_array(float *x_gpu, float *x, size_t n); //LIB_API void cuda_set_device(int n); int cuda_get_device(); void cuda_free_host(float *x_cpu); void cuda_free(float *x_gpu); void cuda_random(float *x_gpu, size_t n); float cuda_compare(float *x_gpu, float *x, size_t n, char *s); dim3 cuda_gridsize(size_t n); cudaStream_t get_cuda_stream(); //cudaStream_t get_cuda_memcpy_stream(); int get_number_of_blocks(int array_size, int block_size); int get_gpu_compute_capability(int i, char *device_name); void show_cuda_cudnn_info(); cudaStream_t switch_stream(int i); void wait_stream(int i); void reset_wait_stream_events(); #ifdef CUDNN cudnnHandle_t cudnn_handle(); enum {cudnn_fastest, cudnn_smallest, cudnn_specify}; void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line, const char *date_time); #define CHECK_CUDNN(X) cudnn_check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ ); #endif #ifdef __cplusplus } #endif // __cplusplus #else // GPU //LIB_API void cuda_set_device(int n); #endif // GPU #endif // DARKCUDA_H