From a28e3552b904f0ba175ba779ac3b2bbc83592dbc Mon Sep 17 00:00:00 2001 From: zhangmeng <775834166@qq.com> Date: 星期四, 12 十二月 2019 16:10:35 +0800 Subject: [PATCH] update --- /dev/null | 526 ---------------------------------------------------------- 1 files changed, 0 insertions(+), 526 deletions(-) diff --git a/goconv/conv.cpp b/goconv/conv.cpp deleted file mode 100644 index 098a396..0000000 --- a/goconv/conv.cpp +++ /dev/null @@ -1,592 +0,0 @@ -#include "conv.h" - -#include <cmath> -#include <sys/time.h> - -#include <npp.h> -#include <helper_cuda.h> -#include <helper_string.h> -#include "Exceptions.h" - - -static const int MEMORY_ALGN_DEVICE = 511; -static const int HD_MEMORY_ALGN_DEVICE = 511; - -static inline int DivUp(int x, int d) -{ - return (x + d - 1) / d; -} - -static int set_data(uint8_t *data, const int width, const int height, unsigned char *mY, unsigned char *mU, unsigned char *mV) -{ - uint8_t* yuv_data = (uint8_t*)data; - if (!yuv_data) - { - return -1; - } - - uint32_t i, j; - uint32_t off; - uint32_t off_yuv; - uint32_t half_h; - uint32_t half_w; - uint32_t u_size; - uint8_t* yuv_ptr; - uint8_t* u_ptr; - uint8_t* v_ptr; - - int w = width; - int h = height; - - //浠庤繖涓�鍙ユ潵鐪嬶紝鍗充娇鏄悓涓�绉嶆牸寮忥紝杩涙潵涔熻澶勭悊涓�涓嬨�� - size_t nPitch = (w + HD_MEMORY_ALGN_DEVICE) & ~HD_MEMORY_ALGN_DEVICE; - off = 0; - off_yuv = 0; - for (i = 0; i < (uint32_t)h; i++) - { - memcpy(mY + off, yuv_data + off_yuv, w); - off += nPitch; - off_yuv += w; - } - - half_w = w >> 1; - half_h = h >> 1; - u_size = half_w * half_h; - nPitch = (half_w + HD_MEMORY_ALGN_DEVICE) & ~HD_MEMORY_ALGN_DEVICE; - - off_yuv = w * h; - off = 0; - for (i = 0; i < half_h; i++) - { - yuv_ptr = yuv_data + off_yuv; - u_ptr = mU + off; - v_ptr = mV + off; - for (j = 0; j < (uint32_t)w; j += 2) - { - *u_ptr++ = *yuv_ptr++; - *v_ptr++ = *yuv_ptr++; - } - off_yuv += w; - off += nPitch; - } - - return 0; -} - -/////////////handle -class convertor{ -public: - convertor(const int srcW, const int srcH, const int dstW, const int dstH, const int gpu); - ~convertor(); - int yuv2bgr(unsigned char **bgr, int *bgrLen); - int resize2bgr(unsigned char *in, unsigned char **data, int *data_len); - int resizeyuv(unsigned char *in, unsigned char **data, int *data_len); - int fill_yuv(const unsigned char *yuv); -private: - void init_yuv(); - void init_resize(); - void init_resize_bgr(); - void init_resize_yuv(); -private: - int width; - int height; - - unsigned char aSamplingFactors[3]; - int nMCUBlocksH; - int nMCUBlocksV; - - Npp8u *apSrcImage[3]; - NppiSize aSrcSize[3]; - Npp32s aSrcImageStep[3]; - size_t aSrcPitch[3]; - - uint8_t *mY; - uint8_t *mU; - uint8_t *mV; - -/////////////////////////// - int rWidth; - int rHeight; - float fx; - float fy; - - Npp8u *apDstImage[3] = {0,0,0}; - Npp32s aDstImageStep[3]; - NppiSize aDstSize[3]; - -///////////////////////////// - Npp8u *imgOrigin; - size_t pitchOrigin; - NppiSize sizeOrigin; - - unsigned char *bgrOrigin; - int bgrOriginLen; - size_t bgrOriginPitch; - -//////////////////////////// - Npp8u *imgResize; - size_t pitchResize; - NppiSize sizeResize; - - unsigned char *bgrScale; - int bgrScaleLen; - size_t bgrScalePitch; - -// resize only -//////////////////////////// - Npp8u *originBGR; - int pitchOriginBGR; - Npp8u *resizedBGR; - int pitchResizedBGR; - unsigned char *hostResizedBGR; - -/////////////////////////// - unsigned char *nv12; - - bool initialized_yuv, initialized_resize, initialized_resize_bgr, initialized_resize_yuv; - int gpu_index; -}; - - -convertor::convertor(const int srcW, const int srcH, const int dstW, const int dstH, const int gpu) -:width(srcW) -,height(srcH) -,rWidth(dstW) -,rHeight(dstH) -,fx(-1) -,fy(-1) -,mY(NULL) -,mU(NULL) -,mV(NULL) -,imgOrigin(0) -,imgResize(0) -,bgrOrigin(NULL) -,bgrOriginLen(0) -,bgrScale(NULL) -,bgrScaleLen(0) -,originBGR(0) -,pitchOriginBGR(0) -,resizedBGR(0) -,pitchResizedBGR(0) -,hostResizedBGR(NULL) -,nv12(NULL) -,initialized_yuv(false) -,initialized_resize(false) -,initialized_resize_bgr(false) -,initialized_resize_yuv(false) -,gpu_index(gpu) -{} - -static void setGPUDevice(const int gpu){ - if (gpu >= 0){ - cudaSetDevice(gpu); - } -} - -void convertor::init_yuv(){ - if (initialized_yuv) return; - initialized_yuv = true; - - setGPUDevice(gpu_index); - - for(int i = 0; i < 3; i++){ - apSrcImage[i] = 0; - apDstImage[i] = 0; - } - - aSamplingFactors[0] = 34; - aSamplingFactors[1] = 17; - aSamplingFactors[2] = 17; - - nMCUBlocksH = 0; - nMCUBlocksV = 0; - - for (int i = 0; i < 3; ++i) - { - nMCUBlocksV = std::max(nMCUBlocksV, aSamplingFactors[i] & 0x0f); - nMCUBlocksH = std::max(nMCUBlocksH, aSamplingFactors[i] >> 4); - } - - for (int i = 0; i < 3; ++i) - { - NppiSize oBlocks; - NppiSize oBlocksPerMCU = { aSamplingFactors[i] >> 4, aSamplingFactors[i] & 0x0f }; - - oBlocks.width = (int)ceil((width + 7) / 8 * - static_cast<float>(oBlocksPerMCU.width) / nMCUBlocksH); - oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width; - - oBlocks.height = (int)ceil((height + 7) / 8 * - static_cast<float>(oBlocksPerMCU.height) / nMCUBlocksV); - oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height; - - aSrcSize[i].width = oBlocks.width * 8; - aSrcSize[i].height = oBlocks.height * 8; - - // Allocate Memory - size_t nPitch; - NPP_CHECK_CUDA(cudaMallocPitch((void**)&(apSrcImage[i]), &nPitch, aSrcSize[i].width, aSrcSize[i].height)); - aSrcPitch[i] = nPitch; - aSrcImageStep[i] = static_cast<Npp32s>(nPitch); - } - - NPP_CHECK_CUDA(cudaMallocPitch((void**)&imgOrigin, &pitchOrigin, width * 3, height)); - - bgrOriginPitch = width * 3; - bgrOriginLen = bgrOriginPitch * height; - NPP_CHECK_CUDA(cudaHostAlloc((void**)&bgrOrigin, bgrOriginLen, cudaHostAllocDefault)); - - sizeOrigin.width = width; - sizeOrigin.height = height; - - uint32_t nPitch = (width + MEMORY_ALGN_DEVICE) & ~MEMORY_ALGN_DEVICE; - NPP_CHECK_CUDA(cudaHostAlloc((void**)&mY, nPitch * height, cudaHostAllocDefault)); - nPitch = (width/2 + MEMORY_ALGN_DEVICE) & ~MEMORY_ALGN_DEVICE; - NPP_CHECK_CUDA(cudaHostAlloc((void**)&mU, nPitch * height / 2, cudaHostAllocDefault)); - NPP_CHECK_CUDA(cudaHostAlloc((void**)&mV, nPitch * height / 2, cudaHostAllocDefault)); - -} - -void convertor::init_resize(){ - if (initialized_resize) return; - initialized_resize = true; - - setGPUDevice(gpu_index); - - NppiSize oDstImageSize; - oDstImageSize.width = std::max(1, rWidth); - oDstImageSize.height = std::max(1, rHeight); - - sizeResize.width = oDstImageSize.width; - sizeResize.height = oDstImageSize.height; - - for (int i=0; i < 3; ++i) - { - NppiSize oBlocks; - NppiSize oBlocksPerMCU = { aSamplingFactors[i] & 0x0f, aSamplingFactors[i] >> 4}; - - oBlocks.width = (int)ceil((oDstImageSize.width + 7)/8 * - static_cast<float>(oBlocksPerMCU.width)/nMCUBlocksH); - oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width; - - oBlocks.height = (int)ceil((oDstImageSize.height+7)/8 * - static_cast<float>(oBlocksPerMCU.height)/nMCUBlocksV); - oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height; - - aDstSize[i].width = oBlocks.width * 8; - aDstSize[i].height = oBlocks.height * 8; - - // Allocate Memory - size_t nPitch; - NPP_CHECK_CUDA(cudaMallocPitch((void**)&apDstImage[i], &nPitch, aDstSize[i].width, aDstSize[i].height)); - aDstImageStep[i] = static_cast<Npp32s>(nPitch); - } - - if (rWidth > 0 && rHeight > 0 && rWidth < width && rHeight < height){ - fx = (float)(rWidth) / (float)(width); - fy = (float)(rHeight) / (float)(height); - } - - if (imgResize == 0){ - if (rWidth > 0 && rHeight > 0 && rWidth < width && rHeight < height){ - NPP_CHECK_CUDA(cudaMallocPitch((void**)&imgResize, &pitchResize, rWidth * 3, rHeight)); - } - } - if (!bgrScale){ - if (rWidth > 0 && rHeight > 0 && rWidth < width && rHeight < height){ - bgrScalePitch = rWidth * 3; - bgrScaleLen = bgrScalePitch * rHeight; - NPP_CHECK_CUDA(cudaHostAlloc((void**)&bgrScale, bgrScaleLen, cudaHostAllocDefault)); - } - } -} - -void convertor::init_resize_bgr(){ - if (initialized_resize_bgr) return; - initialized_resize_bgr = true; - - setGPUDevice(gpu_index); - if (originBGR == 0){ - originBGR = nppiMalloc_8u_C3(width, height, &pitchOriginBGR); - } - if (resizedBGR == 0){ - resizedBGR = nppiMalloc_8u_C3(rWidth, rHeight, &pitchResizedBGR); - } - if (hostResizedBGR == NULL){ - NPP_CHECK_CUDA(cudaHostAlloc((void**)&hostResizedBGR, rWidth * 3 * rHeight, cudaHostAllocDefault)); - } -} - -void convertor::init_resize_yuv(){ - if (initialized_resize_yuv) return; - initialized_resize_yuv = true; - - if (rWidth > 0 && rHeight > 0){ - fx = (float)(width) / (float)(rWidth); - fy = (float)(height) / (float)(rHeight); - } - - nv12 = (unsigned char*)malloc(rWidth*rHeight*3/2); -} - -convertor::~convertor(){ - setGPUDevice(gpu_index); - - if(mY) cudaFreeHost(mY); - if(mU) cudaFreeHost(mU); - if(mV) cudaFreeHost(mV); - - for (int i = 0; i < 3; ++i)//鍐呭瓨閲婃斁 - { - cudaFree(apSrcImage[i]); - cudaFree(apDstImage[i]); - } - - if (imgOrigin) cudaFree(imgOrigin); - if (imgResize) cudaFree(imgResize); - - if (bgrOrigin) cudaFreeHost(bgrOrigin); - if (bgrScale) cudaFreeHost(bgrScale); - - if (originBGR) nppiFree(originBGR); - if (resizedBGR) nppiFree(resizedBGR); - if (hostResizedBGR) cudaFreeHost(hostResizedBGR); - - if (nv12) free(nv12); -} - -int convertor::fill_yuv(const unsigned char *yuv){ - init_yuv(); - int ret = set_data((uint8_t*)yuv, width, height, mY, mU, mV); - if (ret < 0) return ret; - - setGPUDevice(gpu_index); - - NPP_CHECK_CUDA(cudaMemcpy(apSrcImage[0], mY, aSrcPitch[0] * height, cudaMemcpyHostToDevice)); - NPP_CHECK_CUDA(cudaMemcpy(apSrcImage[1], mU, aSrcPitch[1] * height / 2, cudaMemcpyHostToDevice)); - NPP_CHECK_CUDA(cudaMemcpy(apSrcImage[2], mV, aSrcPitch[2] * height / 2, cudaMemcpyHostToDevice)); - return 0; -} - -int convertor::yuv2bgr(unsigned char **bgr, int *bgrLen){ - - *bgr = NULL; - *bgrLen = 0; - - setGPUDevice(gpu_index); - - NPP_CHECK_NPP(nppiYUV420ToBGR_8u_P3C3R(apSrcImage, aSrcImageStep, imgOrigin, pitchOrigin, sizeOrigin)); - - NPP_CHECK_CUDA(cudaMemcpy2D(bgrOrigin, bgrOriginPitch, imgOrigin, pitchOrigin, bgrOriginPitch, height, cudaMemcpyDeviceToHost)); - *bgr = bgrOrigin; - *bgrLen = bgrOriginLen; - - return 0; -} - -int convertor::resize2bgr(unsigned char *in, unsigned char **data, int *data_len){ - *data = NULL; - *data_len = 0; - - if ((rWidth < 0 && rHeight < 0) || (rWidth > width && rHeight > height)){ - return -1; - } - - setGPUDevice(gpu_index); - - if (!in){ - - init_resize(); - - NppiSize oDstImageSize; - oDstImageSize.width = std::max(1, rWidth); - oDstImageSize.height = std::max(1, rHeight); - for (int i = 0; i < 3; ++i) - { - NppiSize oBlocksPerMCU = { aSamplingFactors[i] & 0x0f, aSamplingFactors[i] >> 4}; - NppiSize oSrcImageSize = {(width * oBlocksPerMCU.width) / nMCUBlocksH, (height * oBlocksPerMCU.height)/nMCUBlocksV}; - NppiRect oSrcImageROI = {0,0,oSrcImageSize.width, oSrcImageSize.height}; - NppiRect oDstImageROI; - NppiInterpolationMode eInterploationMode = NPPI_INTER_SUPER; - NPP_CHECK_NPP(nppiGetResizeRect(oSrcImageROI, &oDstImageROI, - fx, - fy, - 0.0, 0.0, eInterploationMode)); - NPP_CHECK_NPP(nppiResizeSqrPixel_8u_C1R(apSrcImage[i], oSrcImageSize, aSrcImageStep[i], oSrcImageROI, - apDstImage[i], aDstImageStep[i], oDstImageROI , - fx, - fy, - 0.0, 0.0, eInterploationMode)); - } - NPP_CHECK_NPP(nppiYUV420ToBGR_8u_P3C3R(apDstImage, aDstImageStep, imgResize, pitchResize, sizeResize)); - NPP_CHECK_CUDA(cudaMemcpy2D(bgrScale, bgrScalePitch, imgResize, pitchResize, bgrScalePitch, rHeight, cudaMemcpyDeviceToHost)); - *data = bgrScale; - *data_len = bgrScaleLen; - }else{ - - init_resize_bgr(); - - NppiSize oSrcSize; - oSrcSize.width = width; - oSrcSize.height = height; - - NPP_CHECK_CUDA(cudaMemcpy2D(originBGR, pitchOriginBGR, in, width*3, width*3, height, cudaMemcpyHostToDevice)); - - NppiRect oSrcROI; - oSrcROI.x = 0; - oSrcROI.y = 0; - oSrcROI.width = width; - oSrcROI.height = height; - - - NppiRect oDstROI; - oDstROI.x = 0; - oDstROI.y = 0; - oDstROI.width = rWidth; - oDstROI.height = rHeight; - - // Scale Factor - double nXFactor = double(oDstROI.width) / double(oSrcROI.width); - double nYFactor = double(oDstROI.height) / double(oSrcROI.height); - - // Scaled X/Y Shift - double nXShift = - oSrcROI.x * nXFactor ; - double nYShift = - oSrcROI.y * nYFactor; - int eInterpolation = NPPI_INTER_SUPER; - if (nXFactor >= 1.f || nYFactor >= 1.f) - eInterpolation = NPPI_INTER_LANCZOS; - - NppStatus ret = nppiResizeSqrPixel_8u_C3R(originBGR, oSrcSize, pitchOriginBGR, oSrcROI, - resizedBGR, pitchResizedBGR, oDstROI, nXFactor, nYFactor, nXShift, nYShift, eInterpolation ); - - if(ret != NPP_SUCCESS) { - printf("imageResize_8u_C3R failed %d.\n", ret); - return -2; - } - size_t pitch = rWidth * 3; - *data_len = pitch * rHeight; - NPP_CHECK_CUDA(cudaMemcpy2D(hostResizedBGR, pitch, resizedBGR, pitchResizedBGR, pitch, rHeight, cudaMemcpyDeviceToHost)); - *data = hostResizedBGR; - } - return 0; -} - -static int nv12_nearest_scale(uint8_t* __restrict src, uint8_t* __restrict dst, - int srcWidth, int srcHeight, int dstWidth, int dstHeight) -{ - register int sw = srcWidth; //register keyword is for local var to accelorate - register int sh = srcHeight; - register int dw = dstWidth; - register int dh = dstHeight; - register int y, x; - unsigned long int srcy, srcx, src_index, dst_index; - unsigned long int xrIntFloat_16 = (sw << 16) / dw + 1; //better than float division - unsigned long int yrIntFloat_16 = (sh << 16) / dh + 1; - - uint8_t* dst_uv = dst + dh * dw; //memory start pointer of dest uv - uint8_t* src_uv = src + sh * sw; //memory start pointer of source uv - uint8_t* dst_uv_yScanline; - uint8_t* src_uv_yScanline; - uint8_t* dst_y_slice = dst; //memory start pointer of dest y - uint8_t* src_y_slice; - uint8_t* sp; - uint8_t* dp; - - for (y = 0; y < (dh & ~7); ++y) //'dh & ~7' is to generate faster assembly code - { - srcy = (y * yrIntFloat_16) >> 16; - src_y_slice = src + srcy * sw; - - if((y & 1) == 0) - { - dst_uv_yScanline = dst_uv + (y / 2) * dw; - src_uv_yScanline = src_uv + (srcy / 2) * sw; - } - - for(x = 0; x < (dw & ~7); ++x) - { - srcx = (x * xrIntFloat_16) >> 16; - dst_y_slice[x] = src_y_slice[srcx]; - - if((y & 1) == 0) //y is even - { - if((x & 1) == 0) //x is even - { - src_index = (srcx / 2) * 2; - - sp = dst_uv_yScanline + x; - dp = src_uv_yScanline + src_index; - *sp = *dp; - ++sp; - ++dp; - *sp = *dp; - } - } - } - dst_y_slice += dw; - } - return 0; -} - -int convertor::resizeyuv(unsigned char *in, unsigned char **data, int *data_len){ - - init_resize_yuv(); - - *data_len = rWidth*rHeight*3/2; - *data = nv12; - - return nv12_nearest_scale(in, nv12, width, height, rWidth, rHeight); -} - -convHandle conv_create(const int srcW, const int srcH, const int dstW, const int dstH, const int gpu){ - if (gpu < 0) return NULL; - - convertor *conv = new convertor(srcW, srcH, dstW, dstH, gpu); - return conv; -} - -void conv_destroy(convHandle h){ - if (!h) return; - convertor *conv = (convertor*)h; - delete conv; -} - -int yuv2bgrandresize(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen, unsigned char **scaleBGR, int *scaleBGRLen){ - if (!h) return -2; - convertor *conv = (convertor*)h; - int ret = conv->fill_yuv((unsigned char*)yuv); - if (ret != 0) return ret; - ret = conv->yuv2bgr(bgr, bgrLen); - if (ret != 0) return ret; - ret = conv->resize2bgr(NULL, scaleBGR, scaleBGRLen); - return ret; -} - -int yuv2bgr(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen){ - if (!h) return -2; - convertor *conv = (convertor*)h; - int ret = conv->fill_yuv((unsigned char*)yuv); - if (ret != 0) return ret; - return conv->yuv2bgr(bgr, bgrLen); -} - -int yuv2resizedbgr(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen){ - if (!h) return -2; - convertor *conv = (convertor*)h; - int ret = conv->fill_yuv((unsigned char*)yuv); - if (ret != 0) return ret; - ret = conv->resize2bgr(NULL, bgr, bgrLen); - return ret; -} - -int resizebgr(convHandle h, void *data, unsigned char **resized, int *len){ - if (!h) return -2; - convertor *conv = (convertor*)h; - return conv->resize2bgr((unsigned char*)data, resized, len); -} - -int resizeyuv(convHandle h, void *data, unsigned char **resized, int *len){ - if (!h) return -2; - convertor *conv = (convertor*)h; - return conv->resizeyuv((unsigned char*)data, resized, len); -} diff --git a/goconv/conv.h b/goconv/conv.h deleted file mode 100644 index a55d9a8..0000000 --- a/goconv/conv.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef __RESIZE_NPP_H__ -#define __RESIZE_NPP_H__ - -#ifdef __cplusplus -extern "C"{ -#endif - -typedef void* convHandle; -convHandle conv_create(const int srcW, const int srcH, const int dstW, const int dstH, const int gpu); - -void conv_destroy(convHandle h); -int yuv2bgrandresize(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen, unsigned char **scaleBGR, int *scaleBGRLen); -int yuv2bgr(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen); -int yuv2resizedbgr(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen); - -int resizebgr(convHandle h, void *data, unsigned char **resized, int *len); -int resizeyuv(convHandle h, void *data, unsigned char **resized, int *len); - -#ifdef __cplusplus -} -#endif - -#endif //__RESIZE_NPP_H__ diff --git a/goconv/goconv.go b/goconv/goconv.go deleted file mode 100644 index ff3ff4d..0000000 --- a/goconv/goconv.go +++ /dev/null @@ -1,258 +0,0 @@ -package goconv - -/* -#cgo CFLAGS: -I./ -I./inc -I/usr/local/cuda/include -#cgo CXXFLAGS: -I./ -I./inc -I/usr/local/cuda/include -std=c++11 -#cgo LDFLAGS: -L/usr/local/cuda/lib64 -lnppi -lcudart -ldl -#include <stdlib.h> -#include "conv.h" -*/ -import "C" -import ( - "unsafe" - - "basic.com/valib/godraw.git" - "basic.com/valib/gogpu.git" - "github.com/disintegration/imaging" -) - -const ( - need = 200 - reserved = 512 -) - -func gpuIndex(lastIndex int) int { - indices := gogpu.RankGPU() - if len(indices) == 0 { - return -1 - } - - for _, v := range indices { - if v != lastIndex { - if gogpu.SatisfyGPU(v, need, need/2) { - return v - } - } - } - - if gogpu.SatisfyGPU(lastIndex, need, reserved) { - return lastIndex - } - return -1 -} - -type convertor struct { - width int - height int - rWidth int - rHeight int - conv C.convHandle -} - -var convts []*convertor - -func find(w, h, rw, rh int) *convertor { - for _, v := range convts { - if v.width == w && v.height == h && v.rWidth == rw && v.rHeight == rh { - return v - } - } - gpu := gpuIndex(0) - if gpu < 0 { - return nil - } - cw := C.conv_create(C.int(w), C.int(h), C.int(rw), C.int(rh), C.int(gpu)) - if cw == nil { - return nil - } - c := &convertor{w, h, rw, rh, cw} - convts = append(convts, c) - return c -} - -// YUV2BGR yuv->bgr -func YUV2BGR(yuv []byte, w, h int) []byte { - - cw := find(w, h, -1, -1) - if cw == nil { - return yuv2bgr(yuv, w, h) - } - var bgr *C.uchar - var bgrLen C.int - ret := C.yuv2bgr(cw.conv, unsafe.Pointer(&yuv[0]), &bgr, &bgrLen) - if ret != 0 { - return nil - } - const maxLen = 0x7fffffff - goBGRLen := int(bgrLen) - if goBGRLen > 0 { - return (*[maxLen]byte)(unsafe.Pointer(bgr))[:goBGRLen:goBGRLen] - } - return nil -} - -// YUV2ResizedBGR yuv -> resized bgr -func YUV2ResizedBGR(yuv []byte, w, h, rw, rh int) []byte { - - cw := find(w, h, rw, rh) - if cw == nil { - bgr := yuv2bgr(yuv, w, h) - return bgresize(bgr, w, h, rw, rh) - } - var bgr *C.uchar - var bgrLen C.int - ret := C.yuv2resizedbgr(cw.conv, unsafe.Pointer(&yuv[0]), &bgr, &bgrLen) - if ret != 0 { - return nil - } - const maxLen = 0x7fffffff - goBGRLen := int(bgrLen) - if goBGRLen > 0 { - return (*[maxLen]byte)(unsafe.Pointer(bgr))[:goBGRLen:goBGRLen] - } - return nil - -} - -// ResizeBGR resize -func ResizeBGR(bgrO []byte, w, h, rw, rh int) []byte { - if (rw < 0 && rh < 0) || (rw > w && rh > h) { - return bgrO - } - - cw := find(w, h, rw, rh) - if cw == nil { - return bgresize(bgrO, w, h, rw, rh) - } - - var bgr *C.uchar - var bgrLen C.int - ret := C.resizebgr(cw.conv, unsafe.Pointer(&bgrO[0]), &bgr, &bgrLen) - if ret != 0 { - return nil - } - const maxLen = 0x7fffffff - goBGRLen := int(bgrLen) - if goBGRLen > 0 { - return (*[maxLen]byte)(unsafe.Pointer(bgr))[:goBGRLen:goBGRLen] - } - return nil -} - -// ResizeYUV yuv -func ResizeYUV(yuv []byte, w, h, rw, rh int) []byte { - if (rw < 0 && rh < 0) || (rw > w && rh > h) { - return yuv - } - - cw := find(w, h, rw, rh) - if cw == nil { - return yuv - } - - var resized *C.uchar - var resizedLen C.int - ret := C.resizeyuv(cw.conv, unsafe.Pointer(&yuv[0]), &resized, &resizedLen) - if ret != 0 { - return nil - } - - const maxLen = 0x7fffffff - goResizedLen := int(resizedLen) - if goResizedLen > 0 { - return (*[maxLen]byte)(unsafe.Pointer(resized))[:goResizedLen:goResizedLen] - } - return nil -} - -// YUV2BGRandResize conv and resize -func YUV2BGRandResize(yuv []byte, w, h, rw, rh int) ([]byte, []byte) { - cw := find(w, h, rw, rh) - if cw == nil { - origin := yuv2bgr(yuv, w, h) - resized := bgresize(origin, w, h, rw, rh) - return origin, resized - } - - var bgr *C.uchar - var bgrLen C.int - var scale *C.uchar - var scaleLen C.int - - ret := C.yuv2bgrandresize(cw.conv, unsafe.Pointer(&yuv[0]), &bgr, &bgrLen, &scale, &scaleLen) - - if ret != 0 { - return nil, nil - } - var out, resized []byte - - const maxLen = 0x7fffffff - goBGRLen, goScaleLen := int(bgrLen), int(scaleLen) - if goBGRLen > 0 { - out = (*[maxLen]byte)(unsafe.Pointer(bgr))[:goBGRLen:goBGRLen] - } - if goScaleLen > 0 { - resized = (*[maxLen]byte)(unsafe.Pointer(scale))[:goScaleLen:goScaleLen] - } - return out, resized - -} - -// Free free -func Free() { - for _, v := range convts { - if v.conv != nil { - C.conv_destroy(v.conv) - } - } -} - -func yuv2bgr(yuv []byte, w, h int) []byte { - - data := make([]byte, 0, w*h*3) - start := w * h - for i := 0; i < h; i++ { - for j := 0; j < w; j++ { - - index := i/2*w + j - (j & 0x01) - - y := int32(yuv[j+i*w]) - v := int32(yuv[start+index]) - u := int32(yuv[start+index+1]) - - r := y + (140*(v-128))/100 - g := y - (34*(u-128)+71*(v-128))/100 - b := y + (177*(u-128))/100 - - if r > 255 { - r = 255 - } - if r < 0 { - r = 0 - } - if g > 255 { - g = 255 - } - if g < 0 { - g = 0 - } - if b > 255 { - b = 255 - } - if b < 0 { - b = 0 - } - data = append(data, byte(r), byte(g), byte(b)) - } - } - return data -} - -func bgresize(bgr []byte, w, h, rw, rh int) []byte { - img, err := godraw.ToImage(bgr, w, h) - if err != nil { - return nil - } - dstImg := imaging.Resize(img, rw, rh, imaging.NearestNeighbor) - return godraw.Image2BGR(dstImg) -} diff --git a/goconv/inc/Exceptions.h b/goconv/inc/Exceptions.h deleted file mode 100644 index 627bfd9..0000000 --- a/goconv/inc/Exceptions.h +++ /dev/null @@ -1,181 +0,0 @@ -/** - * Copyright 1993-2015 NVIDIA Corporation. All rights reserved. - * - * Please refer to the NVIDIA end user license agreement (EULA) associated - * with this source code for terms and conditions that govern your use of - * this software. Any use, reproduction, disclosure, or distribution of - * this software and related documentation outside the terms of the EULA - * is strictly prohibited. - * - */ - -#ifndef NV_UTIL_NPP_EXCEPTIONS_H -#define NV_UTIL_NPP_EXCEPTIONS_H - - -#include <string> -#include <sstream> -#include <iostream> - -/// All npp related C++ classes are put into the npp namespace. -namespace npp -{ - - /// Exception base class. - /// This exception base class will be used for everything C++ throught - /// the NPP project. - /// The exception contains a string message, as well as data fields for a string - /// containing the name of the file as well as the line number where the exception was thrown. - /// The easiest way of throwing exceptions and providing filename and line number is - /// to use one of the ASSERT macros defined for that purpose. - class Exception - { - public: - /// Constructor. - /// \param rMessage A message with information as to why the exception was thrown. - /// \param rFileName The name of the file where the exception was thrown. - /// \param nLineNumber Line number in the file where the exception was thrown. - explicit - Exception(const std::string &rMessage = "", const std::string &rFileName = "", unsigned int nLineNumber = 0) - : sMessage_(rMessage), sFileName_(rFileName), nLineNumber_(nLineNumber) - { }; - - Exception(const Exception &rException) - : sMessage_(rException.sMessage_), sFileName_(rException.sFileName_), nLineNumber_(rException.nLineNumber_) - { }; - - virtual - ~Exception() - { }; - - /// Get the exception's message. - const - std::string & - message() - const - { - return sMessage_; - } - - /// Get the exception's file info. - const - std::string & - fileName() - const - { - return sFileName_; - } - - /// Get the exceptions's line info. - unsigned int - lineNumber() - const - { - return nLineNumber_; - } - - - /// Create a clone of this exception. - /// This creates a new Exception object on the heap. It is - /// the responsibility of the user of this function to free this memory - /// (delete x). - virtual - Exception * - clone() - const - { - return new Exception(*this); - } - - /// Create a single string with all the exceptions information. - /// The virtual toString() method is used by the operator<<() - /// so that all exceptions derived from this base-class can print - /// their full information correctly even if a reference to their - /// exact type is not had at the time of printing (i.e. the basic - /// operator<<() is used). - virtual - std::string - toString() - const - { - std::ostringstream oOutputString; - oOutputString << fileName() << ":" << lineNumber() << ": " << message(); - return oOutputString.str(); - } - - private: - std::string sMessage_; ///< Message regarding the cause of the exception. - std::string sFileName_; ///< Name of the file where the exception was thrown. - unsigned int nLineNumber_; ///< Line number in the file where the exception was thrown - }; - - /// Output stream inserter for Exception. - /// \param rOutputStream The stream the exception information is written to. - /// \param rException The exception that's being written. - /// \return Reference to the output stream being used. - static std::ostream & - operator << (std::ostream &rOutputStream, const Exception &rException) - { - rOutputStream << rException.toString(); - return rOutputStream; - } - - /// Basic assert macro. - /// This macro should be used to enforce any kind of pre or post conditions. - /// Unlike the C-runtime assert macro, this macro does not abort execution, but throws - /// a C++ exception. The exception is automatically filled with information about the failing - /// condition, the filename and line number where the exception was thrown. - /// \note The macro is written in such a way that omitting a semicolon after its usage - /// causes a compiler error. The correct way to invoke this macro is: - /// NPP_ASSERT(n < MAX); -#define NPP_ASSERT(C) do {if (!(C)) throw npp::Exception(#C " assertion faild!", __FILE__, __LINE__);} while(false) - - // ASSERT macro. - // Same functionality as the basic assert macro with the added ability to pass - // a message M. M should be a string literal. - // Note: Never use code inside ASSERT() that causes a side-effect ASSERT macros may get compiled - // out in release mode. -#define NPP_ASSERT_MSG(C, M) do {if (!(C)) throw npp::Exception(#C " assertion faild! Message: " M, __FILE__, __LINE__);} while(false) - -#ifdef _DEBUG - /// Basic debug assert macro. - /// This macro is identical in every respect to NPP_ASSERT(C) but it does get compiled to a - /// no-op in release builds. It is therefor of utmost importance to not put statements into - /// this macro that cause side effects required for correct program execution. -#define NPP_DEBUG_ASSERT(C) do {if (!(C)) throw npp::Exception(#C " debug assertion faild!", __FILE__, __LINE__);} while(false) -#else -#define NPP_DEBUG_ASSERT(C) -#endif - - /// ASSERT for null-pointer test. - /// It is safe to put code with side effects into this macro. Also: This macro never - /// gets compiled to a no-op because resource allocation may fail based on external causes not under - /// control of a software developer. -#define NPP_ASSERT_NOT_NULL(P) do {if ((P) == 0) throw npp::Exception(#P " not null assertion faild!", __FILE__, __LINE__);} while(false) - - /// Macro for flagging methods as not implemented. - /// The macro throws an exception with a message that an implementation was missing -#define NPP_NOT_IMPLEMENTED() do {throw npp::Exception("Implementation missing!", __FILE__, __LINE__);} while(false) - - /// Macro for checking error return code of CUDA (runtime) calls. - /// This macro never gets disabled. -#define NPP_CHECK_CUDA(S) do {cudaError_t eCUDAResult; \ - eCUDAResult = S; \ - if (eCUDAResult != cudaSuccess) std::cout << "NPP_CHECK_CUDA - eCUDAResult = " << eCUDAResult << std::endl; \ - NPP_ASSERT(eCUDAResult == cudaSuccess);} while (false) - - /// Macro for checking error return code for NPP calls. -#define NPP_CHECK_NPP(S) do {NppStatus eStatusNPP; \ - eStatusNPP = S; \ - if (eStatusNPP != NPP_SUCCESS) std::cout << "NPP_CHECK_NPP - eStatusNPP = " << _cudaGetErrorEnum(eStatusNPP) << "("<< eStatusNPP << ")" << std::endl; \ - NPP_ASSERT(eStatusNPP == NPP_SUCCESS);} while (false) - - /// Macro for checking error return codes from cuFFT calls. -#define NPP_CHECK_CUFFT(S) do {cufftResult eCUFFTResult; \ - eCUFFTResult = S; \ - if (eCUFFTResult != NPP_SUCCESS) std::cout << "NPP_CHECK_CUFFT - eCUFFTResult = " << eCUFFTResult << std::endl; \ - NPP_ASSERT(eCUFFTResult == CUFFT_SUCCESS);} while (false) - -} // npp namespace - -#endif // NV_UTIL_NPP_EXCEPTIONS_H diff --git a/goconv/inc/helper_cuda.h b/goconv/inc/helper_cuda.h deleted file mode 100644 index b24684c..0000000 --- a/goconv/inc/helper_cuda.h +++ /dev/null @@ -1,1261 +0,0 @@ -/** - * Copyright 1993-2013 NVIDIA Corporation. All rights reserved. - * - * Please refer to the NVIDIA end user license agreement (EULA) associated - * with this source code for terms and conditions that govern your use of - * this software. Any use, reproduction, disclosure, or distribution of - * this software and related documentation outside the terms of the EULA - * is strictly prohibited. - * - */ - -//////////////////////////////////////////////////////////////////////////////// -// These are CUDA Helper functions for initialization and error checking - -#ifndef HELPER_CUDA_H -#define HELPER_CUDA_H - -#pragma once - -#include <stdlib.h> -#include <stdio.h> -#include <string.h> - -#include <helper_string.h> - -#ifndef EXIT_WAIVED -#define EXIT_WAIVED 2 -#endif - -// Note, it is required that your SDK sample to include the proper header files, please -// refer the CUDA examples for examples of the needed CUDA headers, which may change depending -// on which CUDA functions are used. - -// CUDA Runtime error messages -#ifdef __DRIVER_TYPES_H__ -static const char *_cudaGetErrorEnum(cudaError_t error) -{ - switch (error) - { - case cudaSuccess: - return "cudaSuccess"; - - case cudaErrorMissingConfiguration: - return "cudaErrorMissingConfiguration"; - - case cudaErrorMemoryAllocation: - return "cudaErrorMemoryAllocation"; - - case cudaErrorInitializationError: - return "cudaErrorInitializationError"; - - case cudaErrorLaunchFailure: - return "cudaErrorLaunchFailure"; - - case cudaErrorPriorLaunchFailure: - return "cudaErrorPriorLaunchFailure"; - - case cudaErrorLaunchTimeout: - return "cudaErrorLaunchTimeout"; - - case cudaErrorLaunchOutOfResources: - return "cudaErrorLaunchOutOfResources"; - - case cudaErrorInvalidDeviceFunction: - return "cudaErrorInvalidDeviceFunction"; - - case cudaErrorInvalidConfiguration: - return "cudaErrorInvalidConfiguration"; - - case cudaErrorInvalidDevice: - return "cudaErrorInvalidDevice"; - - case cudaErrorInvalidValue: - return "cudaErrorInvalidValue"; - - case cudaErrorInvalidPitchValue: - return "cudaErrorInvalidPitchValue"; - - case cudaErrorInvalidSymbol: - return "cudaErrorInvalidSymbol"; - - case cudaErrorMapBufferObjectFailed: - return "cudaErrorMapBufferObjectFailed"; - - case cudaErrorUnmapBufferObjectFailed: - return "cudaErrorUnmapBufferObjectFailed"; - - case cudaErrorInvalidHostPointer: - return "cudaErrorInvalidHostPointer"; - - case cudaErrorInvalidDevicePointer: - return "cudaErrorInvalidDevicePointer"; - - case cudaErrorInvalidTexture: - return "cudaErrorInvalidTexture"; - - case cudaErrorInvalidTextureBinding: - return "cudaErrorInvalidTextureBinding"; - - case cudaErrorInvalidChannelDescriptor: - return "cudaErrorInvalidChannelDescriptor"; - - case cudaErrorInvalidMemcpyDirection: - return "cudaErrorInvalidMemcpyDirection"; - - case cudaErrorAddressOfConstant: - return "cudaErrorAddressOfConstant"; - - case cudaErrorTextureFetchFailed: - return "cudaErrorTextureFetchFailed"; - - case cudaErrorTextureNotBound: - return "cudaErrorTextureNotBound"; - - case cudaErrorSynchronizationError: - return "cudaErrorSynchronizationError"; - - case cudaErrorInvalidFilterSetting: - return "cudaErrorInvalidFilterSetting"; - - case cudaErrorInvalidNormSetting: - return "cudaErrorInvalidNormSetting"; - - case cudaErrorMixedDeviceExecution: - return "cudaErrorMixedDeviceExecution"; - - case cudaErrorCudartUnloading: - return "cudaErrorCudartUnloading"; - - case cudaErrorUnknown: - return "cudaErrorUnknown"; - - case cudaErrorNotYetImplemented: - return "cudaErrorNotYetImplemented"; - - case cudaErrorMemoryValueTooLarge: - return "cudaErrorMemoryValueTooLarge"; - - case cudaErrorInvalidResourceHandle: - return "cudaErrorInvalidResourceHandle"; - - case cudaErrorNotReady: - return "cudaErrorNotReady"; - - case cudaErrorInsufficientDriver: - return "cudaErrorInsufficientDriver"; - - case cudaErrorSetOnActiveProcess: - return "cudaErrorSetOnActiveProcess"; - - case cudaErrorInvalidSurface: - return "cudaErrorInvalidSurface"; - - case cudaErrorNoDevice: - return "cudaErrorNoDevice"; - - case cudaErrorECCUncorrectable: - return "cudaErrorECCUncorrectable"; - - case cudaErrorSharedObjectSymbolNotFound: - return "cudaErrorSharedObjectSymbolNotFound"; - - case cudaErrorSharedObjectInitFailed: - return "cudaErrorSharedObjectInitFailed"; - - case cudaErrorUnsupportedLimit: - return "cudaErrorUnsupportedLimit"; - - case cudaErrorDuplicateVariableName: - return "cudaErrorDuplicateVariableName"; - - case cudaErrorDuplicateTextureName: - return "cudaErrorDuplicateTextureName"; - - case cudaErrorDuplicateSurfaceName: - return "cudaErrorDuplicateSurfaceName"; - - case cudaErrorDevicesUnavailable: - return "cudaErrorDevicesUnavailable"; - - case cudaErrorInvalidKernelImage: - return "cudaErrorInvalidKernelImage"; - - case cudaErrorNoKernelImageForDevice: - return "cudaErrorNoKernelImageForDevice"; - - case cudaErrorIncompatibleDriverContext: - return "cudaErrorIncompatibleDriverContext"; - - case cudaErrorPeerAccessAlreadyEnabled: - return "cudaErrorPeerAccessAlreadyEnabled"; - - case cudaErrorPeerAccessNotEnabled: - return "cudaErrorPeerAccessNotEnabled"; - - case cudaErrorDeviceAlreadyInUse: - return "cudaErrorDeviceAlreadyInUse"; - - case cudaErrorProfilerDisabled: - return "cudaErrorProfilerDisabled"; - - case cudaErrorProfilerNotInitialized: - return "cudaErrorProfilerNotInitialized"; - - case cudaErrorProfilerAlreadyStarted: - return "cudaErrorProfilerAlreadyStarted"; - - case cudaErrorProfilerAlreadyStopped: - return "cudaErrorProfilerAlreadyStopped"; - - /* Since CUDA 4.0*/ - case cudaErrorAssert: - return "cudaErrorAssert"; - - case cudaErrorTooManyPeers: - return "cudaErrorTooManyPeers"; - - case cudaErrorHostMemoryAlreadyRegistered: - return "cudaErrorHostMemoryAlreadyRegistered"; - - case cudaErrorHostMemoryNotRegistered: - return "cudaErrorHostMemoryNotRegistered"; - - /* Since CUDA 5.0 */ - case cudaErrorOperatingSystem: - return "cudaErrorOperatingSystem"; - - case cudaErrorPeerAccessUnsupported: - return "cudaErrorPeerAccessUnsupported"; - - case cudaErrorLaunchMaxDepthExceeded: - return "cudaErrorLaunchMaxDepthExceeded"; - - case cudaErrorLaunchFileScopedTex: - return "cudaErrorLaunchFileScopedTex"; - - case cudaErrorLaunchFileScopedSurf: - return "cudaErrorLaunchFileScopedSurf"; - - case cudaErrorSyncDepthExceeded: - return "cudaErrorSyncDepthExceeded"; - - case cudaErrorLaunchPendingCountExceeded: - return "cudaErrorLaunchPendingCountExceeded"; - - case cudaErrorNotPermitted: - return "cudaErrorNotPermitted"; - - case cudaErrorNotSupported: - return "cudaErrorNotSupported"; - - /* Since CUDA 6.0 */ - case cudaErrorHardwareStackError: - return "cudaErrorHardwareStackError"; - - case cudaErrorIllegalInstruction: - return "cudaErrorIllegalInstruction"; - - case cudaErrorMisalignedAddress: - return "cudaErrorMisalignedAddress"; - - case cudaErrorInvalidAddressSpace: - return "cudaErrorInvalidAddressSpace"; - - case cudaErrorInvalidPc: - return "cudaErrorInvalidPc"; - - case cudaErrorIllegalAddress: - return "cudaErrorIllegalAddress"; - - /* Since CUDA 6.5*/ - case cudaErrorInvalidPtx: - return "cudaErrorInvalidPtx"; - - case cudaErrorInvalidGraphicsContext: - return "cudaErrorInvalidGraphicsContext"; - - case cudaErrorStartupFailure: - return "cudaErrorStartupFailure"; - - case cudaErrorApiFailureBase: - return "cudaErrorApiFailureBase"; - } - - return "<unknown>"; -} -#endif - -#ifdef __cuda_cuda_h__ -// CUDA Driver API errors -static const char *_cudaGetErrorEnum(CUresult error) -{ - switch (error) - { - case CUDA_SUCCESS: - return "CUDA_SUCCESS"; - - case CUDA_ERROR_INVALID_VALUE: - return "CUDA_ERROR_INVALID_VALUE"; - - case CUDA_ERROR_OUT_OF_MEMORY: - return "CUDA_ERROR_OUT_OF_MEMORY"; - - case CUDA_ERROR_NOT_INITIALIZED: - return "CUDA_ERROR_NOT_INITIALIZED"; - - case CUDA_ERROR_DEINITIALIZED: - return "CUDA_ERROR_DEINITIALIZED"; - - case CUDA_ERROR_PROFILER_DISABLED: - return "CUDA_ERROR_PROFILER_DISABLED"; - - case CUDA_ERROR_PROFILER_NOT_INITIALIZED: - return "CUDA_ERROR_PROFILER_NOT_INITIALIZED"; - - case CUDA_ERROR_PROFILER_ALREADY_STARTED: - return "CUDA_ERROR_PROFILER_ALREADY_STARTED"; - - case CUDA_ERROR_PROFILER_ALREADY_STOPPED: - return "CUDA_ERROR_PROFILER_ALREADY_STOPPED"; - - case CUDA_ERROR_NO_DEVICE: - return "CUDA_ERROR_NO_DEVICE"; - - case CUDA_ERROR_INVALID_DEVICE: - return "CUDA_ERROR_INVALID_DEVICE"; - - case CUDA_ERROR_INVALID_IMAGE: - return "CUDA_ERROR_INVALID_IMAGE"; - - case CUDA_ERROR_INVALID_CONTEXT: - return "CUDA_ERROR_INVALID_CONTEXT"; - - case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: - return "CUDA_ERROR_CONTEXT_ALREADY_CURRENT"; - - case CUDA_ERROR_MAP_FAILED: - return "CUDA_ERROR_MAP_FAILED"; - - case CUDA_ERROR_UNMAP_FAILED: - return "CUDA_ERROR_UNMAP_FAILED"; - - case CUDA_ERROR_ARRAY_IS_MAPPED: - return "CUDA_ERROR_ARRAY_IS_MAPPED"; - - case CUDA_ERROR_ALREADY_MAPPED: - return "CUDA_ERROR_ALREADY_MAPPED"; - - case CUDA_ERROR_NO_BINARY_FOR_GPU: - return "CUDA_ERROR_NO_BINARY_FOR_GPU"; - - case CUDA_ERROR_ALREADY_ACQUIRED: - return "CUDA_ERROR_ALREADY_ACQUIRED"; - - case CUDA_ERROR_NOT_MAPPED: - return "CUDA_ERROR_NOT_MAPPED"; - - case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: - return "CUDA_ERROR_NOT_MAPPED_AS_ARRAY"; - - case CUDA_ERROR_NOT_MAPPED_AS_POINTER: - return "CUDA_ERROR_NOT_MAPPED_AS_POINTER"; - - case CUDA_ERROR_ECC_UNCORRECTABLE: - return "CUDA_ERROR_ECC_UNCORRECTABLE"; - - case CUDA_ERROR_UNSUPPORTED_LIMIT: - return "CUDA_ERROR_UNSUPPORTED_LIMIT"; - - case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: - return "CUDA_ERROR_CONTEXT_ALREADY_IN_USE"; - - case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: - return "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED"; - - case CUDA_ERROR_INVALID_PTX: - return "CUDA_ERROR_INVALID_PTX"; - - case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: - return "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT"; - - case CUDA_ERROR_INVALID_SOURCE: - return "CUDA_ERROR_INVALID_SOURCE"; - - case CUDA_ERROR_FILE_NOT_FOUND: - return "CUDA_ERROR_FILE_NOT_FOUND"; - - case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: - return "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND"; - - case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: - return "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED"; - - case CUDA_ERROR_OPERATING_SYSTEM: - return "CUDA_ERROR_OPERATING_SYSTEM"; - - case CUDA_ERROR_INVALID_HANDLE: - return "CUDA_ERROR_INVALID_HANDLE"; - - case CUDA_ERROR_NOT_FOUND: - return "CUDA_ERROR_NOT_FOUND"; - - case CUDA_ERROR_NOT_READY: - return "CUDA_ERROR_NOT_READY"; - - case CUDA_ERROR_ILLEGAL_ADDRESS: - return "CUDA_ERROR_ILLEGAL_ADDRESS"; - - case CUDA_ERROR_LAUNCH_FAILED: - return "CUDA_ERROR_LAUNCH_FAILED"; - - case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: - return "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES"; - - case CUDA_ERROR_LAUNCH_TIMEOUT: - return "CUDA_ERROR_LAUNCH_TIMEOUT"; - - case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: - return "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING"; - - case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: - return "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED"; - - case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: - return "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED"; - - case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: - return "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE"; - - case CUDA_ERROR_CONTEXT_IS_DESTROYED: - return "CUDA_ERROR_CONTEXT_IS_DESTROYED"; - - case CUDA_ERROR_ASSERT: - return "CUDA_ERROR_ASSERT"; - - case CUDA_ERROR_TOO_MANY_PEERS: - return "CUDA_ERROR_TOO_MANY_PEERS"; - - case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: - return "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED"; - - case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: - return "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED"; - - case CUDA_ERROR_HARDWARE_STACK_ERROR: - return "CUDA_ERROR_HARDWARE_STACK_ERROR"; - - case CUDA_ERROR_ILLEGAL_INSTRUCTION: - return "CUDA_ERROR_ILLEGAL_INSTRUCTION"; - - case CUDA_ERROR_MISALIGNED_ADDRESS: - return "CUDA_ERROR_MISALIGNED_ADDRESS"; - - case CUDA_ERROR_INVALID_ADDRESS_SPACE: - return "CUDA_ERROR_INVALID_ADDRESS_SPACE"; - - case CUDA_ERROR_INVALID_PC: - return "CUDA_ERROR_INVALID_PC"; - - case CUDA_ERROR_NOT_PERMITTED: - return "CUDA_ERROR_NOT_PERMITTED"; - - case CUDA_ERROR_NOT_SUPPORTED: - return "CUDA_ERROR_NOT_SUPPORTED"; - - case CUDA_ERROR_UNKNOWN: - return "CUDA_ERROR_UNKNOWN"; - } - - return "<unknown>"; -} -#endif - -#ifdef CUBLAS_API_H_ -// cuBLAS API errors -static const char *_cudaGetErrorEnum(cublasStatus_t error) -{ - switch (error) - { - case CUBLAS_STATUS_SUCCESS: - return "CUBLAS_STATUS_SUCCESS"; - - case CUBLAS_STATUS_NOT_INITIALIZED: - return "CUBLAS_STATUS_NOT_INITIALIZED"; - - case CUBLAS_STATUS_ALLOC_FAILED: - return "CUBLAS_STATUS_ALLOC_FAILED"; - - case CUBLAS_STATUS_INVALID_VALUE: - return "CUBLAS_STATUS_INVALID_VALUE"; - - case CUBLAS_STATUS_ARCH_MISMATCH: - return "CUBLAS_STATUS_ARCH_MISMATCH"; - - case CUBLAS_STATUS_MAPPING_ERROR: - return "CUBLAS_STATUS_MAPPING_ERROR"; - - case CUBLAS_STATUS_EXECUTION_FAILED: - return "CUBLAS_STATUS_EXECUTION_FAILED"; - - case CUBLAS_STATUS_INTERNAL_ERROR: - return "CUBLAS_STATUS_INTERNAL_ERROR"; - - case CUBLAS_STATUS_NOT_SUPPORTED: - return "CUBLAS_STATUS_NOT_SUPPORTED"; - - case CUBLAS_STATUS_LICENSE_ERROR: - return "CUBLAS_STATUS_LICENSE_ERROR"; - } - - return "<unknown>"; -} -#endif - -#ifdef _CUFFT_H_ -// cuFFT API errors -static const char *_cudaGetErrorEnum(cufftResult error) -{ - switch (error) - { - case CUFFT_SUCCESS: - return "CUFFT_SUCCESS"; - - case CUFFT_INVALID_PLAN: - return "CUFFT_INVALID_PLAN"; - - case CUFFT_ALLOC_FAILED: - return "CUFFT_ALLOC_FAILED"; - - case CUFFT_INVALID_TYPE: - return "CUFFT_INVALID_TYPE"; - - case CUFFT_INVALID_VALUE: - return "CUFFT_INVALID_VALUE"; - - case CUFFT_INTERNAL_ERROR: - return "CUFFT_INTERNAL_ERROR"; - - case CUFFT_EXEC_FAILED: - return "CUFFT_EXEC_FAILED"; - - case CUFFT_SETUP_FAILED: - return "CUFFT_SETUP_FAILED"; - - case CUFFT_INVALID_SIZE: - return "CUFFT_INVALID_SIZE"; - - case CUFFT_UNALIGNED_DATA: - return "CUFFT_UNALIGNED_DATA"; - - case CUFFT_INCOMPLETE_PARAMETER_LIST: - return "CUFFT_INCOMPLETE_PARAMETER_LIST"; - - case CUFFT_INVALID_DEVICE: - return "CUFFT_INVALID_DEVICE"; - - case CUFFT_PARSE_ERROR: - return "CUFFT_PARSE_ERROR"; - - case CUFFT_NO_WORKSPACE: - return "CUFFT_NO_WORKSPACE"; - - case CUFFT_NOT_IMPLEMENTED: - return "CUFFT_NOT_IMPLEMENTED"; - - case CUFFT_LICENSE_ERROR: - return "CUFFT_LICENSE_ERROR"; - } - - return "<unknown>"; -} -#endif - - -#ifdef CUSPARSEAPI -// cuSPARSE API errors -static const char *_cudaGetErrorEnum(cusparseStatus_t error) -{ - switch (error) - { - case CUSPARSE_STATUS_SUCCESS: - return "CUSPARSE_STATUS_SUCCESS"; - - case CUSPARSE_STATUS_NOT_INITIALIZED: - return "CUSPARSE_STATUS_NOT_INITIALIZED"; - - case CUSPARSE_STATUS_ALLOC_FAILED: - return "CUSPARSE_STATUS_ALLOC_FAILED"; - - case CUSPARSE_STATUS_INVALID_VALUE: - return "CUSPARSE_STATUS_INVALID_VALUE"; - - case CUSPARSE_STATUS_ARCH_MISMATCH: - return "CUSPARSE_STATUS_ARCH_MISMATCH"; - - case CUSPARSE_STATUS_MAPPING_ERROR: - return "CUSPARSE_STATUS_MAPPING_ERROR"; - - case CUSPARSE_STATUS_EXECUTION_FAILED: - return "CUSPARSE_STATUS_EXECUTION_FAILED"; - - case CUSPARSE_STATUS_INTERNAL_ERROR: - return "CUSPARSE_STATUS_INTERNAL_ERROR"; - - case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: - return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED"; - } - - return "<unknown>"; -} -#endif - -#ifdef CUSOLVER_COMMON_H_ -//cuSOLVER API errors -static const char *_cudaGetErrorEnum(cusolverStatus_t error) -{ - switch(error) - { - case CUSOLVER_STATUS_SUCCESS: - return "CUSOLVER_STATUS_SUCCESS"; - case CUSOLVER_STATUS_NOT_INITIALIZED: - return "CUSOLVER_STATUS_NOT_INITIALIZED"; - case CUSOLVER_STATUS_ALLOC_FAILED: - return "CUSOLVER_STATUS_ALLOC_FAILED"; - case CUSOLVER_STATUS_INVALID_VALUE: - return "CUSOLVER_STATUS_INVALID_VALUE"; - case CUSOLVER_STATUS_ARCH_MISMATCH: - return "CUSOLVER_STATUS_ARCH_MISMATCH"; - case CUSOLVER_STATUS_MAPPING_ERROR: - return "CUSOLVER_STATUS_MAPPING_ERROR"; - case CUSOLVER_STATUS_EXECUTION_FAILED: - return "CUSOLVER_STATUS_EXECUTION_FAILED"; - case CUSOLVER_STATUS_INTERNAL_ERROR: - return "CUSOLVER_STATUS_INTERNAL_ERROR"; - case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED: - return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED"; - case CUSOLVER_STATUS_NOT_SUPPORTED : - return "CUSOLVER_STATUS_NOT_SUPPORTED "; - case CUSOLVER_STATUS_ZERO_PIVOT: - return "CUSOLVER_STATUS_ZERO_PIVOT"; - case CUSOLVER_STATUS_INVALID_LICENSE: - return "CUSOLVER_STATUS_INVALID_LICENSE"; - } - - return "<unknown>"; - -} -#endif - -#ifdef CURAND_H_ -// cuRAND API errors -static const char *_cudaGetErrorEnum(curandStatus_t error) -{ - switch (error) - { - case CURAND_STATUS_SUCCESS: - return "CURAND_STATUS_SUCCESS"; - - case CURAND_STATUS_VERSION_MISMATCH: - return "CURAND_STATUS_VERSION_MISMATCH"; - - case CURAND_STATUS_NOT_INITIALIZED: - return "CURAND_STATUS_NOT_INITIALIZED"; - - case CURAND_STATUS_ALLOCATION_FAILED: - return "CURAND_STATUS_ALLOCATION_FAILED"; - - case CURAND_STATUS_TYPE_ERROR: - return "CURAND_STATUS_TYPE_ERROR"; - - case CURAND_STATUS_OUT_OF_RANGE: - return "CURAND_STATUS_OUT_OF_RANGE"; - - case CURAND_STATUS_LENGTH_NOT_MULTIPLE: - return "CURAND_STATUS_LENGTH_NOT_MULTIPLE"; - - case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED: - return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED"; - - case CURAND_STATUS_LAUNCH_FAILURE: - return "CURAND_STATUS_LAUNCH_FAILURE"; - - case CURAND_STATUS_PREEXISTING_FAILURE: - return "CURAND_STATUS_PREEXISTING_FAILURE"; - - case CURAND_STATUS_INITIALIZATION_FAILED: - return "CURAND_STATUS_INITIALIZATION_FAILED"; - - case CURAND_STATUS_ARCH_MISMATCH: - return "CURAND_STATUS_ARCH_MISMATCH"; - - case CURAND_STATUS_INTERNAL_ERROR: - return "CURAND_STATUS_INTERNAL_ERROR"; - } - - return "<unknown>"; -} -#endif - -#ifdef NV_NPPIDEFS_H -// NPP API errors -static const char *_cudaGetErrorEnum(NppStatus error) -{ - switch (error) - { - case NPP_NOT_SUPPORTED_MODE_ERROR: - return "NPP_NOT_SUPPORTED_MODE_ERROR"; - - case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR: - return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR"; - - case NPP_RESIZE_NO_OPERATION_ERROR: - return "NPP_RESIZE_NO_OPERATION_ERROR"; - - case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY: - return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY"; - -#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000 - - case NPP_BAD_ARG_ERROR: - return "NPP_BAD_ARGUMENT_ERROR"; - - case NPP_COEFF_ERROR: - return "NPP_COEFFICIENT_ERROR"; - - case NPP_RECT_ERROR: - return "NPP_RECTANGLE_ERROR"; - - case NPP_QUAD_ERROR: - return "NPP_QUADRANGLE_ERROR"; - - case NPP_MEM_ALLOC_ERR: - return "NPP_MEMORY_ALLOCATION_ERROR"; - - case NPP_HISTO_NUMBER_OF_LEVELS_ERROR: - return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR"; - - case NPP_INVALID_INPUT: - return "NPP_INVALID_INPUT"; - - case NPP_POINTER_ERROR: - return "NPP_POINTER_ERROR"; - - case NPP_WARNING: - return "NPP_WARNING"; - - case NPP_ODD_ROI_WARNING: - return "NPP_ODD_ROI_WARNING"; -#else - - // These are for CUDA 5.5 or higher - case NPP_BAD_ARGUMENT_ERROR: - return "NPP_BAD_ARGUMENT_ERROR"; - - case NPP_COEFFICIENT_ERROR: - return "NPP_COEFFICIENT_ERROR"; - - case NPP_RECTANGLE_ERROR: - return "NPP_RECTANGLE_ERROR"; - - case NPP_QUADRANGLE_ERROR: - return "NPP_QUADRANGLE_ERROR"; - - case NPP_MEMORY_ALLOCATION_ERR: - return "NPP_MEMORY_ALLOCATION_ERROR"; - - case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR: - return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR"; - - case NPP_INVALID_HOST_POINTER_ERROR: - return "NPP_INVALID_HOST_POINTER_ERROR"; - - case NPP_INVALID_DEVICE_POINTER_ERROR: - return "NPP_INVALID_DEVICE_POINTER_ERROR"; -#endif - - case NPP_LUT_NUMBER_OF_LEVELS_ERROR: - return "NPP_LUT_NUMBER_OF_LEVELS_ERROR"; - - case NPP_TEXTURE_BIND_ERROR: - return "NPP_TEXTURE_BIND_ERROR"; - - case NPP_WRONG_INTERSECTION_ROI_ERROR: - return "NPP_WRONG_INTERSECTION_ROI_ERROR"; - - case NPP_NOT_EVEN_STEP_ERROR: - return "NPP_NOT_EVEN_STEP_ERROR"; - - case NPP_INTERPOLATION_ERROR: - return "NPP_INTERPOLATION_ERROR"; - - case NPP_RESIZE_FACTOR_ERROR: - return "NPP_RESIZE_FACTOR_ERROR"; - - case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR: - return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR"; - - -#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000 - - case NPP_MEMFREE_ERR: - return "NPP_MEMFREE_ERR"; - - case NPP_MEMSET_ERR: - return "NPP_MEMSET_ERR"; - - case NPP_MEMCPY_ERR: - return "NPP_MEMCPY_ERROR"; - - case NPP_MIRROR_FLIP_ERR: - return "NPP_MIRROR_FLIP_ERR"; -#else - - case NPP_MEMFREE_ERROR: - return "NPP_MEMFREE_ERROR"; - - case NPP_MEMSET_ERROR: - return "NPP_MEMSET_ERROR"; - - case NPP_MEMCPY_ERROR: - return "NPP_MEMCPY_ERROR"; - - case NPP_MIRROR_FLIP_ERROR: - return "NPP_MIRROR_FLIP_ERROR"; -#endif - - case NPP_ALIGNMENT_ERROR: - return "NPP_ALIGNMENT_ERROR"; - - case NPP_STEP_ERROR: - return "NPP_STEP_ERROR"; - - case NPP_SIZE_ERROR: - return "NPP_SIZE_ERROR"; - - case NPP_NULL_POINTER_ERROR: - return "NPP_NULL_POINTER_ERROR"; - - case NPP_CUDA_KERNEL_EXECUTION_ERROR: - return "NPP_CUDA_KERNEL_EXECUTION_ERROR"; - - case NPP_NOT_IMPLEMENTED_ERROR: - return "NPP_NOT_IMPLEMENTED_ERROR"; - - case NPP_ERROR: - return "NPP_ERROR"; - - case NPP_SUCCESS: - return "NPP_SUCCESS"; - - case NPP_WRONG_INTERSECTION_QUAD_WARNING: - return "NPP_WRONG_INTERSECTION_QUAD_WARNING"; - - case NPP_MISALIGNED_DST_ROI_WARNING: - return "NPP_MISALIGNED_DST_ROI_WARNING"; - - case NPP_AFFINE_QUAD_INCORRECT_WARNING: - return "NPP_AFFINE_QUAD_INCORRECT_WARNING"; - - case NPP_DOUBLE_SIZE_WARNING: - return "NPP_DOUBLE_SIZE_WARNING"; - - case NPP_WRONG_INTERSECTION_ROI_WARNING: - return "NPP_WRONG_INTERSECTION_ROI_WARNING"; - -#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000 - /* These are 6.0 or higher */ - case NPP_LUT_PALETTE_BITSIZE_ERROR: - return "NPP_LUT_PALETTE_BITSIZE_ERROR"; - - case NPP_ZC_MODE_NOT_SUPPORTED_ERROR: - return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR"; - - case NPP_QUALITY_INDEX_ERROR: - return "NPP_QUALITY_INDEX_ERROR"; - - case NPP_CHANNEL_ORDER_ERROR: - return "NPP_CHANNEL_ORDER_ERROR"; - - case NPP_ZERO_MASK_VALUE_ERROR: - return "NPP_ZERO_MASK_VALUE_ERROR"; - - case NPP_NUMBER_OF_CHANNELS_ERROR: - return "NPP_NUMBER_OF_CHANNELS_ERROR"; - - case NPP_COI_ERROR: - return "NPP_COI_ERROR"; - - case NPP_DIVISOR_ERROR: - return "NPP_DIVISOR_ERROR"; - - case NPP_CHANNEL_ERROR: - return "NPP_CHANNEL_ERROR"; - - case NPP_STRIDE_ERROR: - return "NPP_STRIDE_ERROR"; - - case NPP_ANCHOR_ERROR: - return "NPP_ANCHOR_ERROR"; - - case NPP_MASK_SIZE_ERROR: - return "NPP_MASK_SIZE_ERROR"; - - case NPP_MOMENT_00_ZERO_ERROR: - return "NPP_MOMENT_00_ZERO_ERROR"; - - case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR: - return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR"; - - case NPP_THRESHOLD_ERROR: - return "NPP_THRESHOLD_ERROR"; - - case NPP_CONTEXT_MATCH_ERROR: - return "NPP_CONTEXT_MATCH_ERROR"; - - case NPP_FFT_FLAG_ERROR: - return "NPP_FFT_FLAG_ERROR"; - - case NPP_FFT_ORDER_ERROR: - return "NPP_FFT_ORDER_ERROR"; - - case NPP_SCALE_RANGE_ERROR: - return "NPP_SCALE_RANGE_ERROR"; - - case NPP_DATA_TYPE_ERROR: - return "NPP_DATA_TYPE_ERROR"; - - case NPP_OUT_OFF_RANGE_ERROR: - return "NPP_OUT_OFF_RANGE_ERROR"; - - case NPP_DIVIDE_BY_ZERO_ERROR: - return "NPP_DIVIDE_BY_ZERO_ERROR"; - - case NPP_RANGE_ERROR: - return "NPP_RANGE_ERROR"; - - case NPP_NO_MEMORY_ERROR: - return "NPP_NO_MEMORY_ERROR"; - - case NPP_ERROR_RESERVED: - return "NPP_ERROR_RESERVED"; - - case NPP_NO_OPERATION_WARNING: - return "NPP_NO_OPERATION_WARNING"; - - case NPP_DIVIDE_BY_ZERO_WARNING: - return "NPP_DIVIDE_BY_ZERO_WARNING"; -#endif - - } - - return "<unknown>"; -} -#endif - -#ifdef __DRIVER_TYPES_H__ -#ifndef DEVICE_RESET -#define DEVICE_RESET cudaDeviceReset(); -#endif -#else -#ifndef DEVICE_RESET -#define DEVICE_RESET -#endif -#endif - -template< typename T > -void check(T result, char const *const func, const char *const file, int const line) -{ - if (result) - { - fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", - file, line, static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func); - DEVICE_RESET - // Make sure we call CUDA Device Reset before exiting - exit(EXIT_FAILURE); - } -} - -#ifdef __DRIVER_TYPES_H__ -// This will output the proper CUDA error strings in the event that a CUDA host call returns an error -#define checkCudaErrors(val) check ( (val), #val, __FILE__, __LINE__ ) - -// This will output the proper error string when calling cudaGetLastError -#define getLastCudaError(msg) __getLastCudaError (msg, __FILE__, __LINE__) - -inline void __getLastCudaError(const char *errorMessage, const char *file, const int line) -{ - cudaError_t err = cudaGetLastError(); - - if (cudaSuccess != err) - { - fprintf(stderr, "%s(%i) : getLastCudaError() CUDA error : %s : (%d) %s.\n", - file, line, errorMessage, (int)err, cudaGetErrorString(err)); - DEVICE_RESET - exit(EXIT_FAILURE); - } -} -#endif - -#ifndef MAX -#define MAX(a,b) (a > b ? a : b) -#endif - -// Float To Int conversion -inline int ftoi(float value) -{ - return (value >= 0 ? (int)(value + 0.5) : (int)(value - 0.5)); -} - -// Beginning of GPU Architecture definitions -inline int _ConvertSMVer2Cores(int major, int minor) -{ - // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM - typedef struct - { - int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version - int Cores; - } sSMtoCores; - - sSMtoCores nGpuArchCoresPerSM[] = - { - { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class - { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class - { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class - { 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class - { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class - { 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class - { 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class - { 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class - { -1, -1 } - }; - - int index = 0; - - while (nGpuArchCoresPerSM[index].SM != -1) - { - if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor)) - { - return nGpuArchCoresPerSM[index].Cores; - } - - index++; - } - - // If we don't find the values, we default use the previous one to run properly - printf("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores); - return nGpuArchCoresPerSM[index-1].Cores; -} -// end of GPU Architecture definitions - -#ifdef __CUDA_RUNTIME_H__ -// General GPU Device CUDA Initialization -inline int gpuDeviceInit(int devID) -{ - int device_count; - checkCudaErrors(cudaGetDeviceCount(&device_count)); - - if (device_count == 0) - { - fprintf(stderr, "gpuDeviceInit() CUDA error: no devices supporting CUDA.\n"); - exit(EXIT_FAILURE); - } - - if (devID < 0) - { - devID = 0; - } - - if (devID > device_count-1) - { - fprintf(stderr, "\n"); - fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", device_count); - fprintf(stderr, ">> gpuDeviceInit (-device=%d) is not a valid GPU device. <<\n", devID); - fprintf(stderr, "\n"); - return -devID; - } - - cudaDeviceProp deviceProp; - checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID)); - - if (deviceProp.computeMode == cudaComputeModeProhibited) - { - fprintf(stderr, "Error: device is running in <Compute Mode Prohibited>, no threads can use ::cudaSetDevice().\n"); - return -1; - } - - if (deviceProp.major < 1) - { - fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n"); - exit(EXIT_FAILURE); - } - - checkCudaErrors(cudaSetDevice(devID)); - printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, deviceProp.name); - - return devID; -} - -// This function returns the best GPU (with maximum GFLOPS) -inline int gpuGetMaxGflopsDeviceId() -{ - int current_device = 0, sm_per_multiproc = 0; - int max_perf_device = 0; - int device_count = 0, best_SM_arch = 0; - int devices_prohibited = 0; - - unsigned long long max_compute_perf = 0; - cudaDeviceProp deviceProp; - cudaGetDeviceCount(&device_count); - - checkCudaErrors(cudaGetDeviceCount(&device_count)); - - if (device_count == 0) - { - fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: no devices supporting CUDA.\n"); - exit(EXIT_FAILURE); - } - - // Find the best major SM Architecture GPU device - while (current_device < device_count) - { - cudaGetDeviceProperties(&deviceProp, current_device); - - // If this GPU is not running on Compute Mode prohibited, then we can add it to the list - if (deviceProp.computeMode != cudaComputeModeProhibited) - { - if (deviceProp.major > 0 && deviceProp.major < 9999) - { - best_SM_arch = MAX(best_SM_arch, deviceProp.major); - } - } - else - { - devices_prohibited++; - } - - current_device++; - } - - if (devices_prohibited == device_count) - { - fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: all devices have compute mode prohibited.\n"); - exit(EXIT_FAILURE); - } - - // Find the best CUDA capable GPU device - current_device = 0; - - while (current_device < device_count) - { - cudaGetDeviceProperties(&deviceProp, current_device); - - // If this GPU is not running on Compute Mode prohibited, then we can add it to the list - if (deviceProp.computeMode != cudaComputeModeProhibited) - { - if (deviceProp.major == 9999 && deviceProp.minor == 9999) - { - sm_per_multiproc = 1; - } - else - { - sm_per_multiproc = _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor); - } - - unsigned long long compute_perf = (unsigned long long) deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate; - - if (compute_perf > max_compute_perf) - { - // If we find GPU with SM major > 2, search only these - if (best_SM_arch > 2) - { - // If our device==dest_SM_arch, choose this, or else pass - if (deviceProp.major == best_SM_arch) - { - max_compute_perf = compute_perf; - max_perf_device = current_device; - } - } - else - { - max_compute_perf = compute_perf; - max_perf_device = current_device; - } - } - } - - ++current_device; - } - - return max_perf_device; -} - - -// Initialization code to find the best CUDA Device -inline int findCudaDevice(int argc, const char **argv) -{ - cudaDeviceProp deviceProp; - int devID = 0; - - // If the command-line has a device number specified, use it - if (checkCmdLineFlag(argc, argv, "device")) - { - devID = getCmdLineArgumentInt(argc, argv, "device="); - - if (devID < 0) - { - printf("Invalid command line parameter\n "); - exit(EXIT_FAILURE); - } - else - { - devID = gpuDeviceInit(devID); - - if (devID < 0) - { - printf("exiting...\n"); - exit(EXIT_FAILURE); - } - } - } - else - { - // Otherwise pick the device with highest Gflops/s - devID = gpuGetMaxGflopsDeviceId(); - checkCudaErrors(cudaSetDevice(devID)); - checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID)); - printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID, deviceProp.name, deviceProp.major, deviceProp.minor); - } - - return devID; -} - -// General check for CUDA GPU SM Capabilities -inline bool checkCudaCapabilities(int major_version, int minor_version) -{ - cudaDeviceProp deviceProp; - deviceProp.major = 0; - deviceProp.minor = 0; - int dev; - - checkCudaErrors(cudaGetDevice(&dev)); - checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev)); - - if ((deviceProp.major > major_version) || - (deviceProp.major == major_version && deviceProp.minor >= minor_version)) - { - printf(" Device %d: <%16s >, Compute SM %d.%d detected\n", dev, deviceProp.name, deviceProp.major, deviceProp.minor); - return true; - } - else - { - printf(" No GPU device was found that can support CUDA compute capability %d.%d.\n", major_version, minor_version); - return false; - } -} -#endif - -// end of CUDA Helper Functions - - -#endif diff --git a/goconv/inc/helper_string.h b/goconv/inc/helper_string.h deleted file mode 100644 index 9b68cc7..0000000 --- a/goconv/inc/helper_string.h +++ /dev/null @@ -1,526 +0,0 @@ -/** - * Copyright 1993-2013 NVIDIA Corporation. All rights reserved. - * - * Please refer to the NVIDIA end user license agreement (EULA) associated - * with this source code for terms and conditions that govern your use of - * this software. Any use, reproduction, disclosure, or distribution of - * this software and related documentation outside the terms of the EULA - * is strictly prohibited. - * - */ - -// These are helper functions for the SDK samples (string parsing, timers, etc) -#ifndef STRING_HELPER_H -#define STRING_HELPER_H - -#include <stdio.h> -#include <stdlib.h> -#include <fstream> -#include <string> - -#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) -#ifndef _CRT_SECURE_NO_DEPRECATE -#define _CRT_SECURE_NO_DEPRECATE -#endif -#ifndef STRCASECMP -#define STRCASECMP _stricmp -#endif -#ifndef STRNCASECMP -#define STRNCASECMP _strnicmp -#endif -#ifndef STRCPY -#define STRCPY(sFilePath, nLength, sPath) strcpy_s(sFilePath, nLength, sPath) -#endif - -#ifndef FOPEN -#define FOPEN(fHandle,filename,mode) fopen_s(&fHandle, filename, mode) -#endif -#ifndef FOPEN_FAIL -#define FOPEN_FAIL(result) (result != 0) -#endif -#ifndef SSCANF -#define SSCANF sscanf_s -#endif -#ifndef SPRINTF -#define SPRINTF sprintf_s -#endif -#else // Linux Includes -#include <string.h> -#include <strings.h> - -#ifndef STRCASECMP -#define STRCASECMP strcasecmp -#endif -#ifndef STRNCASECMP -#define STRNCASECMP strncasecmp -#endif -#ifndef STRCPY -#define STRCPY(sFilePath, nLength, sPath) strcpy(sFilePath, sPath) -#endif - -#ifndef FOPEN -#define FOPEN(fHandle,filename,mode) (fHandle = fopen(filename, mode)) -#endif -#ifndef FOPEN_FAIL -#define FOPEN_FAIL(result) (result == NULL) -#endif -#ifndef SSCANF -#define SSCANF sscanf -#endif -#ifndef SPRINTF -#define SPRINTF sprintf -#endif -#endif - -#ifndef EXIT_WAIVED -#define EXIT_WAIVED 2 -#endif - -// CUDA Utility Helper Functions -inline int stringRemoveDelimiter(char delimiter, const char *string) -{ - int string_start = 0; - - while (string[string_start] == delimiter) - { - string_start++; - } - - if (string_start >= (int)strlen(string)-1) - { - return 0; - } - - return string_start; -} - -inline int getFileExtension(char *filename, char **extension) -{ - int string_length = (int)strlen(filename); - - while (filename[string_length--] != '.') - { - if (string_length == 0) - break; - } - - if (string_length > 0) string_length += 2; - - if (string_length == 0) - *extension = NULL; - else - *extension = &filename[string_length]; - - return string_length; -} - - -inline bool checkCmdLineFlag(const int argc, const char **argv, const char *string_ref) -{ - bool bFound = false; - - if (argc >= 1) - { - for (int i=1; i < argc; i++) - { - int string_start = stringRemoveDelimiter('-', argv[i]); - const char *string_argv = &argv[i][string_start]; - - const char *equal_pos = strchr(string_argv, '='); - int argv_length = (int)(equal_pos == 0 ? strlen(string_argv) : equal_pos - string_argv); - - int length = (int)strlen(string_ref); - - if (length == argv_length && !STRNCASECMP(string_argv, string_ref, length)) - { - bFound = true; - continue; - } - } - } - - return bFound; -} - -// This function wraps the CUDA Driver API into a template function -template <class T> -inline bool getCmdLineArgumentValue(const int argc, const char **argv, const char *string_ref, T *value) -{ - bool bFound = false; - - if (argc >= 1) - { - for (int i=1; i < argc; i++) - { - int string_start = stringRemoveDelimiter('-', argv[i]); - const char *string_argv = &argv[i][string_start]; - int length = (int)strlen(string_ref); - - if (!STRNCASECMP(string_argv, string_ref, length)) - { - if (length+1 <= (int)strlen(string_argv)) - { - int auto_inc = (string_argv[length] == '=') ? 1 : 0; - *value = (T)atoi(&string_argv[length + auto_inc]); - } - - bFound = true; - i=argc; - } - } - } - - return bFound; -} - -inline int getCmdLineArgumentInt(const int argc, const char **argv, const char *string_ref) -{ - bool bFound = false; - int value = -1; - - if (argc >= 1) - { - for (int i=1; i < argc; i++) - { - int string_start = stringRemoveDelimiter('-', argv[i]); - const char *string_argv = &argv[i][string_start]; - int length = (int)strlen(string_ref); - - if (!STRNCASECMP(string_argv, string_ref, length)) - { - if (length+1 <= (int)strlen(string_argv)) - { - int auto_inc = (string_argv[length] == '=') ? 1 : 0; - value = atoi(&string_argv[length + auto_inc]); - } - else - { - value = 0; - } - - bFound = true; - continue; - } - } - } - - if (bFound) - { - return value; - } - else - { - return 0; - } -} - -inline float getCmdLineArgumentFloat(const int argc, const char **argv, const char *string_ref) -{ - bool bFound = false; - float value = -1; - - if (argc >= 1) - { - for (int i=1; i < argc; i++) - { - int string_start = stringRemoveDelimiter('-', argv[i]); - const char *string_argv = &argv[i][string_start]; - int length = (int)strlen(string_ref); - - if (!STRNCASECMP(string_argv, string_ref, length)) - { - if (length+1 <= (int)strlen(string_argv)) - { - int auto_inc = (string_argv[length] == '=') ? 1 : 0; - value = (float)atof(&string_argv[length + auto_inc]); - } - else - { - value = 0.f; - } - - bFound = true; - continue; - } - } - } - - if (bFound) - { - return value; - } - else - { - return 0; - } -} - -inline bool getCmdLineArgumentString(const int argc, const char **argv, - const char *string_ref, char **string_retval) -{ - bool bFound = false; - - if (argc >= 1) - { - for (int i=1; i < argc; i++) - { - int string_start = stringRemoveDelimiter('-', argv[i]); - char *string_argv = (char *)&argv[i][string_start]; - int length = (int)strlen(string_ref); - - if (!STRNCASECMP(string_argv, string_ref, length)) - { - *string_retval = &string_argv[length+1]; - bFound = true; - continue; - } - } - } - - if (!bFound) - { - *string_retval = NULL; - } - - return bFound; -} - -////////////////////////////////////////////////////////////////////////////// -//! Find the path for a file assuming that -//! files are found in the searchPath. -//! -//! @return the path if succeeded, otherwise 0 -//! @param filename name of the file -//! @param executable_path optional absolute path of the executable -////////////////////////////////////////////////////////////////////////////// -inline char *sdkFindFilePath(const char *filename, const char *executable_path) -{ - // <executable_name> defines a variable that is replaced with the name of the executable - - // Typical relative search paths to locate needed companion files (e.g. sample input data, or JIT source files) - // The origin for the relative search may be the .exe file, a .bat file launching an .exe, a browser .exe launching the .exe or .bat, etc - const char *searchPath[] = - { - "./", // same dir - "./common/", // "/common/" subdir - "./common/data/", // "/common/data/" subdir - "./data/", // "/data/" subdir - "./src/", // "/src/" subdir - "./src/<executable_name>/data/", // "/src/<executable_name>/data/" subdir - "./inc/", // "/inc/" subdir - "./0_Simple/", // "/0_Simple/" subdir - "./1_Utilities/", // "/1_Utilities/" subdir - "./2_Graphics/", // "/2_Graphics/" subdir - "./3_Imaging/", // "/3_Imaging/" subdir - "./4_Finance/", // "/4_Finance/" subdir - "./5_Simulations/", // "/5_Simulations/" subdir - "./6_Advanced/", // "/6_Advanced/" subdir - "./7_CUDALibraries/", // "/7_CUDALibraries/" subdir - "./8_Android/", // "/8_Android/" subdir - "./samples/", // "/samples/" subdir - - "./0_Simple/<executable_name>/data/", // "/0_Simple/<executable_name>/data/" subdir - "./1_Utilities/<executable_name>/data/", // "/1_Utilities/<executable_name>/data/" subdir - "./2_Graphics/<executable_name>/data/", // "/2_Graphics/<executable_name>/data/" subdir - "./3_Imaging/<executable_name>/data/", // "/3_Imaging/<executable_name>/data/" subdir - "./4_Finance/<executable_name>/data/", // "/4_Finance/<executable_name>/data/" subdir - "./5_Simulations/<executable_name>/data/", // "/5_Simulations/<executable_name>/data/" subdir - "./6_Advanced/<executable_name>/data/", // "/6_Advanced/<executable_name>/data/" subdir - "./7_CUDALibraries/<executable_name>/", // "/7_CUDALibraries/<executable_name>/" subdir - "./7_CUDALibraries/<executable_name>/data/", // "/7_CUDALibraries/<executable_name>/data/" subdir - - "../", // up 1 in tree - "../common/", // up 1 in tree, "/common/" subdir - "../common/data/", // up 1 in tree, "/common/data/" subdir - "../data/", // up 1 in tree, "/data/" subdir - "../src/", // up 1 in tree, "/src/" subdir - "../inc/", // up 1 in tree, "/inc/" subdir - - "../0_Simple/<executable_name>/data/", // up 1 in tree, "/0_Simple/<executable_name>/" subdir - "../1_Utilities/<executable_name>/data/", // up 1 in tree, "/1_Utilities/<executable_name>/" subdir - "../2_Graphics/<executable_name>/data/", // up 1 in tree, "/2_Graphics/<executable_name>/" subdir - "../3_Imaging/<executable_name>/data/", // up 1 in tree, "/3_Imaging/<executable_name>/" subdir - "../4_Finance/<executable_name>/data/", // up 1 in tree, "/4_Finance/<executable_name>/" subdir - "../5_Simulations/<executable_name>/data/", // up 1 in tree, "/5_Simulations/<executable_name>/" subdir - "../6_Advanced/<executable_name>/data/", // up 1 in tree, "/6_Advanced/<executable_name>/" subdir - "../7_CUDALibraries/<executable_name>/data/",// up 1 in tree, "/7_CUDALibraries/<executable_name>/" subdir - "../8_Android/<executable_name>/data/", // up 1 in tree, "/8_Android/<executable_name>/" subdir - "../samples/<executable_name>/data/", // up 1 in tree, "/samples/<executable_name>/" subdir - "../../", // up 2 in tree - "../../common/", // up 2 in tree, "/common/" subdir - "../../common/data/", // up 2 in tree, "/common/data/" subdir - "../../data/", // up 2 in tree, "/data/" subdir - "../../src/", // up 2 in tree, "/src/" subdir - "../../inc/", // up 2 in tree, "/inc/" subdir - "../../sandbox/<executable_name>/data/", // up 2 in tree, "/sandbox/<executable_name>/" subdir - "../../0_Simple/<executable_name>/data/", // up 2 in tree, "/0_Simple/<executable_name>/" subdir - "../../1_Utilities/<executable_name>/data/", // up 2 in tree, "/1_Utilities/<executable_name>/" subdir - "../../2_Graphics/<executable_name>/data/", // up 2 in tree, "/2_Graphics/<executable_name>/" subdir - "../../3_Imaging/<executable_name>/data/", // up 2 in tree, "/3_Imaging/<executable_name>/" subdir - "../../4_Finance/<executable_name>/data/", // up 2 in tree, "/4_Finance/<executable_name>/" subdir - "../../5_Simulations/<executable_name>/data/", // up 2 in tree, "/5_Simulations/<executable_name>/" subdir - "../../6_Advanced/<executable_name>/data/", // up 2 in tree, "/6_Advanced/<executable_name>/" subdir - "../../7_CUDALibraries/<executable_name>/data/", // up 2 in tree, "/7_CUDALibraries/<executable_name>/" subdir - "../../8_Android/<executable_name>/data/", // up 2 in tree, "/8_Android/<executable_name>/" subdir - "../../samples/<executable_name>/data/", // up 2 in tree, "/samples/<executable_name>/" subdir - "../../../", // up 3 in tree - "../../../src/<executable_name>/", // up 3 in tree, "/src/<executable_name>/" subdir - "../../../src/<executable_name>/data/", // up 3 in tree, "/src/<executable_name>/data/" subdir - "../../../src/<executable_name>/src/", // up 3 in tree, "/src/<executable_name>/src/" subdir - "../../../src/<executable_name>/inc/", // up 3 in tree, "/src/<executable_name>/inc/" subdir - "../../../sandbox/<executable_name>/", // up 3 in tree, "/sandbox/<executable_name>/" subdir - "../../../sandbox/<executable_name>/data/", // up 3 in tree, "/sandbox/<executable_name>/data/" subdir - "../../../sandbox/<executable_name>/src/", // up 3 in tree, "/sandbox/<executable_name>/src/" subdir - "../../../sandbox/<executable_name>/inc/", // up 3 in tree, "/sandbox/<executable_name>/inc/" subdir - "../../../0_Simple/<executable_name>/data/", // up 3 in tree, "/0_Simple/<executable_name>/" subdir - "../../../1_Utilities/<executable_name>/data/", // up 3 in tree, "/1_Utilities/<executable_name>/" subdir - "../../../2_Graphics/<executable_name>/data/", // up 3 in tree, "/2_Graphics/<executable_name>/" subdir - "../../../3_Imaging/<executable_name>/data/", // up 3 in tree, "/3_Imaging/<executable_name>/" subdir - "../../../4_Finance/<executable_name>/data/", // up 3 in tree, "/4_Finance/<executable_name>/" subdir - "../../../5_Simulations/<executable_name>/data/", // up 3 in tree, "/5_Simulations/<executable_name>/" subdir - "../../../6_Advanced/<executable_name>/data/", // up 3 in tree, "/6_Advanced/<executable_name>/" subdir - "../../../7_CUDALibraries/<executable_name>/data/", // up 3 in tree, "/7_CUDALibraries/<executable_name>/" subdir - "../../../8_Android/<executable_name>/data/", // up 3 in tree, "/8_Android/<executable_name>/" subdir - "../../../0_Simple/<executable_name>/", // up 3 in tree, "/0_Simple/<executable_name>/" subdir - "../../../1_Utilities/<executable_name>/", // up 3 in tree, "/1_Utilities/<executable_name>/" subdir - "../../../2_Graphics/<executable_name>/", // up 3 in tree, "/2_Graphics/<executable_name>/" subdir - "../../../3_Imaging/<executable_name>/", // up 3 in tree, "/3_Imaging/<executable_name>/" subdir - "../../../4_Finance/<executable_name>/", // up 3 in tree, "/4_Finance/<executable_name>/" subdir - "../../../5_Simulations/<executable_name>/", // up 3 in tree, "/5_Simulations/<executable_name>/" subdir - "../../../6_Advanced/<executable_name>/", // up 3 in tree, "/6_Advanced/<executable_name>/" subdir - "../../../7_CUDALibraries/<executable_name>/", // up 3 in tree, "/7_CUDALibraries/<executable_name>/" subdir - "../../../8_Android/<executable_name>/", // up 3 in tree, "/8_Android/<executable_name>/" subdir - "../../../samples/<executable_name>/data/", // up 3 in tree, "/samples/<executable_name>/" subdir - "../../../common/", // up 3 in tree, "../../../common/" subdir - "../../../common/data/", // up 3 in tree, "../../../common/data/" subdir - "../../../data/", // up 3 in tree, "../../../data/" subdir - "../../../../", // up 4 in tree - "../../../../src/<executable_name>/", // up 4 in tree, "/src/<executable_name>/" subdir - "../../../../src/<executable_name>/data/", // up 4 in tree, "/src/<executable_name>/data/" subdir - "../../../../src/<executable_name>/src/", // up 4 in tree, "/src/<executable_name>/src/" subdir - "../../../../src/<executable_name>/inc/", // up 4 in tree, "/src/<executable_name>/inc/" subdir - "../../../../sandbox/<executable_name>/", // up 4 in tree, "/sandbox/<executable_name>/" subdir - "../../../../sandbox/<executable_name>/data/", // up 4 in tree, "/sandbox/<executable_name>/data/" subdir - "../../../../sandbox/<executable_name>/src/", // up 4 in tree, "/sandbox/<executable_name>/src/" subdir - "../../../../sandbox/<executable_name>/inc/", // up 4 in tree, "/sandbox/<executable_name>/inc/" subdir - "../../../../0_Simple/<executable_name>/data/", // up 4 in tree, "/0_Simple/<executable_name>/" subdir - "../../../../1_Utilities/<executable_name>/data/", // up 4 in tree, "/1_Utilities/<executable_name>/" subdir - "../../../../2_Graphics/<executable_name>/data/", // up 4 in tree, "/2_Graphics/<executable_name>/" subdir - "../../../../3_Imaging/<executable_name>/data/", // up 4 in tree, "/3_Imaging/<executable_name>/" subdir - "../../../../4_Finance/<executable_name>/data/", // up 4 in tree, "/4_Finance/<executable_name>/" subdir - "../../../../5_Simulations/<executable_name>/data/",// up 4 in tree, "/5_Simulations/<executable_name>/" subdir - "../../../../6_Advanced/<executable_name>/data/", // up 4 in tree, "/6_Advanced/<executable_name>/" subdir - "../../../../7_CUDALibraries/<executable_name>/data/", // up 4 in tree, "/7_CUDALibraries/<executable_name>/" subdir - "../../../../8_Android/<executable_name>/data/", // up 4 in tree, "/8_Android/<executable_name>/" subdir - "../../../../0_Simple/<executable_name>/", // up 4 in tree, "/0_Simple/<executable_name>/" subdir - "../../../../1_Utilities/<executable_name>/", // up 4 in tree, "/1_Utilities/<executable_name>/" subdir - "../../../../2_Graphics/<executable_name>/", // up 4 in tree, "/2_Graphics/<executable_name>/" subdir - "../../../../3_Imaging/<executable_name>/", // up 4 in tree, "/3_Imaging/<executable_name>/" subdir - "../../../../4_Finance/<executable_name>/", // up 4 in tree, "/4_Finance/<executable_name>/" subdir - "../../../../5_Simulations/<executable_name>/",// up 4 in tree, "/5_Simulations/<executable_name>/" subdir - "../../../../6_Advanced/<executable_name>/", // up 4 in tree, "/6_Advanced/<executable_name>/" subdir - "../../../../7_CUDALibraries/<executable_name>/", // up 4 in tree, "/7_CUDALibraries/<executable_name>/" subdir - "../../../../8_Android/<executable_name>/", // up 4 in tree, "/8_Android/<executable_name>/" subdir - "../../../../samples/<executable_name>/data/", // up 4 in tree, "/samples/<executable_name>/" subdir - "../../../../common/", // up 4 in tree, "../../../common/" subdir - "../../../../common/data/", // up 4 in tree, "../../../common/data/" subdir - "../../../../data/", // up 4 in tree, "../../../data/" subdir - "../../../../../", // up 5 in tree - "../../../../../src/<executable_name>/", // up 5 in tree, "/src/<executable_name>/" subdir - "../../../../../src/<executable_name>/data/", // up 5 in tree, "/src/<executable_name>/data/" subdir - "../../../../../src/<executable_name>/src/", // up 5 in tree, "/src/<executable_name>/src/" subdir - "../../../../../src/<executable_name>/inc/", // up 5 in tree, "/src/<executable_name>/inc/" subdir - "../../../../../sandbox/<executable_name>/", // up 5 in tree, "/sandbox/<executable_name>/" subdir - "../../../../../sandbox/<executable_name>/data/", // up 5 in tree, "/sandbox/<executable_name>/data/" subdir - "../../../../../sandbox/<executable_name>/src/", // up 5 in tree, "/sandbox/<executable_name>/src/" subdir - "../../../../../sandbox/<executable_name>/inc/", // up 5 in tree, "/sandbox/<executable_name>/inc/" subdir - "../../../../../0_Simple/<executable_name>/data/", // up 5 in tree, "/0_Simple/<executable_name>/" subdir - "../../../../../1_Utilities/<executable_name>/data/", // up 5 in tree, "/1_Utilities/<executable_name>/" subdir - "../../../../../2_Graphics/<executable_name>/data/", // up 5 in tree, "/2_Graphics/<executable_name>/" subdir - "../../../../../3_Imaging/<executable_name>/data/", // up 5 in tree, "/3_Imaging/<executable_name>/" subdir - "../../../../../4_Finance/<executable_name>/data/", // up 5 in tree, "/4_Finance/<executable_name>/" subdir - "../../../../../5_Simulations/<executable_name>/data/",// up 5 in tree, "/5_Simulations/<executable_name>/" subdir - "../../../../../6_Advanced/<executable_name>/data/", // up 5 in tree, "/6_Advanced/<executable_name>/" subdir - "../../../../../7_CUDALibraries/<executable_name>/data/", // up 5 in tree, "/7_CUDALibraries/<executable_name>/" subdir - "../../../../../8_Android/<executable_name>/data/", // up 5 in tree, "/8_Android/<executable_name>/" subdir - "../../../../../samples/<executable_name>/data/", // up 5 in tree, "/samples/<executable_name>/" subdir - "../../../../../common/", // up 5 in tree, "../../../common/" subdir - "../../../../../common/data/", // up 5 in tree, "../../../common/data/" subdir - }; - - // Extract the executable name - std::string executable_name; - - if (executable_path != 0) - { - executable_name = std::string(executable_path); - -#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) - // Windows path delimiter - size_t delimiter_pos = executable_name.find_last_of('\\'); - executable_name.erase(0, delimiter_pos + 1); - - if (executable_name.rfind(".exe") != std::string::npos) - { - // we strip .exe, only if the .exe is found - executable_name.resize(executable_name.size() - 4); - } - -#else - // Linux & OSX path delimiter - size_t delimiter_pos = executable_name.find_last_of('/'); - executable_name.erase(0,delimiter_pos+1); -#endif - } - - // Loop over all search paths and return the first hit - for (unsigned int i = 0; i < sizeof(searchPath)/sizeof(char *); ++i) - { - std::string path(searchPath[i]); - size_t executable_name_pos = path.find("<executable_name>"); - - // If there is executable_name variable in the searchPath - // replace it with the value - if (executable_name_pos != std::string::npos) - { - if (executable_path != 0) - { - path.replace(executable_name_pos, strlen("<executable_name>"), executable_name); - } - else - { - // Skip this path entry if no executable argument is given - continue; - } - } - -#ifdef _DEBUG - printf("sdkFindFilePath <%s> in %s\n", filename, path.c_str()); -#endif - - // Test if the file exists - path.append(filename); - FILE *fp; - FOPEN(fp, path.c_str(), "rb"); - - if (fp != NULL) - { - fclose(fp); - // File found - // returning an allocated array here for backwards compatibility reasons - char *file_path = (char *) malloc(path.length() + 1); - STRCPY(file_path, path.length() + 1, path.c_str()); - return file_path; - } - - if (fp) - { - fclose(fp); - } - } - - // File not found - return 0; -} - -#endif -- Gitblit v1.8.0