| | |
| | | |
| | | go 1.12 |
| | | |
| | | require basic.com/valib/goffmpeg.git v0.0.0-20191129092141-7363d175bd62 |
| | | require ( |
| | | basic.com/valib/godraw.git v0.0.0-20191122082247-26e9987cd183 |
| | | basic.com/valib/goffmpeg.git v0.0.0-20191203025021-783b80757681 |
| | | basic.com/valib/gogpu.git v0.0.0-20190711044327-62043b070865 |
| | | github.com/disintegration/imaging v1.6.2 |
| | | github.com/llgcode/draw2d v0.0.0-20190810100245-79e59b6b8fbc // indirect |
| | | ) |
| | |
| | | basic.com/valib/goffmpeg.git v0.0.0-20191129092141-7363d175bd62 h1:KzkPzJE76RkHeYBgAMfSiz1vzJaQRKkRDCXnw2XmxqA= |
| | | basic.com/valib/goffmpeg.git v0.0.0-20191129092141-7363d175bd62/go.mod h1:1x75Hawh/BjgPsQtuJ24px89gzk3uAslD8e0Xs6Z7GQ= |
| | | basic.com/valib/godraw.git v0.0.0-20191122082247-26e9987cd183 h1:QQ1L0Ev4vcSD23d99+rW5S/mnmdTAPAI2GZ7tkMgCE4= |
| | | basic.com/valib/godraw.git v0.0.0-20191122082247-26e9987cd183/go.mod h1:LntbWczdG87utrKx7rWYmIh1VZ+X2oPN7Q2IXb6oRE0= |
| | | basic.com/valib/goffmpeg.git v0.0.0-20191203025021-783b80757681 h1:n5zinCkvVghdKw0ZenxMo+lFjaXhHSr9ecICuQZLjNw= |
| | | basic.com/valib/goffmpeg.git v0.0.0-20191203025021-783b80757681/go.mod h1:1x75Hawh/BjgPsQtuJ24px89gzk3uAslD8e0Xs6Z7GQ= |
| | | basic.com/valib/gogpu.git v0.0.0-20190711044327-62043b070865 h1:3XvkNdRlJDXV45ie8U0uGA9ImJZtyTT0C/h+4Rizv0Y= |
| | | basic.com/valib/gogpu.git v0.0.0-20190711044327-62043b070865/go.mod h1:yxux5RP4A6a591vWljXxGlHdERVVyWDD3TwwQjuyogw= |
| | | github.com/disintegration/imaging v1.6.2 h1:w1LecBlG2Lnp8B3jk5zSuNqd7b4DXhcjwek1ei82L+c= |
| | | github.com/disintegration/imaging v1.6.2/go.mod h1:44/5580QXChDfwIclfc/PCwrr44amcmDAg8hxG0Ewe4= |
| | | github.com/go-gl/gl v0.0.0-20180407155706-68e253793080/go.mod h1:482civXOzJJCPzJ4ZOX/pwvXBWSnzD4OKMdH4ClKGbk= |
| | | github.com/go-gl/glfw v0.0.0-20180426074136-46a8d530c326/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= |
| | | github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g= |
| | | github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= |
| | | github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes= |
| | | github.com/llgcode/draw2d v0.0.0-20190810100245-79e59b6b8fbc h1:ZvNhCJfRtl03A0VOIfvO9W22/0b6dmn1APa4Q6j9oHM= |
| | | github.com/llgcode/draw2d v0.0.0-20190810100245-79e59b6b8fbc/go.mod h1:mVa0dA29Db2S4LVqDYLlsePDzRJLDfdhVZiI15uY0FA= |
| | | github.com/llgcode/ps v0.0.0-20150911083025-f1443b32eedb/go.mod h1:1l8ky+Ew27CMX29uG+a2hNOKpeNYEQjjtiALiBlFQbY= |
| | | golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs= |
| | | golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8 h1:hVwzHzIUGRjiF7EcUjqNxk3NCfkPxbDKRdnNE1Rpg0U= |
| | | golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= |
| | | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= |
New file |
| | |
| | | #include "conv.h" |
| | | |
| | | #include <cmath> |
| | | #include <sys/time.h> |
| | | |
| | | #include <npp.h> |
| | | #include <helper_cuda.h> |
| | | #include <helper_string.h> |
| | | #include "Exceptions.h" |
| | | |
| | | |
| | | static const int MEMORY_ALGN_DEVICE = 511; |
| | | static const int HD_MEMORY_ALGN_DEVICE = 511; |
| | | |
// Integer division of x by d, rounded up (for non-negative x and positive d).
static inline int DivUp(int x, int d)
{
    const int padded = x + d - 1;
    return padded / d;
}
| | | |
| | | static int set_data(uint8_t *data, const int width, const int height, unsigned char *mY, unsigned char *mU, unsigned char *mV) |
| | | { |
| | | uint8_t* yuv_data = (uint8_t*)data; |
| | | if (!yuv_data) |
| | | { |
| | | return -1; |
| | | } |
| | | |
| | | uint32_t i, j; |
| | | uint32_t off; |
| | | uint32_t off_yuv; |
| | | uint32_t half_h; |
| | | uint32_t half_w; |
| | | uint32_t u_size; |
| | | uint8_t* yuv_ptr; |
| | | uint8_t* u_ptr; |
| | | uint8_t* v_ptr; |
| | | |
| | | int w = width; |
| | | int h = height; |
| | | |
| | | //从这一句来看,即使是同一种格式,进来也要处理一下。 |
| | | size_t nPitch = (w + HD_MEMORY_ALGN_DEVICE) & ~HD_MEMORY_ALGN_DEVICE; |
| | | off = 0; |
| | | off_yuv = 0; |
| | | for (i = 0; i < (uint32_t)h; i++) |
| | | { |
| | | memcpy(mY + off, yuv_data + off_yuv, w); |
| | | off += nPitch; |
| | | off_yuv += w; |
| | | } |
| | | |
| | | half_w = w >> 1; |
| | | half_h = h >> 1; |
| | | u_size = half_w * half_h; |
| | | nPitch = (half_w + HD_MEMORY_ALGN_DEVICE) & ~HD_MEMORY_ALGN_DEVICE; |
| | | |
| | | off_yuv = w * h; |
| | | off = 0; |
| | | for (i = 0; i < half_h; i++) |
| | | { |
| | | yuv_ptr = yuv_data + off_yuv; |
| | | u_ptr = mU + off; |
| | | v_ptr = mV + off; |
| | | for (j = 0; j < (uint32_t)w; j += 2) |
| | | { |
| | | *u_ptr++ = *yuv_ptr++; |
| | | *v_ptr++ = *yuv_ptr++; |
| | | } |
| | | off_yuv += w; |
| | | off += nPitch; |
| | | } |
| | | |
| | | return 0; |
| | | } |
| | | |
| | | /////////////handle |
| | | class convertor{ |
| | | public: |
| | | convertor(const int srcW, const int srcH, const int dstW, const int dstH, const int gpu); |
| | | ~convertor(); |
| | | int yuv2bgr(unsigned char **bgr, int *bgrLen); |
| | | int resize2bgr(unsigned char *in, unsigned char **data, int *data_len); |
| | | int resizeyuv(unsigned char *in, unsigned char **data, int *data_len); |
| | | int fill_yuv(const unsigned char *yuv); |
| | | private: |
| | | void init_yuv(); |
| | | void init_resize(); |
| | | void init_resize_bgr(); |
| | | void init_resize_yuv(); |
| | | private: |
| | | int width; |
| | | int height; |
| | | |
| | | unsigned char aSamplingFactors[3]; |
| | | int nMCUBlocksH; |
| | | int nMCUBlocksV; |
| | | |
| | | Npp8u *apSrcImage[3]; |
| | | NppiSize aSrcSize[3]; |
| | | Npp32s aSrcImageStep[3]; |
| | | size_t aSrcPitch[3]; |
| | | |
| | | uint8_t *mY; |
| | | uint8_t *mU; |
| | | uint8_t *mV; |
| | | |
| | | /////////////////////////// |
| | | int rWidth; |
| | | int rHeight; |
| | | float fx; |
| | | float fy; |
| | | |
| | | Npp8u *apDstImage[3] = {0,0,0}; |
| | | Npp32s aDstImageStep[3]; |
| | | NppiSize aDstSize[3]; |
| | | |
| | | ///////////////////////////// |
| | | Npp8u *imgOrigin; |
| | | size_t pitchOrigin; |
| | | NppiSize sizeOrigin; |
| | | |
| | | unsigned char *bgrOrigin; |
| | | int bgrOriginLen; |
| | | size_t bgrOriginPitch; |
| | | |
| | | //////////////////////////// |
| | | Npp8u *imgResize; |
| | | size_t pitchResize; |
| | | NppiSize sizeResize; |
| | | |
| | | unsigned char *bgrScale; |
| | | int bgrScaleLen; |
| | | size_t bgrScalePitch; |
| | | |
| | | // resize only |
| | | //////////////////////////// |
| | | Npp8u *originBGR; |
| | | int pitchOriginBGR; |
| | | Npp8u *resizedBGR; |
| | | int pitchResizedBGR; |
| | | unsigned char *hostResizedBGR; |
| | | |
| | | /////////////////////////// |
| | | unsigned char *nv12; |
| | | |
| | | bool initialized_yuv, initialized_resize, initialized_resize_bgr, initialized_resize_yuv; |
| | | int gpu_index; |
| | | }; |
| | | |
| | | |
| | | convertor::convertor(const int srcW, const int srcH, const int dstW, const int dstH, const int gpu) |
| | | :width(srcW) |
| | | ,height(srcH) |
| | | ,rWidth(dstW) |
| | | ,rHeight(dstH) |
| | | ,fx(-1) |
| | | ,fy(-1) |
| | | ,mY(NULL) |
| | | ,mU(NULL) |
| | | ,mV(NULL) |
| | | ,imgOrigin(0) |
| | | ,imgResize(0) |
| | | ,bgrOrigin(NULL) |
| | | ,bgrOriginLen(0) |
| | | ,bgrScale(NULL) |
| | | ,bgrScaleLen(0) |
| | | ,originBGR(0) |
| | | ,pitchOriginBGR(0) |
| | | ,resizedBGR(0) |
| | | ,pitchResizedBGR(0) |
| | | ,hostResizedBGR(NULL) |
| | | ,nv12(NULL) |
| | | ,initialized_yuv(false) |
| | | ,initialized_resize(false) |
| | | ,initialized_resize_bgr(false) |
| | | ,initialized_resize_yuv(false) |
| | | ,gpu_index(gpu) |
| | | {} |
| | | |
| | | static void setGPUDevice(const int gpu){ |
| | | if (gpu >= 0){ |
| | | cudaSetDevice(gpu); |
| | | } |
| | | } |
| | | |
| | | void convertor::init_yuv(){ |
| | | if (initialized_yuv) return; |
| | | initialized_yuv = true; |
| | | |
| | | setGPUDevice(gpu_index); |
| | | |
| | | for(int i = 0; i < 3; i++){ |
| | | apSrcImage[i] = 0; |
| | | apDstImage[i] = 0; |
| | | } |
| | | |
| | | aSamplingFactors[0] = 34; |
| | | aSamplingFactors[1] = 17; |
| | | aSamplingFactors[2] = 17; |
| | | |
| | | nMCUBlocksH = 0; |
| | | nMCUBlocksV = 0; |
| | | |
| | | for (int i = 0; i < 3; ++i) |
| | | { |
| | | nMCUBlocksV = std::max(nMCUBlocksV, aSamplingFactors[i] & 0x0f); |
| | | nMCUBlocksH = std::max(nMCUBlocksH, aSamplingFactors[i] >> 4); |
| | | } |
| | | |
| | | for (int i = 0; i < 3; ++i) |
| | | { |
| | | NppiSize oBlocks; |
| | | NppiSize oBlocksPerMCU = { aSamplingFactors[i] >> 4, aSamplingFactors[i] & 0x0f }; |
| | | |
| | | oBlocks.width = (int)ceil((width + 7) / 8 * |
| | | static_cast<float>(oBlocksPerMCU.width) / nMCUBlocksH); |
| | | oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width; |
| | | |
| | | oBlocks.height = (int)ceil((height + 7) / 8 * |
| | | static_cast<float>(oBlocksPerMCU.height) / nMCUBlocksV); |
| | | oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height; |
| | | |
| | | aSrcSize[i].width = oBlocks.width * 8; |
| | | aSrcSize[i].height = oBlocks.height * 8; |
| | | |
| | | // Allocate Memory |
| | | size_t nPitch; |
| | | NPP_CHECK_CUDA(cudaMallocPitch((void**)&(apSrcImage[i]), &nPitch, aSrcSize[i].width, aSrcSize[i].height)); |
| | | aSrcPitch[i] = nPitch; |
| | | aSrcImageStep[i] = static_cast<Npp32s>(nPitch); |
| | | } |
| | | |
| | | NPP_CHECK_CUDA(cudaMallocPitch((void**)&imgOrigin, &pitchOrigin, width * 3, height)); |
| | | |
| | | bgrOriginPitch = width * 3; |
| | | bgrOriginLen = bgrOriginPitch * height; |
| | | NPP_CHECK_CUDA(cudaHostAlloc((void**)&bgrOrigin, bgrOriginLen, cudaHostAllocDefault)); |
| | | |
| | | sizeOrigin.width = width; |
| | | sizeOrigin.height = height; |
| | | |
| | | uint32_t nPitch = (width + MEMORY_ALGN_DEVICE) & ~MEMORY_ALGN_DEVICE; |
| | | NPP_CHECK_CUDA(cudaHostAlloc((void**)&mY, nPitch * height, cudaHostAllocDefault)); |
| | | nPitch = (width/2 + MEMORY_ALGN_DEVICE) & ~MEMORY_ALGN_DEVICE; |
| | | NPP_CHECK_CUDA(cudaHostAlloc((void**)&mU, nPitch * height / 2, cudaHostAllocDefault)); |
| | | NPP_CHECK_CUDA(cudaHostAlloc((void**)&mV, nPitch * height / 2, cudaHostAllocDefault)); |
| | | |
| | | } |
| | | |
| | | void convertor::init_resize(){ |
| | | if (initialized_resize) return; |
| | | initialized_resize = true; |
| | | |
| | | setGPUDevice(gpu_index); |
| | | |
| | | NppiSize oDstImageSize; |
| | | oDstImageSize.width = std::max(1, rWidth); |
| | | oDstImageSize.height = std::max(1, rHeight); |
| | | |
| | | sizeResize.width = oDstImageSize.width; |
| | | sizeResize.height = oDstImageSize.height; |
| | | |
| | | for (int i=0; i < 3; ++i) |
| | | { |
| | | NppiSize oBlocks; |
| | | NppiSize oBlocksPerMCU = { aSamplingFactors[i] & 0x0f, aSamplingFactors[i] >> 4}; |
| | | |
| | | oBlocks.width = (int)ceil((oDstImageSize.width + 7)/8 * |
| | | static_cast<float>(oBlocksPerMCU.width)/nMCUBlocksH); |
| | | oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width; |
| | | |
| | | oBlocks.height = (int)ceil((oDstImageSize.height+7)/8 * |
| | | static_cast<float>(oBlocksPerMCU.height)/nMCUBlocksV); |
| | | oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height; |
| | | |
| | | aDstSize[i].width = oBlocks.width * 8; |
| | | aDstSize[i].height = oBlocks.height * 8; |
| | | |
| | | // Allocate Memory |
| | | size_t nPitch; |
| | | NPP_CHECK_CUDA(cudaMallocPitch((void**)&apDstImage[i], &nPitch, aDstSize[i].width, aDstSize[i].height)); |
| | | aDstImageStep[i] = static_cast<Npp32s>(nPitch); |
| | | } |
| | | |
| | | if (rWidth > 0 && rHeight > 0 && rWidth < width && rHeight < height){ |
| | | fx = (float)(rWidth) / (float)(width); |
| | | fy = (float)(rHeight) / (float)(height); |
| | | } |
| | | |
| | | if (imgResize == 0){ |
| | | if (rWidth > 0 && rHeight > 0 && rWidth < width && rHeight < height){ |
| | | NPP_CHECK_CUDA(cudaMallocPitch((void**)&imgResize, &pitchResize, rWidth * 3, rHeight)); |
| | | } |
| | | } |
| | | if (!bgrScale){ |
| | | if (rWidth > 0 && rHeight > 0 && rWidth < width && rHeight < height){ |
| | | bgrScalePitch = rWidth * 3; |
| | | bgrScaleLen = bgrScalePitch * rHeight; |
| | | NPP_CHECK_CUDA(cudaHostAlloc((void**)&bgrScale, bgrScaleLen, cudaHostAllocDefault)); |
| | | } |
| | | } |
| | | } |
| | | |
| | | void convertor::init_resize_bgr(){ |
| | | if (initialized_resize_bgr) return; |
| | | initialized_resize_bgr = true; |
| | | |
| | | setGPUDevice(gpu_index); |
| | | if (originBGR == 0){ |
| | | originBGR = nppiMalloc_8u_C3(width, height, &pitchOriginBGR); |
| | | } |
| | | if (resizedBGR == 0){ |
| | | resizedBGR = nppiMalloc_8u_C3(rWidth, rHeight, &pitchResizedBGR); |
| | | } |
| | | if (hostResizedBGR == NULL){ |
| | | NPP_CHECK_CUDA(cudaHostAlloc((void**)&hostResizedBGR, rWidth * 3 * rHeight, cudaHostAllocDefault)); |
| | | } |
| | | } |
| | | |
| | | void convertor::init_resize_yuv(){ |
| | | if (initialized_resize_yuv) return; |
| | | initialized_resize_yuv = true; |
| | | |
| | | if (rWidth > 0 && rHeight > 0){ |
| | | fx = (float)(width) / (float)(rWidth); |
| | | fy = (float)(height) / (float)(rHeight); |
| | | } |
| | | |
| | | nv12 = (unsigned char*)malloc(rWidth*rHeight*3/2); |
| | | } |
| | | |
| | | convertor::~convertor(){ |
| | | setGPUDevice(gpu_index); |
| | | |
| | | if(mY) cudaFreeHost(mY); |
| | | if(mU) cudaFreeHost(mU); |
| | | if(mV) cudaFreeHost(mV); |
| | | |
| | | for (int i = 0; i < 3; ++i)//内存释放 |
| | | { |
| | | cudaFree(apSrcImage[i]); |
| | | cudaFree(apDstImage[i]); |
| | | } |
| | | |
| | | if (imgOrigin) cudaFree(imgOrigin); |
| | | if (imgResize) cudaFree(imgResize); |
| | | |
| | | if (bgrOrigin) cudaFreeHost(bgrOrigin); |
| | | if (bgrScale) cudaFreeHost(bgrScale); |
| | | |
| | | if (originBGR) nppiFree(originBGR); |
| | | if (resizedBGR) nppiFree(resizedBGR); |
| | | if (hostResizedBGR) cudaFreeHost(hostResizedBGR); |
| | | |
| | | if (nv12) free(nv12); |
| | | } |
| | | |
| | | int convertor::fill_yuv(const unsigned char *yuv){ |
| | | init_yuv(); |
| | | int ret = set_data((uint8_t*)yuv, width, height, mY, mU, mV); |
| | | if (ret < 0) return ret; |
| | | |
| | | setGPUDevice(gpu_index); |
| | | |
| | | NPP_CHECK_CUDA(cudaMemcpy(apSrcImage[0], mY, aSrcPitch[0] * height, cudaMemcpyHostToDevice)); |
| | | NPP_CHECK_CUDA(cudaMemcpy(apSrcImage[1], mU, aSrcPitch[1] * height / 2, cudaMemcpyHostToDevice)); |
| | | NPP_CHECK_CUDA(cudaMemcpy(apSrcImage[2], mV, aSrcPitch[2] * height / 2, cudaMemcpyHostToDevice)); |
| | | return 0; |
| | | } |
| | | |
| | | int convertor::yuv2bgr(unsigned char **bgr, int *bgrLen){ |
| | | |
| | | *bgr = NULL; |
| | | *bgrLen = 0; |
| | | |
| | | setGPUDevice(gpu_index); |
| | | |
| | | NPP_CHECK_NPP(nppiYUV420ToBGR_8u_P3C3R(apSrcImage, aSrcImageStep, imgOrigin, pitchOrigin, sizeOrigin)); |
| | | |
| | | NPP_CHECK_CUDA(cudaMemcpy2D(bgrOrigin, bgrOriginPitch, imgOrigin, pitchOrigin, bgrOriginPitch, height, cudaMemcpyDeviceToHost)); |
| | | *bgr = bgrOrigin; |
| | | *bgrLen = bgrOriginLen; |
| | | |
| | | return 0; |
| | | } |
| | | |
| | | int convertor::resize2bgr(unsigned char *in, unsigned char **data, int *data_len){ |
| | | *data = NULL; |
| | | *data_len = 0; |
| | | |
| | | if ((rWidth < 0 && rHeight < 0) || (rWidth > width && rHeight > height)){ |
| | | return -1; |
| | | } |
| | | |
| | | setGPUDevice(gpu_index); |
| | | |
| | | if (!in){ |
| | | |
| | | init_resize(); |
| | | |
| | | NppiSize oDstImageSize; |
| | | oDstImageSize.width = std::max(1, rWidth); |
| | | oDstImageSize.height = std::max(1, rHeight); |
| | | for (int i = 0; i < 3; ++i) |
| | | { |
| | | NppiSize oBlocksPerMCU = { aSamplingFactors[i] & 0x0f, aSamplingFactors[i] >> 4}; |
| | | NppiSize oSrcImageSize = {(width * oBlocksPerMCU.width) / nMCUBlocksH, (height * oBlocksPerMCU.height)/nMCUBlocksV}; |
| | | NppiRect oSrcImageROI = {0,0,oSrcImageSize.width, oSrcImageSize.height}; |
| | | NppiRect oDstImageROI; |
| | | NppiInterpolationMode eInterploationMode = NPPI_INTER_SUPER; |
| | | NPP_CHECK_NPP(nppiGetResizeRect(oSrcImageROI, &oDstImageROI, |
| | | fx, |
| | | fy, |
| | | 0.0, 0.0, eInterploationMode)); |
| | | NPP_CHECK_NPP(nppiResizeSqrPixel_8u_C1R(apSrcImage[i], oSrcImageSize, aSrcImageStep[i], oSrcImageROI, |
| | | apDstImage[i], aDstImageStep[i], oDstImageROI , |
| | | fx, |
| | | fy, |
| | | 0.0, 0.0, eInterploationMode)); |
| | | } |
| | | NPP_CHECK_NPP(nppiYUV420ToBGR_8u_P3C3R(apDstImage, aDstImageStep, imgResize, pitchResize, sizeResize)); |
| | | NPP_CHECK_CUDA(cudaMemcpy2D(bgrScale, bgrScalePitch, imgResize, pitchResize, bgrScalePitch, rHeight, cudaMemcpyDeviceToHost)); |
| | | *data = bgrScale; |
| | | *data_len = bgrScaleLen; |
| | | }else{ |
| | | |
| | | init_resize_bgr(); |
| | | |
| | | NppiSize oSrcSize; |
| | | oSrcSize.width = width; |
| | | oSrcSize.height = height; |
| | | |
| | | NPP_CHECK_CUDA(cudaMemcpy2D(originBGR, pitchOriginBGR, in, width*3, width*3, height, cudaMemcpyHostToDevice)); |
| | | |
| | | NppiRect oSrcROI; |
| | | oSrcROI.x = 0; |
| | | oSrcROI.y = 0; |
| | | oSrcROI.width = width; |
| | | oSrcROI.height = height; |
| | | |
| | | |
| | | NppiRect oDstROI; |
| | | oDstROI.x = 0; |
| | | oDstROI.y = 0; |
| | | oDstROI.width = rWidth; |
| | | oDstROI.height = rHeight; |
| | | |
| | | // Scale Factor |
| | | double nXFactor = double(oDstROI.width) / double(oSrcROI.width); |
| | | double nYFactor = double(oDstROI.height) / double(oSrcROI.height); |
| | | |
| | | // Scaled X/Y Shift |
| | | double nXShift = - oSrcROI.x * nXFactor ; |
| | | double nYShift = - oSrcROI.y * nYFactor; |
| | | int eInterpolation = NPPI_INTER_SUPER; |
| | | if (nXFactor >= 1.f || nYFactor >= 1.f) |
| | | eInterpolation = NPPI_INTER_LANCZOS; |
| | | |
| | | NppStatus ret = nppiResizeSqrPixel_8u_C3R(originBGR, oSrcSize, pitchOriginBGR, oSrcROI, |
| | | resizedBGR, pitchResizedBGR, oDstROI, nXFactor, nYFactor, nXShift, nYShift, eInterpolation ); |
| | | |
| | | if(ret != NPP_SUCCESS) { |
| | | printf("imageResize_8u_C3R failed %d.\n", ret); |
| | | return -2; |
| | | } |
| | | size_t pitch = rWidth * 3; |
| | | *data_len = pitch * rHeight; |
| | | NPP_CHECK_CUDA(cudaMemcpy2D(hostResizedBGR, pitch, resizedBGR, pitchResizedBGR, pitch, rHeight, cudaMemcpyDeviceToHost)); |
| | | *data = hostResizedBGR; |
| | | } |
| | | return 0; |
| | | } |
| | | |
/**
 * Nearest-neighbour resize of a packed frame laid out as a full-resolution Y
 * plane followed by half-height rows of interleaved chroma byte pairs.
 *
 * Every destination row and column is written (the earlier version stopped at
 * the largest multiple of 8 and left the remainder of the output untouched).
 *
 * @param src        source frame, srcWidth * srcHeight * 3 / 2 bytes
 * @param dst        destination frame, dstWidth * dstHeight * 3 / 2 bytes
 * @return 0 on success, -1 for null pointers or a dimension smaller than 2
 */
static int nv12_nearest_scale(uint8_t* __restrict src, uint8_t* __restrict dst,
                              int srcWidth, int srcHeight, int dstWidth, int dstHeight)
{
    // A chroma pair needs at least two columns and one chroma row needs two
    // luma rows; this also rules out division by zero below.
    if (!src || !dst || srcWidth < 2 || srcHeight < 2 || dstWidth < 2 || dstHeight < 2)
    {
        return -1;
    }

    const size_t sw = (size_t)srcWidth;
    const size_t sh = (size_t)srcHeight;
    const size_t dw = (size_t)dstWidth;
    const size_t dh = (size_t)dstHeight;

    // 16.16 fixed-point source step per destination pixel, computed in 64 bits
    // so large widths cannot overflow the shift.
    const uint64_t xStep = ((uint64_t)sw << 16) / dw + 1;
    const uint64_t yStep = ((uint64_t)sh << 16) / dh + 1;

    const uint8_t* srcUV = src + sh * sw; // start of the source chroma rows
    uint8_t* dstUV = dst + dh * dw;       // start of the destination chroma rows

    // Only rows/pairs that fit inside a w * h * 3 / 2 buffer are touched.
    const size_t srcChromaRows = sh / 2;
    const size_t dstChromaRows = dh / 2;
    const size_t lastSrcPair = (sw - 2) & ~(size_t)1;

    for (size_t y = 0; y < dh; ++y)
    {
        size_t sy = (size_t)((y * yStep) >> 16);
        if (sy >= sh) sy = sh - 1;

        const uint8_t* srcRow = src + sy * sw;
        uint8_t* dstRow = dst + y * dw;

        for (size_t x = 0; x < dw; ++x)
        {
            size_t sx = (size_t)((x * xStep) >> 16);
            if (sx >= sw) sx = sw - 1;
            dstRow[x] = srcRow[sx];
        }

        // One chroma row is emitted per pair of luma rows (on the even one).
        if ((y & 1) != 0 || (y / 2) >= dstChromaRows)
        {
            continue;
        }

        size_t suvRow = sy / 2;
        if (suvRow >= srcChromaRows) suvRow = srcChromaRows - 1;

        const uint8_t* srcUVRow = srcUV + suvRow * sw;
        uint8_t* dstUVRow = dstUV + (y / 2) * dw;

        for (size_t x = 0; x + 1 < dw; x += 2)
        {
            size_t sx = (size_t)((x * xStep) >> 16);
            // Snap to the start of the chroma pair covering sx, staying in the row.
            size_t pair = (sx / 2) * 2;
            if (pair > lastSrcPair) pair = lastSrcPair;

            dstUVRow[x] = srcUVRow[pair];
            dstUVRow[x + 1] = srcUVRow[pair + 1];
        }
    }
    return 0;
}
| | | |
| | | int convertor::resizeyuv(unsigned char *in, unsigned char **data, int *data_len){ |
| | | |
| | | init_resize_yuv(); |
| | | |
| | | *data_len = rWidth*rHeight*3/2; |
| | | *data = nv12; |
| | | |
| | | return nv12_nearest_scale(in, nv12, width, height, rWidth, rHeight); |
| | | } |
| | | |
| | | convHandle conv_create(const int srcW, const int srcH, const int dstW, const int dstH, const int gpu){ |
| | | if (gpu < 0) return NULL; |
| | | |
| | | convertor *conv = new convertor(srcW, srcH, dstW, dstH, gpu); |
| | | return conv; |
| | | } |
| | | |
| | | void conv_destroy(convHandle h){ |
| | | if (!h) return; |
| | | convertor *conv = (convertor*)h; |
| | | delete conv; |
| | | } |
| | | |
| | | int yuv2bgrandresize(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen, unsigned char **scaleBGR, int *scaleBGRLen){ |
| | | if (!h) return -2; |
| | | convertor *conv = (convertor*)h; |
| | | int ret = conv->fill_yuv((unsigned char*)yuv); |
| | | if (ret != 0) return ret; |
| | | ret = conv->yuv2bgr(bgr, bgrLen); |
| | | if (ret != 0) return ret; |
| | | ret = conv->resize2bgr(NULL, scaleBGR, scaleBGRLen); |
| | | return ret; |
| | | } |
| | | |
| | | int yuv2bgr(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen){ |
| | | if (!h) return -2; |
| | | convertor *conv = (convertor*)h; |
| | | int ret = conv->fill_yuv((unsigned char*)yuv); |
| | | if (ret != 0) return ret; |
| | | return conv->yuv2bgr(bgr, bgrLen); |
| | | } |
| | | |
| | | int yuv2resizedbgr(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen){ |
| | | if (!h) return -2; |
| | | convertor *conv = (convertor*)h; |
| | | int ret = conv->fill_yuv((unsigned char*)yuv); |
| | | if (ret != 0) return ret; |
| | | ret = conv->resize2bgr(NULL, bgr, bgrLen); |
| | | return ret; |
| | | } |
| | | |
| | | int resizebgr(convHandle h, void *data, unsigned char **resized, int *len){ |
| | | if (!h) return -2; |
| | | convertor *conv = (convertor*)h; |
| | | return conv->resize2bgr((unsigned char*)data, resized, len); |
| | | } |
| | | |
| | | int resizeyuv(convHandle h, void *data, unsigned char **resized, int *len){ |
| | | if (!h) return -2; |
| | | convertor *conv = (convertor*)h; |
| | | return conv->resizeyuv((unsigned char*)data, resized, len); |
| | | } |
New file |
| | |
| | | #ifndef __RESIZE_NPP_H__ |
| | | #define __RESIZE_NPP_H__ |
| | | |
| | | #ifdef __cplusplus |
| | | extern "C"{ |
| | | #endif |
| | | |
| | | typedef void* convHandle; |
| | | convHandle conv_create(const int srcW, const int srcH, const int dstW, const int dstH, const int gpu); |
| | | |
| | | void conv_destroy(convHandle h); |
| | | int yuv2bgrandresize(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen, unsigned char **scaleBGR, int *scaleBGRLen); |
| | | int yuv2bgr(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen); |
| | | int yuv2resizedbgr(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen); |
| | | |
| | | int resizebgr(convHandle h, void *data, unsigned char **resized, int *len); |
| | | int resizeyuv(convHandle h, void *data, unsigned char **resized, int *len); |
| | | |
| | | #ifdef __cplusplus |
| | | } |
| | | #endif |
| | | |
| | | #endif //__RESIZE_NPP_H__ |
New file |
| | |
| | | package goconv |
| | | |
| | | /* |
| | | #cgo CFLAGS: -I./ -I./inc -I/usr/local/cuda/include |
| | | #cgo CXXFLAGS: -I./ -I./inc -I/usr/local/cuda/include -std=c++11 |
| | | #cgo LDFLAGS: -L/usr/local/cuda/lib64 -lnppig -lnppicc -lnppial -lnppisu -lcudart -ldl |
| | | #include <stdlib.h> |
| | | #include "conv.h" |
| | | */ |
| | | import "C" |
| | | import ( |
| | | "unsafe" |
| | | |
| | | "basic.com/valib/godraw.git" |
| | | "basic.com/valib/gogpu.git" |
| | | "github.com/disintegration/imaging" |
| | | ) |
| | | |
| | | const ( |
| | | need = 200 |
| | | reserved = 512 |
| | | ) |
| | | |
| | | func gpuIndex(lastIndex int) int { |
| | | indices := gogpu.RankGPU() |
| | | if len(indices) == 0 { |
| | | return -1 |
| | | } |
| | | |
| | | for _, v := range indices { |
| | | if v != lastIndex { |
| | | if gogpu.SatisfyGPU(v, need, need/2) { |
| | | return v |
| | | } |
| | | } |
| | | } |
| | | |
| | | if gogpu.SatisfyGPU(lastIndex, need, reserved) { |
| | | return lastIndex |
| | | } |
| | | return -1 |
| | | } |
| | | |
| | | type convertor struct { |
| | | width int |
| | | height int |
| | | rWidth int |
| | | rHeight int |
| | | conv C.convHandle |
| | | } |
| | | |
| | | var convts []*convertor |
| | | |
| | | func find(w, h, rw, rh int) *convertor { |
| | | for _, v := range convts { |
| | | if v.width == w && v.height == h && v.rWidth == rw && v.rHeight == rh { |
| | | return v |
| | | } |
| | | } |
| | | gpu := gpuIndex(0) |
| | | if gpu < 0 { |
| | | return nil |
| | | } |
| | | cw := C.conv_create(C.int(w), C.int(h), C.int(rw), C.int(rh), C.int(gpu)) |
| | | if cw == nil { |
| | | return nil |
| | | } |
| | | c := &convertor{w, h, rw, rh, cw} |
| | | convts = append(convts, c) |
| | | return c |
| | | } |
| | | |
| | | // YUV2BGR yuv->bgr |
| | | func YUV2BGR(yuv []byte, w, h int) []byte { |
| | | |
| | | cw := find(w, h, -1, -1) |
| | | if cw == nil { |
| | | return yuv2bgr(yuv, w, h) |
| | | } |
| | | var bgr *C.uchar |
| | | var bgrLen C.int |
| | | ret := C.yuv2bgr(cw.conv, unsafe.Pointer(&yuv[0]), &bgr, &bgrLen) |
| | | if ret != 0 { |
| | | return nil |
| | | } |
| | | const maxLen = 0x7fffffff |
| | | goBGRLen := int(bgrLen) |
| | | if goBGRLen > 0 { |
| | | return (*[maxLen]byte)(unsafe.Pointer(bgr))[:goBGRLen:goBGRLen] |
| | | } |
| | | return nil |
| | | } |
| | | |
| | | // YUV2ResizedBGR yuv -> resized bgr |
| | | func YUV2ResizedBGR(yuv []byte, w, h, rw, rh int) []byte { |
| | | |
| | | cw := find(w, h, rw, rh) |
| | | if cw == nil { |
| | | bgr := yuv2bgr(yuv, w, h) |
| | | return bgresize(bgr, w, h, rw, rh) |
| | | } |
| | | var bgr *C.uchar |
| | | var bgrLen C.int |
| | | ret := C.yuv2resizedbgr(cw.conv, unsafe.Pointer(&yuv[0]), &bgr, &bgrLen) |
| | | if ret != 0 { |
| | | return nil |
| | | } |
| | | const maxLen = 0x7fffffff |
| | | goBGRLen := int(bgrLen) |
| | | if goBGRLen > 0 { |
| | | return (*[maxLen]byte)(unsafe.Pointer(bgr))[:goBGRLen:goBGRLen] |
| | | } |
| | | return nil |
| | | |
| | | } |
| | | |
| | | // ResizeBGR resize |
| | | func ResizeBGR(bgrO []byte, w, h, rw, rh int) []byte { |
| | | if (rw < 0 && rh < 0) || (rw > w && rh > h) { |
| | | return bgrO |
| | | } |
| | | |
| | | cw := find(w, h, rw, rh) |
| | | if cw == nil { |
| | | return bgresize(bgrO, w, h, rw, rh) |
| | | } |
| | | |
| | | var bgr *C.uchar |
| | | var bgrLen C.int |
| | | ret := C.resizebgr(cw.conv, unsafe.Pointer(&bgrO[0]), &bgr, &bgrLen) |
| | | if ret != 0 { |
| | | return nil |
| | | } |
| | | const maxLen = 0x7fffffff |
| | | goBGRLen := int(bgrLen) |
| | | if goBGRLen > 0 { |
| | | return (*[maxLen]byte)(unsafe.Pointer(bgr))[:goBGRLen:goBGRLen] |
| | | } |
| | | return nil |
| | | } |
| | | |
| | | // ResizeYUV yuv |
| | | func ResizeYUV(yuv []byte, w, h, rw, rh int) []byte { |
| | | if (rw < 0 && rh < 0) || (rw > w && rh > h) { |
| | | return yuv |
| | | } |
| | | |
| | | cw := find(w, h, rw, rh) |
| | | if cw == nil { |
| | | return yuv |
| | | } |
| | | |
| | | var resized *C.uchar |
| | | var resizedLen C.int |
| | | ret := C.resizeyuv(cw.conv, unsafe.Pointer(&yuv[0]), &resized, &resizedLen) |
| | | if ret != 0 { |
| | | return nil |
| | | } |
| | | |
| | | const maxLen = 0x7fffffff |
| | | goResizedLen := int(resizedLen) |
| | | if goResizedLen > 0 { |
| | | return (*[maxLen]byte)(unsafe.Pointer(resized))[:goResizedLen:goResizedLen] |
| | | } |
| | | return nil |
| | | } |
| | | |
| | | // YUV2BGRandResize conv and resize |
| | | func YUV2BGRandResize(yuv []byte, w, h, rw, rh int) ([]byte, []byte) { |
| | | cw := find(w, h, rw, rh) |
| | | if cw == nil { |
| | | origin := yuv2bgr(yuv, w, h) |
| | | resized := bgresize(origin, w, h, rw, rh) |
| | | return origin, resized |
| | | } |
| | | |
| | | var bgr *C.uchar |
| | | var bgrLen C.int |
| | | var scale *C.uchar |
| | | var scaleLen C.int |
| | | |
| | | ret := C.yuv2bgrandresize(cw.conv, unsafe.Pointer(&yuv[0]), &bgr, &bgrLen, &scale, &scaleLen) |
| | | |
| | | if ret != 0 { |
| | | return nil, nil |
| | | } |
| | | var out, resized []byte |
| | | |
| | | const maxLen = 0x7fffffff |
| | | goBGRLen, goScaleLen := int(bgrLen), int(scaleLen) |
| | | if goBGRLen > 0 { |
| | | out = (*[maxLen]byte)(unsafe.Pointer(bgr))[:goBGRLen:goBGRLen] |
| | | } |
| | | if goScaleLen > 0 { |
| | | resized = (*[maxLen]byte)(unsafe.Pointer(scale))[:goScaleLen:goScaleLen] |
| | | } |
| | | return out, resized |
| | | |
| | | } |
| | | |
| | | // Free free |
| | | func Free() { |
| | | for _, v := range convts { |
| | | if v.conv != nil { |
| | | C.conv_destroy(v.conv) |
| | | } |
| | | } |
| | | } |
| | | |
// yuv2bgr is the CPU fallback for YUV2BGR. It converts a packed frame (a w*h
// luma plane followed by rows of interleaved U,V pairs, one pair per 2x2 pixel
// block) to packed BGR using integer arithmetic.
//
// The chroma byte order (U first) and the output channel order (B, G, R) match
// the GPU path, so both paths produce the same layout for the same input.
// It returns nil when a dimension is not positive or yuv is too short for the
// samples that would be read.
func yuv2bgr(yuv []byte, w, h int) []byte {
	if w <= 0 || h <= 0 {
		return nil
	}

	lumaLen := w * h
	// Chroma rows are w bytes apart; ceil(h/2) rows are addressed, and for an
	// odd width the last pair reaches one byte past its row.
	if len(yuv) < lumaLen+((h+1)/2)*w+(w&1) {
		return nil
	}

	clamp := func(v int32) byte {
		if v > 255 {
			return 255
		}
		if v < 0 {
			return 0
		}
		return byte(v)
	}

	out := make([]byte, lumaLen*3)
	pos := 0
	for i := 0; i < h; i++ {
		lumaRow := i * w
		chromaRow := lumaLen + (i/2)*w
		for j := 0; j < w; j++ {
			// Both pixels of a horizontal pair share the chroma pair that
			// starts at the even column.
			c := chromaRow + j - (j & 0x01)

			y := int32(yuv[lumaRow+j])
			u := int32(yuv[c]) - 128
			v := int32(yuv[c+1]) - 128

			out[pos] = clamp(y + (177*u)/100)
			out[pos+1] = clamp(y - (34*u+71*v)/100)
			out[pos+2] = clamp(y + (140*v)/100)
			pos += 3
		}
	}
	return out
}
| | | |
| | | func bgresize(bgr []byte, w, h, rw, rh int) []byte { |
| | | img, err := godraw.ToImage(bgr, w, h) |
| | | if err != nil { |
| | | return nil |
| | | } |
| | | dstImg := imaging.Resize(img, rw, rh, imaging.NearestNeighbor) |
| | | return godraw.Image2BGR(dstImg) |
| | | } |
New file |
| | |
| | | /** |
| | | * Copyright 1993-2015 NVIDIA Corporation. All rights reserved. |
| | | * |
| | | * Please refer to the NVIDIA end user license agreement (EULA) associated |
| | | * with this source code for terms and conditions that govern your use of |
| | | * this software. Any use, reproduction, disclosure, or distribution of |
| | | * this software and related documentation outside the terms of the EULA |
| | | * is strictly prohibited. |
| | | * |
| | | */ |
| | | |
| | | #ifndef NV_UTIL_NPP_EXCEPTIONS_H |
| | | #define NV_UTIL_NPP_EXCEPTIONS_H |
| | | |
| | | |
| | | #include <string> |
| | | #include <sstream> |
| | | #include <iostream> |
| | | |
| | | /// All npp related C++ classes are put into the npp namespace. |
| | | namespace npp |
| | | { |
| | | |
| | | /// Exception base class. |
| | | /// This exception base class will be used for everything C++ throughout |
| | | /// the NPP project. |
| | | /// The exception contains a string message, as well as data fields for a string |
| | | /// containing the name of the file as well as the line number where the exception was thrown. |
| | | /// The easiest way of throwing exceptions and providing filename and line number is |
| | | /// to use one of the ASSERT macros defined for that purpose. |
| | | class Exception |
| | | { |
| | | public: |
| | | /// Constructor. |
| | | /// \param rMessage A message with information as to why the exception was thrown. |
| | | /// \param rFileName The name of the file where the exception was thrown. |
| | | /// \param nLineNumber Line number in the file where the exception was thrown. |
| | | explicit |
| | | Exception(const std::string &rMessage = "", const std::string &rFileName = "", unsigned int nLineNumber = 0) |
| | | : sMessage_(rMessage), sFileName_(rFileName), nLineNumber_(nLineNumber) |
| | | { }; |
| | | |
| | | Exception(const Exception &rException) |
| | | : sMessage_(rException.sMessage_), sFileName_(rException.sFileName_), nLineNumber_(rException.nLineNumber_) |
| | | { }; |
| | | |
| | | virtual |
| | | ~Exception() |
| | | { }; |
| | | |
| | | /// Get the exception's message. |
| | | const |
| | | std::string & |
| | | message() |
| | | const |
| | | { |
| | | return sMessage_; |
| | | } |
| | | |
| | | /// Get the exception's file info. |
| | | const |
| | | std::string & |
| | | fileName() |
| | | const |
| | | { |
| | | return sFileName_; |
| | | } |
| | | |
| | | /// Get the exceptions's line info. |
| | | unsigned int |
| | | lineNumber() |
| | | const |
| | | { |
| | | return nLineNumber_; |
| | | } |
| | | |
| | | |
| | | /// Create a clone of this exception. |
| | | /// This creates a new Exception object on the heap. It is |
| | | /// the responsibility of the user of this function to free this memory |
| | | /// (delete x). |
| | | virtual |
| | | Exception * |
| | | clone() |
| | | const |
| | | { |
| | | return new Exception(*this); |
| | | } |
| | | |
| | | /// Create a single string with all the exceptions information. |
| | | /// The virtual toString() method is used by the operator<<() |
| | | /// so that all exceptions derived from this base-class can print |
| | | /// their full information correctly even if a reference to their |
| | | /// exact type is not had at the time of printing (i.e. the basic |
| | | /// operator<<() is used). |
| | | virtual |
| | | std::string |
| | | toString() |
| | | const |
| | | { |
| | | std::ostringstream oOutputString; |
| | | oOutputString << fileName() << ":" << lineNumber() << ": " << message(); |
| | | return oOutputString.str(); |
| | | } |
| | | |
| | | private: |
| | | std::string sMessage_; ///< Message regarding the cause of the exception. |
| | | std::string sFileName_; ///< Name of the file where the exception was thrown. |
| | | unsigned int nLineNumber_; ///< Line number in the file where the exception was thrown |
| | | }; |
| | | |
| | | /// Output stream inserter for Exception. |
| | | /// \param rOutputStream The stream the exception information is written to. |
| | | /// \param rException The exception that's being written. |
| | | /// \return Reference to the output stream being used. |
| | | static std::ostream & |
| | | operator << (std::ostream &rOutputStream, const Exception &rException) |
| | | { |
| | | rOutputStream << rException.toString(); |
| | | return rOutputStream; |
| | | } |
| | | |
/// Basic assert macro.
/// Use it to enforce any kind of pre- or post-condition.
/// In contrast to the C runtime assert macro it does not abort execution; it
/// throws an npp::Exception whose message is the stringified condition and
/// whose file name and line number come from __FILE__ and __LINE__.
/// \note The expansion is wrapped in do { ... } while (false), so leaving out
///     the semicolon after an invocation is a compiler error. Correct usage:
///         NPP_ASSERT(n < MAX);
#define NPP_ASSERT(C) \
    do \
    { \
        if (!(C)) \
        { \
            throw npp::Exception(#C " assertion faild!", __FILE__, __LINE__); \
        } \
    } while (false)
| | | |
// ASSERT macro with message.
// Works like the basic assert macro and additionally appends the message M to
// the exception text. M must be a string literal because it is joined to the
// preceding literal by adjacent string-literal concatenation.
// Note: this definition is not guarded by any build-mode check, so the
//       condition C is always evaluated. Keeping C free of side effects is
//       still good practice.
#define NPP_ASSERT_MSG(C, M) \
    do \
    { \
        if (!(C)) \
        { \
            throw npp::Exception(#C " assertion faild! Message: " M, __FILE__, __LINE__); \
        } \
    } while (false)
| | | |
#ifdef _DEBUG
/// Basic debug assert macro.
/// When _DEBUG is defined this macro behaves like NPP_ASSERT(C): it throws an
/// npp::Exception carrying the stringified condition, file name and line number.
/// When _DEBUG is not defined it becomes a no-op and C is never evaluated. It is
/// therefore of utmost importance not to put statements into this macro that
/// cause side effects required for correct program execution.
#define NPP_DEBUG_ASSERT(C) do {if (!(C)) throw npp::Exception(#C " debug assertion faild!", __FILE__, __LINE__);} while(false)
#else
/// Release variant: C is discarded without being evaluated. The expansion is an
/// empty do/while(false) statement rather than nothing at all, so an invocation
/// still requires its trailing semicolon and remains a single well-formed
/// statement when used as the body of an if/else or a loop.
#define NPP_DEBUG_ASSERT(C) do {} while(false)
#endif
| | | |
/// ASSERT for null-pointer test.
/// Throws an npp::Exception when P compares equal to 0. P is evaluated exactly
/// once, so it is safe to pass an expression with side effects. The macro is
/// never compiled to a no-op, because resource allocation may fail for external
/// reasons that are not under the control of the software developer.
#define NPP_ASSERT_NOT_NULL(P) \
    do \
    { \
        if ((P) == 0) \
        { \
            throw npp::Exception(#P " not null assertion faild!", __FILE__, __LINE__); \
        } \
    } while (false)
| | | |
/// Macro for flagging methods as not implemented.
/// Unconditionally throws an npp::Exception stating that the implementation
/// is missing, tagged with the file name and line number of the invocation.
#define NPP_NOT_IMPLEMENTED() \
    do \
    { \
        throw npp::Exception("Implementation missing!", __FILE__, __LINE__); \
    } while (false)
| | | |
/// Macro for checking the error return code of CUDA (runtime) calls.
/// S is evaluated once and its result stored. A result other than cudaSuccess
/// is printed to std::cout, after which NPP_ASSERT throws an npp::Exception.
/// This macro never gets disabled.
#define NPP_CHECK_CUDA(S) \
    do \
    { \
        cudaError_t eCUDAResult = S; \
        if (eCUDAResult != cudaSuccess) \
        { \
            std::cout << "NPP_CHECK_CUDA - eCUDAResult = " << eCUDAResult << std::endl; \
        } \
        NPP_ASSERT(eCUDAResult == cudaSuccess); \
    } while (false)
| | | |
/// Macro for checking the error return code of NPP calls.
/// S is evaluated once and its result stored. A result other than NPP_SUCCESS
/// is printed to std::cout, both as the name returned by _cudaGetErrorEnum()
/// and as its numeric value, after which NPP_ASSERT throws an npp::Exception.
#define NPP_CHECK_NPP(S) \
    do \
    { \
        NppStatus eStatusNPP = S; \
        if (eStatusNPP != NPP_SUCCESS) \
        { \
            std::cout << "NPP_CHECK_NPP - eStatusNPP = " << _cudaGetErrorEnum(eStatusNPP) << "("<< eStatusNPP << ")" << std::endl; \
        } \
        NPP_ASSERT(eStatusNPP == NPP_SUCCESS); \
    } while (false)
| | | |
/// Macro for checking error return codes from cuFFT calls.
/// S is evaluated once and its result stored. A result other than
/// CUFFT_SUCCESS is printed to std::cout, after which NPP_ASSERT throws an
/// npp::Exception.
/// Both comparisons use CUFFT_SUCCESS: the stored value is a cufftResult, so
/// it must be compared against the cuFFT success code and not against the
/// unrelated NppStatus enumerator NPP_SUCCESS.
#define NPP_CHECK_CUFFT(S) do {cufftResult eCUFFTResult; \
    eCUFFTResult = S; \
    if (eCUFFTResult != CUFFT_SUCCESS) std::cout << "NPP_CHECK_CUFFT - eCUFFTResult = " << eCUFFTResult << std::endl; \
    NPP_ASSERT(eCUFFTResult == CUFFT_SUCCESS);} while (false)
| | | |
| | | } // npp namespace |
| | | |
| | | #endif // NV_UTIL_NPP_EXCEPTIONS_H |
New file |
| | |
| | | /** |
| | | * Copyright 1993-2013 NVIDIA Corporation. All rights reserved. |
| | | * |
| | | * Please refer to the NVIDIA end user license agreement (EULA) associated |
| | | * with this source code for terms and conditions that govern your use of |
| | | * this software. Any use, reproduction, disclosure, or distribution of |
| | | * this software and related documentation outside the terms of the EULA |
| | | * is strictly prohibited. |
| | | * |
| | | */ |
| | | |
| | | //////////////////////////////////////////////////////////////////////////////// |
| | | // These are CUDA Helper functions for initialization and error checking |
| | | |
| | | #ifndef HELPER_CUDA_H |
| | | #define HELPER_CUDA_H |
| | | |
| | | #pragma once |
| | | |
| | | #include <stdlib.h> |
| | | #include <stdio.h> |
| | | #include <string.h> |
| | | |
| | | #include <helper_string.h> |
| | | |
| | | #ifndef EXIT_WAIVED |
| | | #define EXIT_WAIVED 2 |
| | | #endif |
| | | |
// Note: your SDK sample is required to include the proper header files itself. Please
// refer to the CUDA examples for the CUDA headers that are needed, which may change depending
// on which CUDA functions are used.
| | | |
| | | // CUDA Runtime error messages |
| | | #ifdef __DRIVER_TYPES_H__ |
| | | static const char *_cudaGetErrorEnum(cudaError_t error) |
| | | { |
| | | switch (error) |
| | | { |
| | | case cudaSuccess: |
| | | return "cudaSuccess"; |
| | | |
| | | case cudaErrorMissingConfiguration: |
| | | return "cudaErrorMissingConfiguration"; |
| | | |
| | | case cudaErrorMemoryAllocation: |
| | | return "cudaErrorMemoryAllocation"; |
| | | |
| | | case cudaErrorInitializationError: |
| | | return "cudaErrorInitializationError"; |
| | | |
| | | case cudaErrorLaunchFailure: |
| | | return "cudaErrorLaunchFailure"; |
| | | |
| | | case cudaErrorPriorLaunchFailure: |
| | | return "cudaErrorPriorLaunchFailure"; |
| | | |
| | | case cudaErrorLaunchTimeout: |
| | | return "cudaErrorLaunchTimeout"; |
| | | |
| | | case cudaErrorLaunchOutOfResources: |
| | | return "cudaErrorLaunchOutOfResources"; |
| | | |
| | | case cudaErrorInvalidDeviceFunction: |
| | | return "cudaErrorInvalidDeviceFunction"; |
| | | |
| | | case cudaErrorInvalidConfiguration: |
| | | return "cudaErrorInvalidConfiguration"; |
| | | |
| | | case cudaErrorInvalidDevice: |
| | | return "cudaErrorInvalidDevice"; |
| | | |
| | | case cudaErrorInvalidValue: |
| | | return "cudaErrorInvalidValue"; |
| | | |
| | | case cudaErrorInvalidPitchValue: |
| | | return "cudaErrorInvalidPitchValue"; |
| | | |
| | | case cudaErrorInvalidSymbol: |
| | | return "cudaErrorInvalidSymbol"; |
| | | |
| | | case cudaErrorMapBufferObjectFailed: |
| | | return "cudaErrorMapBufferObjectFailed"; |
| | | |
| | | case cudaErrorUnmapBufferObjectFailed: |
| | | return "cudaErrorUnmapBufferObjectFailed"; |
| | | |
| | | case cudaErrorInvalidHostPointer: |
| | | return "cudaErrorInvalidHostPointer"; |
| | | |
| | | case cudaErrorInvalidDevicePointer: |
| | | return "cudaErrorInvalidDevicePointer"; |
| | | |
| | | case cudaErrorInvalidTexture: |
| | | return "cudaErrorInvalidTexture"; |
| | | |
| | | case cudaErrorInvalidTextureBinding: |
| | | return "cudaErrorInvalidTextureBinding"; |
| | | |
| | | case cudaErrorInvalidChannelDescriptor: |
| | | return "cudaErrorInvalidChannelDescriptor"; |
| | | |
| | | case cudaErrorInvalidMemcpyDirection: |
| | | return "cudaErrorInvalidMemcpyDirection"; |
| | | |
| | | case cudaErrorAddressOfConstant: |
| | | return "cudaErrorAddressOfConstant"; |
| | | |
| | | case cudaErrorTextureFetchFailed: |
| | | return "cudaErrorTextureFetchFailed"; |
| | | |
| | | case cudaErrorTextureNotBound: |
| | | return "cudaErrorTextureNotBound"; |
| | | |
| | | case cudaErrorSynchronizationError: |
| | | return "cudaErrorSynchronizationError"; |
| | | |
| | | case cudaErrorInvalidFilterSetting: |
| | | return "cudaErrorInvalidFilterSetting"; |
| | | |
| | | case cudaErrorInvalidNormSetting: |
| | | return "cudaErrorInvalidNormSetting"; |
| | | |
| | | case cudaErrorMixedDeviceExecution: |
| | | return "cudaErrorMixedDeviceExecution"; |
| | | |
| | | case cudaErrorCudartUnloading: |
| | | return "cudaErrorCudartUnloading"; |
| | | |
| | | case cudaErrorUnknown: |
| | | return "cudaErrorUnknown"; |
| | | |
| | | case cudaErrorNotYetImplemented: |
| | | return "cudaErrorNotYetImplemented"; |
| | | |
| | | case cudaErrorMemoryValueTooLarge: |
| | | return "cudaErrorMemoryValueTooLarge"; |
| | | |
| | | case cudaErrorInvalidResourceHandle: |
| | | return "cudaErrorInvalidResourceHandle"; |
| | | |
| | | case cudaErrorNotReady: |
| | | return "cudaErrorNotReady"; |
| | | |
| | | case cudaErrorInsufficientDriver: |
| | | return "cudaErrorInsufficientDriver"; |
| | | |
| | | case cudaErrorSetOnActiveProcess: |
| | | return "cudaErrorSetOnActiveProcess"; |
| | | |
| | | case cudaErrorInvalidSurface: |
| | | return "cudaErrorInvalidSurface"; |
| | | |
| | | case cudaErrorNoDevice: |
| | | return "cudaErrorNoDevice"; |
| | | |
| | | case cudaErrorECCUncorrectable: |
| | | return "cudaErrorECCUncorrectable"; |
| | | |
| | | case cudaErrorSharedObjectSymbolNotFound: |
| | | return "cudaErrorSharedObjectSymbolNotFound"; |
| | | |
| | | case cudaErrorSharedObjectInitFailed: |
| | | return "cudaErrorSharedObjectInitFailed"; |
| | | |
| | | case cudaErrorUnsupportedLimit: |
| | | return "cudaErrorUnsupportedLimit"; |
| | | |
| | | case cudaErrorDuplicateVariableName: |
| | | return "cudaErrorDuplicateVariableName"; |
| | | |
| | | case cudaErrorDuplicateTextureName: |
| | | return "cudaErrorDuplicateTextureName"; |
| | | |
| | | case cudaErrorDuplicateSurfaceName: |
| | | return "cudaErrorDuplicateSurfaceName"; |
| | | |
| | | case cudaErrorDevicesUnavailable: |
| | | return "cudaErrorDevicesUnavailable"; |
| | | |
| | | case cudaErrorInvalidKernelImage: |
| | | return "cudaErrorInvalidKernelImage"; |
| | | |
| | | case cudaErrorNoKernelImageForDevice: |
| | | return "cudaErrorNoKernelImageForDevice"; |
| | | |
| | | case cudaErrorIncompatibleDriverContext: |
| | | return "cudaErrorIncompatibleDriverContext"; |
| | | |
| | | case cudaErrorPeerAccessAlreadyEnabled: |
| | | return "cudaErrorPeerAccessAlreadyEnabled"; |
| | | |
| | | case cudaErrorPeerAccessNotEnabled: |
| | | return "cudaErrorPeerAccessNotEnabled"; |
| | | |
| | | case cudaErrorDeviceAlreadyInUse: |
| | | return "cudaErrorDeviceAlreadyInUse"; |
| | | |
| | | case cudaErrorProfilerDisabled: |
| | | return "cudaErrorProfilerDisabled"; |
| | | |
| | | case cudaErrorProfilerNotInitialized: |
| | | return "cudaErrorProfilerNotInitialized"; |
| | | |
| | | case cudaErrorProfilerAlreadyStarted: |
| | | return "cudaErrorProfilerAlreadyStarted"; |
| | | |
| | | case cudaErrorProfilerAlreadyStopped: |
| | | return "cudaErrorProfilerAlreadyStopped"; |
| | | |
| | | /* Since CUDA 4.0*/ |
| | | case cudaErrorAssert: |
| | | return "cudaErrorAssert"; |
| | | |
| | | case cudaErrorTooManyPeers: |
| | | return "cudaErrorTooManyPeers"; |
| | | |
| | | case cudaErrorHostMemoryAlreadyRegistered: |
| | | return "cudaErrorHostMemoryAlreadyRegistered"; |
| | | |
| | | case cudaErrorHostMemoryNotRegistered: |
| | | return "cudaErrorHostMemoryNotRegistered"; |
| | | |
| | | /* Since CUDA 5.0 */ |
| | | case cudaErrorOperatingSystem: |
| | | return "cudaErrorOperatingSystem"; |
| | | |
| | | case cudaErrorPeerAccessUnsupported: |
| | | return "cudaErrorPeerAccessUnsupported"; |
| | | |
| | | case cudaErrorLaunchMaxDepthExceeded: |
| | | return "cudaErrorLaunchMaxDepthExceeded"; |
| | | |
| | | case cudaErrorLaunchFileScopedTex: |
| | | return "cudaErrorLaunchFileScopedTex"; |
| | | |
| | | case cudaErrorLaunchFileScopedSurf: |
| | | return "cudaErrorLaunchFileScopedSurf"; |
| | | |
| | | case cudaErrorSyncDepthExceeded: |
| | | return "cudaErrorSyncDepthExceeded"; |
| | | |
| | | case cudaErrorLaunchPendingCountExceeded: |
| | | return "cudaErrorLaunchPendingCountExceeded"; |
| | | |
| | | case cudaErrorNotPermitted: |
| | | return "cudaErrorNotPermitted"; |
| | | |
| | | case cudaErrorNotSupported: |
| | | return "cudaErrorNotSupported"; |
| | | |
| | | /* Since CUDA 6.0 */ |
| | | case cudaErrorHardwareStackError: |
| | | return "cudaErrorHardwareStackError"; |
| | | |
| | | case cudaErrorIllegalInstruction: |
| | | return "cudaErrorIllegalInstruction"; |
| | | |
| | | case cudaErrorMisalignedAddress: |
| | | return "cudaErrorMisalignedAddress"; |
| | | |
| | | case cudaErrorInvalidAddressSpace: |
| | | return "cudaErrorInvalidAddressSpace"; |
| | | |
| | | case cudaErrorInvalidPc: |
| | | return "cudaErrorInvalidPc"; |
| | | |
| | | case cudaErrorIllegalAddress: |
| | | return "cudaErrorIllegalAddress"; |
| | | |
| | | /* Since CUDA 6.5*/ |
| | | case cudaErrorInvalidPtx: |
| | | return "cudaErrorInvalidPtx"; |
| | | |
| | | case cudaErrorInvalidGraphicsContext: |
| | | return "cudaErrorInvalidGraphicsContext"; |
| | | |
| | | case cudaErrorStartupFailure: |
| | | return "cudaErrorStartupFailure"; |
| | | |
| | | case cudaErrorApiFailureBase: |
| | | return "cudaErrorApiFailureBase"; |
| | | } |
| | | |
| | | return "<unknown>"; |
| | | } |
| | | #endif |
| | | |
| | | #ifdef __cuda_cuda_h__ |
| | | // CUDA Driver API errors |
| | | static const char *_cudaGetErrorEnum(CUresult error) |
| | | { |
| | | switch (error) |
| | | { |
| | | case CUDA_SUCCESS: |
| | | return "CUDA_SUCCESS"; |
| | | |
| | | case CUDA_ERROR_INVALID_VALUE: |
| | | return "CUDA_ERROR_INVALID_VALUE"; |
| | | |
| | | case CUDA_ERROR_OUT_OF_MEMORY: |
| | | return "CUDA_ERROR_OUT_OF_MEMORY"; |
| | | |
| | | case CUDA_ERROR_NOT_INITIALIZED: |
| | | return "CUDA_ERROR_NOT_INITIALIZED"; |
| | | |
| | | case CUDA_ERROR_DEINITIALIZED: |
| | | return "CUDA_ERROR_DEINITIALIZED"; |
| | | |
| | | case CUDA_ERROR_PROFILER_DISABLED: |
| | | return "CUDA_ERROR_PROFILER_DISABLED"; |
| | | |
| | | case CUDA_ERROR_PROFILER_NOT_INITIALIZED: |
| | | return "CUDA_ERROR_PROFILER_NOT_INITIALIZED"; |
| | | |
| | | case CUDA_ERROR_PROFILER_ALREADY_STARTED: |
| | | return "CUDA_ERROR_PROFILER_ALREADY_STARTED"; |
| | | |
| | | case CUDA_ERROR_PROFILER_ALREADY_STOPPED: |
| | | return "CUDA_ERROR_PROFILER_ALREADY_STOPPED"; |
| | | |
| | | case CUDA_ERROR_NO_DEVICE: |
| | | return "CUDA_ERROR_NO_DEVICE"; |
| | | |
| | | case CUDA_ERROR_INVALID_DEVICE: |
| | | return "CUDA_ERROR_INVALID_DEVICE"; |
| | | |
| | | case CUDA_ERROR_INVALID_IMAGE: |
| | | return "CUDA_ERROR_INVALID_IMAGE"; |
| | | |
| | | case CUDA_ERROR_INVALID_CONTEXT: |
| | | return "CUDA_ERROR_INVALID_CONTEXT"; |
| | | |
| | | case CUDA_ERROR_CONTEXT_ALREADY_CURRENT: |
| | | return "CUDA_ERROR_CONTEXT_ALREADY_CURRENT"; |
| | | |
| | | case CUDA_ERROR_MAP_FAILED: |
| | | return "CUDA_ERROR_MAP_FAILED"; |
| | | |
| | | case CUDA_ERROR_UNMAP_FAILED: |
| | | return "CUDA_ERROR_UNMAP_FAILED"; |
| | | |
| | | case CUDA_ERROR_ARRAY_IS_MAPPED: |
| | | return "CUDA_ERROR_ARRAY_IS_MAPPED"; |
| | | |
| | | case CUDA_ERROR_ALREADY_MAPPED: |
| | | return "CUDA_ERROR_ALREADY_MAPPED"; |
| | | |
| | | case CUDA_ERROR_NO_BINARY_FOR_GPU: |
| | | return "CUDA_ERROR_NO_BINARY_FOR_GPU"; |
| | | |
| | | case CUDA_ERROR_ALREADY_ACQUIRED: |
| | | return "CUDA_ERROR_ALREADY_ACQUIRED"; |
| | | |
| | | case CUDA_ERROR_NOT_MAPPED: |
| | | return "CUDA_ERROR_NOT_MAPPED"; |
| | | |
| | | case CUDA_ERROR_NOT_MAPPED_AS_ARRAY: |
| | | return "CUDA_ERROR_NOT_MAPPED_AS_ARRAY"; |
| | | |
| | | case CUDA_ERROR_NOT_MAPPED_AS_POINTER: |
| | | return "CUDA_ERROR_NOT_MAPPED_AS_POINTER"; |
| | | |
| | | case CUDA_ERROR_ECC_UNCORRECTABLE: |
| | | return "CUDA_ERROR_ECC_UNCORRECTABLE"; |
| | | |
| | | case CUDA_ERROR_UNSUPPORTED_LIMIT: |
| | | return "CUDA_ERROR_UNSUPPORTED_LIMIT"; |
| | | |
| | | case CUDA_ERROR_CONTEXT_ALREADY_IN_USE: |
| | | return "CUDA_ERROR_CONTEXT_ALREADY_IN_USE"; |
| | | |
| | | case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: |
| | | return "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED"; |
| | | |
| | | case CUDA_ERROR_INVALID_PTX: |
| | | return "CUDA_ERROR_INVALID_PTX"; |
| | | |
| | | case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: |
| | | return "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT"; |
| | | |
| | | case CUDA_ERROR_INVALID_SOURCE: |
| | | return "CUDA_ERROR_INVALID_SOURCE"; |
| | | |
| | | case CUDA_ERROR_FILE_NOT_FOUND: |
| | | return "CUDA_ERROR_FILE_NOT_FOUND"; |
| | | |
| | | case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: |
| | | return "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND"; |
| | | |
| | | case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: |
| | | return "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED"; |
| | | |
| | | case CUDA_ERROR_OPERATING_SYSTEM: |
| | | return "CUDA_ERROR_OPERATING_SYSTEM"; |
| | | |
| | | case CUDA_ERROR_INVALID_HANDLE: |
| | | return "CUDA_ERROR_INVALID_HANDLE"; |
| | | |
| | | case CUDA_ERROR_NOT_FOUND: |
| | | return "CUDA_ERROR_NOT_FOUND"; |
| | | |
| | | case CUDA_ERROR_NOT_READY: |
| | | return "CUDA_ERROR_NOT_READY"; |
| | | |
| | | case CUDA_ERROR_ILLEGAL_ADDRESS: |
| | | return "CUDA_ERROR_ILLEGAL_ADDRESS"; |
| | | |
| | | case CUDA_ERROR_LAUNCH_FAILED: |
| | | return "CUDA_ERROR_LAUNCH_FAILED"; |
| | | |
| | | case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: |
| | | return "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES"; |
| | | |
| | | case CUDA_ERROR_LAUNCH_TIMEOUT: |
| | | return "CUDA_ERROR_LAUNCH_TIMEOUT"; |
| | | |
| | | case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: |
| | | return "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING"; |
| | | |
| | | case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: |
| | | return "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED"; |
| | | |
| | | case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: |
| | | return "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED"; |
| | | |
| | | case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: |
| | | return "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE"; |
| | | |
| | | case CUDA_ERROR_CONTEXT_IS_DESTROYED: |
| | | return "CUDA_ERROR_CONTEXT_IS_DESTROYED"; |
| | | |
| | | case CUDA_ERROR_ASSERT: |
| | | return "CUDA_ERROR_ASSERT"; |
| | | |
| | | case CUDA_ERROR_TOO_MANY_PEERS: |
| | | return "CUDA_ERROR_TOO_MANY_PEERS"; |
| | | |
| | | case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: |
| | | return "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED"; |
| | | |
| | | case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: |
| | | return "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED"; |
| | | |
| | | case CUDA_ERROR_HARDWARE_STACK_ERROR: |
| | | return "CUDA_ERROR_HARDWARE_STACK_ERROR"; |
| | | |
| | | case CUDA_ERROR_ILLEGAL_INSTRUCTION: |
| | | return "CUDA_ERROR_ILLEGAL_INSTRUCTION"; |
| | | |
| | | case CUDA_ERROR_MISALIGNED_ADDRESS: |
| | | return "CUDA_ERROR_MISALIGNED_ADDRESS"; |
| | | |
| | | case CUDA_ERROR_INVALID_ADDRESS_SPACE: |
| | | return "CUDA_ERROR_INVALID_ADDRESS_SPACE"; |
| | | |
| | | case CUDA_ERROR_INVALID_PC: |
| | | return "CUDA_ERROR_INVALID_PC"; |
| | | |
| | | case CUDA_ERROR_NOT_PERMITTED: |
| | | return "CUDA_ERROR_NOT_PERMITTED"; |
| | | |
| | | case CUDA_ERROR_NOT_SUPPORTED: |
| | | return "CUDA_ERROR_NOT_SUPPORTED"; |
| | | |
| | | case CUDA_ERROR_UNKNOWN: |
| | | return "CUDA_ERROR_UNKNOWN"; |
| | | } |
| | | |
| | | return "<unknown>"; |
| | | } |
| | | #endif |
| | | |
| | | #ifdef CUBLAS_API_H_ |
| | | // cuBLAS API errors |
| | | static const char *_cudaGetErrorEnum(cublasStatus_t error) |
| | | { |
| | | switch (error) |
| | | { |
| | | case CUBLAS_STATUS_SUCCESS: |
| | | return "CUBLAS_STATUS_SUCCESS"; |
| | | |
| | | case CUBLAS_STATUS_NOT_INITIALIZED: |
| | | return "CUBLAS_STATUS_NOT_INITIALIZED"; |
| | | |
| | | case CUBLAS_STATUS_ALLOC_FAILED: |
| | | return "CUBLAS_STATUS_ALLOC_FAILED"; |
| | | |
| | | case CUBLAS_STATUS_INVALID_VALUE: |
| | | return "CUBLAS_STATUS_INVALID_VALUE"; |
| | | |
| | | case CUBLAS_STATUS_ARCH_MISMATCH: |
| | | return "CUBLAS_STATUS_ARCH_MISMATCH"; |
| | | |
| | | case CUBLAS_STATUS_MAPPING_ERROR: |
| | | return "CUBLAS_STATUS_MAPPING_ERROR"; |
| | | |
| | | case CUBLAS_STATUS_EXECUTION_FAILED: |
| | | return "CUBLAS_STATUS_EXECUTION_FAILED"; |
| | | |
| | | case CUBLAS_STATUS_INTERNAL_ERROR: |
| | | return "CUBLAS_STATUS_INTERNAL_ERROR"; |
| | | |
| | | case CUBLAS_STATUS_NOT_SUPPORTED: |
| | | return "CUBLAS_STATUS_NOT_SUPPORTED"; |
| | | |
| | | case CUBLAS_STATUS_LICENSE_ERROR: |
| | | return "CUBLAS_STATUS_LICENSE_ERROR"; |
| | | } |
| | | |
| | | return "<unknown>"; |
| | | } |
| | | #endif |
| | | |
| | | #ifdef _CUFFT_H_ |
| | | // cuFFT API errors |
| | | static const char *_cudaGetErrorEnum(cufftResult error) |
| | | { |
| | | switch (error) |
| | | { |
| | | case CUFFT_SUCCESS: |
| | | return "CUFFT_SUCCESS"; |
| | | |
| | | case CUFFT_INVALID_PLAN: |
| | | return "CUFFT_INVALID_PLAN"; |
| | | |
| | | case CUFFT_ALLOC_FAILED: |
| | | return "CUFFT_ALLOC_FAILED"; |
| | | |
| | | case CUFFT_INVALID_TYPE: |
| | | return "CUFFT_INVALID_TYPE"; |
| | | |
| | | case CUFFT_INVALID_VALUE: |
| | | return "CUFFT_INVALID_VALUE"; |
| | | |
| | | case CUFFT_INTERNAL_ERROR: |
| | | return "CUFFT_INTERNAL_ERROR"; |
| | | |
| | | case CUFFT_EXEC_FAILED: |
| | | return "CUFFT_EXEC_FAILED"; |
| | | |
| | | case CUFFT_SETUP_FAILED: |
| | | return "CUFFT_SETUP_FAILED"; |
| | | |
| | | case CUFFT_INVALID_SIZE: |
| | | return "CUFFT_INVALID_SIZE"; |
| | | |
| | | case CUFFT_UNALIGNED_DATA: |
| | | return "CUFFT_UNALIGNED_DATA"; |
| | | |
| | | case CUFFT_INCOMPLETE_PARAMETER_LIST: |
| | | return "CUFFT_INCOMPLETE_PARAMETER_LIST"; |
| | | |
| | | case CUFFT_INVALID_DEVICE: |
| | | return "CUFFT_INVALID_DEVICE"; |
| | | |
| | | case CUFFT_PARSE_ERROR: |
| | | return "CUFFT_PARSE_ERROR"; |
| | | |
| | | case CUFFT_NO_WORKSPACE: |
| | | return "CUFFT_NO_WORKSPACE"; |
| | | |
| | | case CUFFT_NOT_IMPLEMENTED: |
| | | return "CUFFT_NOT_IMPLEMENTED"; |
| | | |
| | | case CUFFT_LICENSE_ERROR: |
| | | return "CUFFT_LICENSE_ERROR"; |
| | | } |
| | | |
| | | return "<unknown>"; |
| | | } |
| | | #endif |
| | | |
| | | |
| | | #ifdef CUSPARSEAPI |
| | | // cuSPARSE API errors |
| | | static const char *_cudaGetErrorEnum(cusparseStatus_t error) |
| | | { |
| | | switch (error) |
| | | { |
| | | case CUSPARSE_STATUS_SUCCESS: |
| | | return "CUSPARSE_STATUS_SUCCESS"; |
| | | |
| | | case CUSPARSE_STATUS_NOT_INITIALIZED: |
| | | return "CUSPARSE_STATUS_NOT_INITIALIZED"; |
| | | |
| | | case CUSPARSE_STATUS_ALLOC_FAILED: |
| | | return "CUSPARSE_STATUS_ALLOC_FAILED"; |
| | | |
| | | case CUSPARSE_STATUS_INVALID_VALUE: |
| | | return "CUSPARSE_STATUS_INVALID_VALUE"; |
| | | |
| | | case CUSPARSE_STATUS_ARCH_MISMATCH: |
| | | return "CUSPARSE_STATUS_ARCH_MISMATCH"; |
| | | |
| | | case CUSPARSE_STATUS_MAPPING_ERROR: |
| | | return "CUSPARSE_STATUS_MAPPING_ERROR"; |
| | | |
| | | case CUSPARSE_STATUS_EXECUTION_FAILED: |
| | | return "CUSPARSE_STATUS_EXECUTION_FAILED"; |
| | | |
| | | case CUSPARSE_STATUS_INTERNAL_ERROR: |
| | | return "CUSPARSE_STATUS_INTERNAL_ERROR"; |
| | | |
| | | case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED: |
| | | return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED"; |
| | | } |
| | | |
| | | return "<unknown>"; |
| | | } |
| | | #endif |
| | | |
| | | #ifdef CUSOLVER_COMMON_H_ |
| | | //cuSOLVER API errors |
| | | static const char *_cudaGetErrorEnum(cusolverStatus_t error) |
| | | { |
| | | switch(error) |
| | | { |
| | | case CUSOLVER_STATUS_SUCCESS: |
| | | return "CUSOLVER_STATUS_SUCCESS"; |
| | | case CUSOLVER_STATUS_NOT_INITIALIZED: |
| | | return "CUSOLVER_STATUS_NOT_INITIALIZED"; |
| | | case CUSOLVER_STATUS_ALLOC_FAILED: |
| | | return "CUSOLVER_STATUS_ALLOC_FAILED"; |
| | | case CUSOLVER_STATUS_INVALID_VALUE: |
| | | return "CUSOLVER_STATUS_INVALID_VALUE"; |
| | | case CUSOLVER_STATUS_ARCH_MISMATCH: |
| | | return "CUSOLVER_STATUS_ARCH_MISMATCH"; |
| | | case CUSOLVER_STATUS_MAPPING_ERROR: |
| | | return "CUSOLVER_STATUS_MAPPING_ERROR"; |
| | | case CUSOLVER_STATUS_EXECUTION_FAILED: |
| | | return "CUSOLVER_STATUS_EXECUTION_FAILED"; |
| | | case CUSOLVER_STATUS_INTERNAL_ERROR: |
| | | return "CUSOLVER_STATUS_INTERNAL_ERROR"; |
| | | case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED: |
| | | return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED"; |
| | | case CUSOLVER_STATUS_NOT_SUPPORTED : |
| | | return "CUSOLVER_STATUS_NOT_SUPPORTED "; |
| | | case CUSOLVER_STATUS_ZERO_PIVOT: |
| | | return "CUSOLVER_STATUS_ZERO_PIVOT"; |
| | | case CUSOLVER_STATUS_INVALID_LICENSE: |
| | | return "CUSOLVER_STATUS_INVALID_LICENSE"; |
| | | } |
| | | |
| | | return "<unknown>"; |
| | | |
| | | } |
| | | #endif |
| | | |
| | | #ifdef CURAND_H_ |
| | | // cuRAND API errors |
| | | static const char *_cudaGetErrorEnum(curandStatus_t error) |
| | | { |
| | | switch (error) |
| | | { |
| | | case CURAND_STATUS_SUCCESS: |
| | | return "CURAND_STATUS_SUCCESS"; |
| | | |
| | | case CURAND_STATUS_VERSION_MISMATCH: |
| | | return "CURAND_STATUS_VERSION_MISMATCH"; |
| | | |
| | | case CURAND_STATUS_NOT_INITIALIZED: |
| | | return "CURAND_STATUS_NOT_INITIALIZED"; |
| | | |
| | | case CURAND_STATUS_ALLOCATION_FAILED: |
| | | return "CURAND_STATUS_ALLOCATION_FAILED"; |
| | | |
| | | case CURAND_STATUS_TYPE_ERROR: |
| | | return "CURAND_STATUS_TYPE_ERROR"; |
| | | |
| | | case CURAND_STATUS_OUT_OF_RANGE: |
| | | return "CURAND_STATUS_OUT_OF_RANGE"; |
| | | |
| | | case CURAND_STATUS_LENGTH_NOT_MULTIPLE: |
| | | return "CURAND_STATUS_LENGTH_NOT_MULTIPLE"; |
| | | |
| | | case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED: |
| | | return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED"; |
| | | |
| | | case CURAND_STATUS_LAUNCH_FAILURE: |
| | | return "CURAND_STATUS_LAUNCH_FAILURE"; |
| | | |
| | | case CURAND_STATUS_PREEXISTING_FAILURE: |
| | | return "CURAND_STATUS_PREEXISTING_FAILURE"; |
| | | |
| | | case CURAND_STATUS_INITIALIZATION_FAILED: |
| | | return "CURAND_STATUS_INITIALIZATION_FAILED"; |
| | | |
| | | case CURAND_STATUS_ARCH_MISMATCH: |
| | | return "CURAND_STATUS_ARCH_MISMATCH"; |
| | | |
| | | case CURAND_STATUS_INTERNAL_ERROR: |
| | | return "CURAND_STATUS_INTERNAL_ERROR"; |
| | | } |
| | | |
| | | return "<unknown>"; |
| | | } |
| | | #endif |
| | | |
| | | #ifdef NV_NPPIDEFS_H |
| | | // NPP API errors |
| | | static const char *_cudaGetErrorEnum(NppStatus error) |
| | | { |
| | | switch (error) |
| | | { |
| | | case NPP_NOT_SUPPORTED_MODE_ERROR: |
| | | return "NPP_NOT_SUPPORTED_MODE_ERROR"; |
| | | |
| | | case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR: |
| | | return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR"; |
| | | |
| | | case NPP_RESIZE_NO_OPERATION_ERROR: |
| | | return "NPP_RESIZE_NO_OPERATION_ERROR"; |
| | | |
| | | case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY: |
| | | return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY"; |
| | | |
| | | #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000 |
| | | |
| | | case NPP_BAD_ARG_ERROR: |
| | | return "NPP_BAD_ARGUMENT_ERROR"; |
| | | |
| | | case NPP_COEFF_ERROR: |
| | | return "NPP_COEFFICIENT_ERROR"; |
| | | |
| | | case NPP_RECT_ERROR: |
| | | return "NPP_RECTANGLE_ERROR"; |
| | | |
| | | case NPP_QUAD_ERROR: |
| | | return "NPP_QUADRANGLE_ERROR"; |
| | | |
| | | case NPP_MEM_ALLOC_ERR: |
| | | return "NPP_MEMORY_ALLOCATION_ERROR"; |
| | | |
| | | case NPP_HISTO_NUMBER_OF_LEVELS_ERROR: |
| | | return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR"; |
| | | |
| | | case NPP_INVALID_INPUT: |
| | | return "NPP_INVALID_INPUT"; |
| | | |
| | | case NPP_POINTER_ERROR: |
| | | return "NPP_POINTER_ERROR"; |
| | | |
| | | case NPP_WARNING: |
| | | return "NPP_WARNING"; |
| | | |
| | | case NPP_ODD_ROI_WARNING: |
| | | return "NPP_ODD_ROI_WARNING"; |
| | | #else |
| | | |
| | | // These are for CUDA 5.5 or higher |
| | | case NPP_BAD_ARGUMENT_ERROR: |
| | | return "NPP_BAD_ARGUMENT_ERROR"; |
| | | |
| | | case NPP_COEFFICIENT_ERROR: |
| | | return "NPP_COEFFICIENT_ERROR"; |
| | | |
| | | case NPP_RECTANGLE_ERROR: |
| | | return "NPP_RECTANGLE_ERROR"; |
| | | |
| | | case NPP_QUADRANGLE_ERROR: |
| | | return "NPP_QUADRANGLE_ERROR"; |
| | | |
| | | case NPP_MEMORY_ALLOCATION_ERR: |
| | | return "NPP_MEMORY_ALLOCATION_ERROR"; |
| | | |
| | | case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR: |
| | | return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR"; |
| | | |
| | | case NPP_INVALID_HOST_POINTER_ERROR: |
| | | return "NPP_INVALID_HOST_POINTER_ERROR"; |
| | | |
| | | case NPP_INVALID_DEVICE_POINTER_ERROR: |
| | | return "NPP_INVALID_DEVICE_POINTER_ERROR"; |
| | | #endif |
| | | |
| | | case NPP_LUT_NUMBER_OF_LEVELS_ERROR: |
| | | return "NPP_LUT_NUMBER_OF_LEVELS_ERROR"; |
| | | |
| | | case NPP_TEXTURE_BIND_ERROR: |
| | | return "NPP_TEXTURE_BIND_ERROR"; |
| | | |
| | | case NPP_WRONG_INTERSECTION_ROI_ERROR: |
| | | return "NPP_WRONG_INTERSECTION_ROI_ERROR"; |
| | | |
| | | case NPP_NOT_EVEN_STEP_ERROR: |
| | | return "NPP_NOT_EVEN_STEP_ERROR"; |
| | | |
| | | case NPP_INTERPOLATION_ERROR: |
| | | return "NPP_INTERPOLATION_ERROR"; |
| | | |
| | | case NPP_RESIZE_FACTOR_ERROR: |
| | | return "NPP_RESIZE_FACTOR_ERROR"; |
| | | |
| | | case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR: |
| | | return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR"; |
| | | |
| | | |
| | | #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000 |
| | | |
| | | case NPP_MEMFREE_ERR: |
| | | return "NPP_MEMFREE_ERR"; |
| | | |
| | | case NPP_MEMSET_ERR: |
| | | return "NPP_MEMSET_ERR"; |
| | | |
| | | case NPP_MEMCPY_ERR: |
| | | return "NPP_MEMCPY_ERROR"; |
| | | |
| | | case NPP_MIRROR_FLIP_ERR: |
| | | return "NPP_MIRROR_FLIP_ERR"; |
| | | #else |
| | | |
| | | case NPP_MEMFREE_ERROR: |
| | | return "NPP_MEMFREE_ERROR"; |
| | | |
| | | case NPP_MEMSET_ERROR: |
| | | return "NPP_MEMSET_ERROR"; |
| | | |
| | | case NPP_MEMCPY_ERROR: |
| | | return "NPP_MEMCPY_ERROR"; |
| | | |
| | | case NPP_MIRROR_FLIP_ERROR: |
| | | return "NPP_MIRROR_FLIP_ERROR"; |
| | | #endif |
| | | |
| | | case NPP_ALIGNMENT_ERROR: |
| | | return "NPP_ALIGNMENT_ERROR"; |
| | | |
| | | case NPP_STEP_ERROR: |
| | | return "NPP_STEP_ERROR"; |
| | | |
| | | case NPP_SIZE_ERROR: |
| | | return "NPP_SIZE_ERROR"; |
| | | |
| | | case NPP_NULL_POINTER_ERROR: |
| | | return "NPP_NULL_POINTER_ERROR"; |
| | | |
| | | case NPP_CUDA_KERNEL_EXECUTION_ERROR: |
| | | return "NPP_CUDA_KERNEL_EXECUTION_ERROR"; |
| | | |
| | | case NPP_NOT_IMPLEMENTED_ERROR: |
| | | return "NPP_NOT_IMPLEMENTED_ERROR"; |
| | | |
| | | case NPP_ERROR: |
| | | return "NPP_ERROR"; |
| | | |
| | | case NPP_SUCCESS: |
| | | return "NPP_SUCCESS"; |
| | | |
| | | case NPP_WRONG_INTERSECTION_QUAD_WARNING: |
| | | return "NPP_WRONG_INTERSECTION_QUAD_WARNING"; |
| | | |
| | | case NPP_MISALIGNED_DST_ROI_WARNING: |
| | | return "NPP_MISALIGNED_DST_ROI_WARNING"; |
| | | |
| | | case NPP_AFFINE_QUAD_INCORRECT_WARNING: |
| | | return "NPP_AFFINE_QUAD_INCORRECT_WARNING"; |
| | | |
| | | case NPP_DOUBLE_SIZE_WARNING: |
| | | return "NPP_DOUBLE_SIZE_WARNING"; |
| | | |
| | | case NPP_WRONG_INTERSECTION_ROI_WARNING: |
| | | return "NPP_WRONG_INTERSECTION_ROI_WARNING"; |
| | | |
| | | #if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000 |
| | | /* These are 6.0 or higher */ |
| | | case NPP_LUT_PALETTE_BITSIZE_ERROR: |
| | | return "NPP_LUT_PALETTE_BITSIZE_ERROR"; |
| | | |
| | | case NPP_ZC_MODE_NOT_SUPPORTED_ERROR: |
| | | return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR"; |
| | | |
| | | case NPP_QUALITY_INDEX_ERROR: |
| | | return "NPP_QUALITY_INDEX_ERROR"; |
| | | |
| | | case NPP_CHANNEL_ORDER_ERROR: |
| | | return "NPP_CHANNEL_ORDER_ERROR"; |
| | | |
| | | case NPP_ZERO_MASK_VALUE_ERROR: |
| | | return "NPP_ZERO_MASK_VALUE_ERROR"; |
| | | |
| | | case NPP_NUMBER_OF_CHANNELS_ERROR: |
| | | return "NPP_NUMBER_OF_CHANNELS_ERROR"; |
| | | |
| | | case NPP_COI_ERROR: |
| | | return "NPP_COI_ERROR"; |
| | | |
| | | case NPP_DIVISOR_ERROR: |
| | | return "NPP_DIVISOR_ERROR"; |
| | | |
| | | case NPP_CHANNEL_ERROR: |
| | | return "NPP_CHANNEL_ERROR"; |
| | | |
| | | case NPP_STRIDE_ERROR: |
| | | return "NPP_STRIDE_ERROR"; |
| | | |
| | | case NPP_ANCHOR_ERROR: |
| | | return "NPP_ANCHOR_ERROR"; |
| | | |
| | | case NPP_MASK_SIZE_ERROR: |
| | | return "NPP_MASK_SIZE_ERROR"; |
| | | |
| | | case NPP_MOMENT_00_ZERO_ERROR: |
| | | return "NPP_MOMENT_00_ZERO_ERROR"; |
| | | |
| | | case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR: |
| | | return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR"; |
| | | |
| | | case NPP_THRESHOLD_ERROR: |
| | | return "NPP_THRESHOLD_ERROR"; |
| | | |
| | | case NPP_CONTEXT_MATCH_ERROR: |
| | | return "NPP_CONTEXT_MATCH_ERROR"; |
| | | |
| | | case NPP_FFT_FLAG_ERROR: |
| | | return "NPP_FFT_FLAG_ERROR"; |
| | | |
| | | case NPP_FFT_ORDER_ERROR: |
| | | return "NPP_FFT_ORDER_ERROR"; |
| | | |
| | | case NPP_SCALE_RANGE_ERROR: |
| | | return "NPP_SCALE_RANGE_ERROR"; |
| | | |
| | | case NPP_DATA_TYPE_ERROR: |
| | | return "NPP_DATA_TYPE_ERROR"; |
| | | |
| | | case NPP_OUT_OFF_RANGE_ERROR: |
| | | return "NPP_OUT_OFF_RANGE_ERROR"; |
| | | |
| | | case NPP_DIVIDE_BY_ZERO_ERROR: |
| | | return "NPP_DIVIDE_BY_ZERO_ERROR"; |
| | | |
| | | case NPP_RANGE_ERROR: |
| | | return "NPP_RANGE_ERROR"; |
| | | |
| | | case NPP_NO_MEMORY_ERROR: |
| | | return "NPP_NO_MEMORY_ERROR"; |
| | | |
| | | case NPP_ERROR_RESERVED: |
| | | return "NPP_ERROR_RESERVED"; |
| | | |
| | | case NPP_NO_OPERATION_WARNING: |
| | | return "NPP_NO_OPERATION_WARNING"; |
| | | |
| | | case NPP_DIVIDE_BY_ZERO_WARNING: |
| | | return "NPP_DIVIDE_BY_ZERO_WARNING"; |
| | | #endif |
| | | |
| | | } |
| | | |
| | | return "<unknown>"; |
| | | } |
| | | #endif |
| | | |
// DEVICE_RESET is placed on the fatal-error paths right before exit().
// It expands to a cudaDeviceReset() call when __DRIVER_TYPES_H__ is defined
// (presumably the include guard of CUDA's driver_types.h - confirm) and to
// nothing otherwise.  A DEVICE_RESET that is already defined is left alone.
#ifndef DEVICE_RESET
#ifdef __DRIVER_TYPES_H__
#define DEVICE_RESET cudaDeviceReset();
#else
#define DEVICE_RESET
#endif
#endif
| | | |
| | | template< typename T > |
| | | void check(T result, char const *const func, const char *const file, int const line) |
| | | { |
| | | if (result) |
| | | { |
| | | fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", |
| | | file, line, static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func); |
| | | DEVICE_RESET |
| | | // Make sure we call CUDA Device Reset before exiting |
| | | exit(EXIT_FAILURE); |
| | | } |
| | | } |
| | | |
#ifdef __DRIVER_TYPES_H__
// checkCudaErrors(val): passes the value of val, its source text and the
// call site to check(), which reports the failure and exits when the value
// is non-zero.
#define checkCudaErrors(val) check ( (val), #val, __FILE__, __LINE__ )

// getLastCudaError(msg): forwards msg and the call site to
// __getLastCudaError().
#define getLastCudaError(msg) __getLastCudaError (msg, __FILE__, __LINE__)

// Queries cudaGetLastError(); if the result is not cudaSuccess, prints the
// call site, the caller's message, the numeric code and the
// cudaGetErrorString() text to stderr, expands DEVICE_RESET and exits with
// EXIT_FAILURE.  Returns normally otherwise.
inline void __getLastCudaError(const char *errorMessage, const char *file, const int line)
{
    const cudaError_t status = cudaGetLastError();

    if (status == cudaSuccess)
    {
        return;
    }

    fprintf(stderr, "%s(%i) : getLastCudaError() CUDA error : %s : (%d) %s.\n",
            file, line, errorMessage, static_cast<int>(status), cudaGetErrorString(status));
    DEVICE_RESET
    exit(EXIT_FAILURE);
}
#endif
| | | |
// MAX(a,b): larger of the two arguments.
// Every use of an argument is parenthesized so that expressions built from
// operators with lower precedence than '>' or '?:' (for example `x & mask`
// or `c ? p : q`) are compared as a whole.
// NOTE: this is still a macro - each argument is evaluated more than once,
// so do not pass expressions with side effects.
#ifndef MAX
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
#endif
| | | |
// Converts a float to int by moving it half a unit away from zero and then
// truncating, i.e. rounds to nearest with halves going away from zero.
inline int ftoi(float value)
{
    // The offset is a double, so the addition is carried out in double
    // precision before the truncating cast.
    const double offset = (value >= 0) ? 0.5 : -0.5;

    return (int)(value + offset);
}
| | | |
// Beginning of GPU Architecture definitions

// Maps a compute capability (SM version major.minor) to the number of CUDA
// cores per multiprocessor.
//
// major, minor : compute capability of the device, e.g. 7 and 5 for SM 7.5.
// returns      : cores per SM for that architecture.  For a version that is
//                not in the table a notice is printed to stdout and the
//                value of the last table entry is returned.
inline int _ConvertSMVer2Cores(int major, int minor)
{
    // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM)
    typedef struct
    {
        int SM;     // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
        int Cores;
    } sSMtoCores;

    // The table is terminated by the { -1, -1 } sentinel.  The entry just
    // before the sentinel doubles as the fallback for unknown versions.
    static const sSMtoCores nGpuArchCoresPerSM[] =
    {
        { 0x20, 32 },  // Fermi Generation (SM 2.0) GF100 class
        { 0x21, 48 },  // Fermi Generation (SM 2.1) GF10x class
        { 0x30, 192},  // Kepler Generation (SM 3.0) GK10x class
        { 0x32, 192},  // Kepler Generation (SM 3.2) GK10x class
        { 0x35, 192},  // Kepler Generation (SM 3.5) GK11x class
        { 0x37, 192},  // Kepler Generation (SM 3.7) GK21x class
        { 0x50, 128},  // Maxwell Generation (SM 5.0) GM10x class
        { 0x52, 128},  // Maxwell Generation (SM 5.2) GM20x class
        { 0x53, 128},  // Maxwell Generation (SM 5.3) GM20x class
        { 0x60, 64 },  // Pascal Generation (SM 6.0) GP100 class
        { 0x61, 128},  // Pascal Generation (SM 6.1) GP10x class
        { 0x62, 128},  // Pascal Generation (SM 6.2) GP10x class
        { 0x70, 64 },  // Volta Generation (SM 7.0) GV100 class
        { 0x72, 64 },  // Volta Generation (SM 7.2) GV11b class
        { 0x75, 64 },  // Turing Generation (SM 7.5) TU10x class
        { 0x80, 64 },  // Ampere Generation (SM 8.0) GA100 class
        { 0x86, 128},  // Ampere Generation (SM 8.6) GA10x class
        { 0x87, 128},  // Ampere Generation (SM 8.7) GA10x class
        { 0x89, 128},  // Ada Generation (SM 8.9) AD10x class
        { 0x90, 128},  // Hopper Generation (SM 9.0) GH100 class
        {   -1, -1 }
    };

    const int wanted = (major << 4) + minor;
    int index = 0;

    for (; nGpuArchCoresPerSM[index].SM != -1; ++index)
    {
        if (nGpuArchCoresPerSM[index].SM == wanted)
        {
            return nGpuArchCoresPerSM[index].Cores;
        }
    }

    // If we don't find the values, we default to the last real entry so the caller can still run
    printf("MapSMtoCores for SM %d.%d is undefined.  Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores);
    return nGpuArchCoresPerSM[index-1].Cores;
}
// end of GPU Architecture definitions
| | | |
| | | #ifdef __CUDA_RUNTIME_H__ |
| | | // General GPU Device CUDA Initialization |
| | | inline int gpuDeviceInit(int devID) |
| | | { |
| | | int device_count; |
| | | checkCudaErrors(cudaGetDeviceCount(&device_count)); |
| | | |
| | | if (device_count == 0) |
| | | { |
| | | fprintf(stderr, "gpuDeviceInit() CUDA error: no devices supporting CUDA.\n"); |
| | | exit(EXIT_FAILURE); |
| | | } |
| | | |
| | | if (devID < 0) |
| | | { |
| | | devID = 0; |
| | | } |
| | | |
| | | if (devID > device_count-1) |
| | | { |
| | | fprintf(stderr, "\n"); |
| | | fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", device_count); |
| | | fprintf(stderr, ">> gpuDeviceInit (-device=%d) is not a valid GPU device. <<\n", devID); |
| | | fprintf(stderr, "\n"); |
| | | return -devID; |
| | | } |
| | | |
| | | cudaDeviceProp deviceProp; |
| | | checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID)); |
| | | |
| | | if (deviceProp.computeMode == cudaComputeModeProhibited) |
| | | { |
| | | fprintf(stderr, "Error: device is running in <Compute Mode Prohibited>, no threads can use ::cudaSetDevice().\n"); |
| | | return -1; |
| | | } |
| | | |
| | | if (deviceProp.major < 1) |
| | | { |
| | | fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n"); |
| | | exit(EXIT_FAILURE); |
| | | } |
| | | |
| | | checkCudaErrors(cudaSetDevice(devID)); |
| | | printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, deviceProp.name); |
| | | |
| | | return devID; |
| | | } |
| | | |
| | | // This function returns the best GPU (with maximum GFLOPS) |
| | | inline int gpuGetMaxGflopsDeviceId() |
| | | { |
| | | int current_device = 0, sm_per_multiproc = 0; |
| | | int max_perf_device = 0; |
| | | int device_count = 0, best_SM_arch = 0; |
| | | int devices_prohibited = 0; |
| | | |
| | | unsigned long long max_compute_perf = 0; |
| | | cudaDeviceProp deviceProp; |
| | | cudaGetDeviceCount(&device_count); |
| | | |
| | | checkCudaErrors(cudaGetDeviceCount(&device_count)); |
| | | |
| | | if (device_count == 0) |
| | | { |
| | | fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: no devices supporting CUDA.\n"); |
| | | exit(EXIT_FAILURE); |
| | | } |
| | | |
| | | // Find the best major SM Architecture GPU device |
| | | while (current_device < device_count) |
| | | { |
| | | cudaGetDeviceProperties(&deviceProp, current_device); |
| | | |
| | | // If this GPU is not running on Compute Mode prohibited, then we can add it to the list |
| | | if (deviceProp.computeMode != cudaComputeModeProhibited) |
| | | { |
| | | if (deviceProp.major > 0 && deviceProp.major < 9999) |
| | | { |
| | | best_SM_arch = MAX(best_SM_arch, deviceProp.major); |
| | | } |
| | | } |
| | | else |
| | | { |
| | | devices_prohibited++; |
| | | } |
| | | |
| | | current_device++; |
| | | } |
| | | |
| | | if (devices_prohibited == device_count) |
| | | { |
| | | fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: all devices have compute mode prohibited.\n"); |
| | | exit(EXIT_FAILURE); |
| | | } |
| | | |
| | | // Find the best CUDA capable GPU device |
| | | current_device = 0; |
| | | |
| | | while (current_device < device_count) |
| | | { |
| | | cudaGetDeviceProperties(&deviceProp, current_device); |
| | | |
| | | // If this GPU is not running on Compute Mode prohibited, then we can add it to the list |
| | | if (deviceProp.computeMode != cudaComputeModeProhibited) |
| | | { |
| | | if (deviceProp.major == 9999 && deviceProp.minor == 9999) |
| | | { |
| | | sm_per_multiproc = 1; |
| | | } |
| | | else |
| | | { |
| | | sm_per_multiproc = _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor); |
| | | } |
| | | |
| | | unsigned long long compute_perf = (unsigned long long) deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate; |
| | | |
| | | if (compute_perf > max_compute_perf) |
| | | { |
| | | // If we find GPU with SM major > 2, search only these |
| | | if (best_SM_arch > 2) |
| | | { |
| | | // If our device==dest_SM_arch, choose this, or else pass |
| | | if (deviceProp.major == best_SM_arch) |
| | | { |
| | | max_compute_perf = compute_perf; |
| | | max_perf_device = current_device; |
| | | } |
| | | } |
| | | else |
| | | { |
| | | max_compute_perf = compute_perf; |
| | | max_perf_device = current_device; |
| | | } |
| | | } |
| | | } |
| | | |
| | | ++current_device; |
| | | } |
| | | |
| | | return max_perf_device; |
| | | } |
| | | |
| | | |
| | | // Initialization code to find the best CUDA Device |
| | | inline int findCudaDevice(int argc, const char **argv) |
| | | { |
| | | cudaDeviceProp deviceProp; |
| | | int devID = 0; |
| | | |
| | | // If the command-line has a device number specified, use it |
| | | if (checkCmdLineFlag(argc, argv, "device")) |
| | | { |
| | | devID = getCmdLineArgumentInt(argc, argv, "device="); |
| | | |
| | | if (devID < 0) |
| | | { |
| | | printf("Invalid command line parameter\n "); |
| | | exit(EXIT_FAILURE); |
| | | } |
| | | else |
| | | { |
| | | devID = gpuDeviceInit(devID); |
| | | |
| | | if (devID < 0) |
| | | { |
| | | printf("exiting...\n"); |
| | | exit(EXIT_FAILURE); |
| | | } |
| | | } |
| | | } |
| | | else |
| | | { |
| | | // Otherwise pick the device with highest Gflops/s |
| | | devID = gpuGetMaxGflopsDeviceId(); |
| | | checkCudaErrors(cudaSetDevice(devID)); |
| | | checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID)); |
| | | printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID, deviceProp.name, deviceProp.major, deviceProp.minor); |
| | | } |
| | | |
| | | return devID; |
| | | } |
| | | |
| | | // General check for CUDA GPU SM Capabilities |
| | | inline bool checkCudaCapabilities(int major_version, int minor_version) |
| | | { |
| | | cudaDeviceProp deviceProp; |
| | | deviceProp.major = 0; |
| | | deviceProp.minor = 0; |
| | | int dev; |
| | | |
| | | checkCudaErrors(cudaGetDevice(&dev)); |
| | | checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev)); |
| | | |
| | | if ((deviceProp.major > major_version) || |
| | | (deviceProp.major == major_version && deviceProp.minor >= minor_version)) |
| | | { |
| | | printf(" Device %d: <%16s >, Compute SM %d.%d detected\n", dev, deviceProp.name, deviceProp.major, deviceProp.minor); |
| | | return true; |
| | | } |
| | | else |
| | | { |
| | | printf(" No GPU device was found that can support CUDA compute capability %d.%d.\n", major_version, minor_version); |
| | | return false; |
| | | } |
| | | } |
| | | #endif |
| | | |
| | | // end of CUDA Helper Functions |
| | | |
| | | |
| | | #endif |
New file |
| | |
| | | /** |
| | | * Copyright 1993-2013 NVIDIA Corporation. All rights reserved. |
| | | * |
| | | * Please refer to the NVIDIA end user license agreement (EULA) associated |
| | | * with this source code for terms and conditions that govern your use of |
| | | * this software. Any use, reproduction, disclosure, or distribution of |
| | | * this software and related documentation outside the terms of the EULA |
| | | * is strictly prohibited. |
| | | * |
| | | */ |
| | | |
| | | // These are helper functions for the SDK samples (string parsing, timers, etc) |
| | | #ifndef STRING_HELPER_H |
| | | #define STRING_HELPER_H |
| | | |
| | | #include <stdio.h> |
| | | #include <stdlib.h> |
| | | #include <fstream> |
| | | #include <string> |
| | | |
// Portability layer: one spelling for the string / file helpers that are
// named differently on Windows and elsewhere.  Each macro is only defined
// when the including code has not defined it already.
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
// ---- Windows ----
#  ifndef _CRT_SECURE_NO_DEPRECATE
#    define _CRT_SECURE_NO_DEPRECATE
#  endif
#  ifndef STRCASECMP
#    define STRCASECMP  _stricmp
#  endif
#  ifndef STRNCASECMP
#    define STRNCASECMP _strnicmp
#  endif
#  ifndef STRCPY
#    define STRCPY(sFilePath, nLength, sPath) strcpy_s(sFilePath, nLength, sPath)
#  endif

// FOPEN yields the fopen_s() result here, so FOPEN_FAIL tests for non-zero.
#  ifndef FOPEN
#    define FOPEN(fHandle,filename,mode) fopen_s(&fHandle, filename, mode)
#  endif
#  ifndef FOPEN_FAIL
#    define FOPEN_FAIL(result) (result != 0)
#  endif
#  ifndef SSCANF
#    define SSCANF sscanf_s
#  endif
#  ifndef SPRINTF
#    define SPRINTF sprintf_s
#  endif
#else
// ---- every other platform (Linux includes) ----
#  include <string.h>
#  include <strings.h>

#  ifndef STRCASECMP
#    define STRCASECMP  strcasecmp
#  endif
#  ifndef STRNCASECMP
#    define STRNCASECMP strncasecmp
#  endif
// nLength is accepted for signature parity with the Windows form but unused.
#  ifndef STRCPY
#    define STRCPY(sFilePath, nLength, sPath) strcpy(sFilePath, sPath)
#  endif

// FOPEN yields the FILE* here, so FOPEN_FAIL tests for NULL.
#  ifndef FOPEN
#    define FOPEN(fHandle,filename,mode) (fHandle = fopen(filename, mode))
#  endif
#  ifndef FOPEN_FAIL
#    define FOPEN_FAIL(result) (result == NULL)
#  endif
#  ifndef SSCANF
#    define SSCANF sscanf
#  endif
#  ifndef SPRINTF
#    define SPRINTF sprintf
#  endif
#endif

// Exit status defined alongside the helpers (value 2).
#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
| | | |
| | | // CUDA Utility Helper Functions |
// Counts the run of `delimiter` characters at the start of `string`.
//
// returns : the length of that run, i.e. the index of the first other
//           character - but 0 whenever that index is not strictly below
//           strlen(string) - 1.  Consequently a string made only of
//           delimiters, an empty string, and a string with exactly one
//           character after the delimiters (e.g. "--a") all yield 0.
inline int stringRemoveDelimiter(char delimiter, const char *string)
{
    const char *cursor = string;

    while (*cursor == delimiter)
    {
        ++cursor;
    }

    const int skipped    = (int)(cursor - string);
    const int last_index = (int)strlen(string) - 1;

    return (skipped < last_index) ? skipped : 0;
}
| | | |
// Locates the extension of `filename`: the text after its last '.'.
//
// filename  : NUL-terminated name to inspect (not modified).
// extension : receives a pointer into `filename` just past the last '.',
//             or NULL when there is no extension.
// returns   : index in `filename` at which the extension starts, or 0 when
//             there is none.
//
// A name without any '.', an empty name, and a name whose only '.' is its
// first character (e.g. ".bashrc") have no extension.  A trailing '.'
// yields an empty extension.  The whole string is searched, so a '.' in a
// directory component counts as well.
inline int getFileExtension(char *filename, char **extension)
{
    // strrchr finds the last dot without manual index arithmetic; the
    // previous backwards scan missed a dot at index 1 ("a.txt") and read
    // before the buffer for an empty string.
    char *last_dot = strrchr(filename, '.');

    if (last_dot == NULL || last_dot == filename)
    {
        *extension = NULL;
        return 0;
    }

    *extension = last_dot + 1;
    return (int)(*extension - filename);
}
| | | |
| | | |
| | | inline bool checkCmdLineFlag(const int argc, const char **argv, const char *string_ref) |
| | | { |
| | | bool bFound = false; |
| | | |
| | | if (argc >= 1) |
| | | { |
| | | for (int i=1; i < argc; i++) |
| | | { |
| | | int string_start = stringRemoveDelimiter('-', argv[i]); |
| | | const char *string_argv = &argv[i][string_start]; |
| | | |
| | | const char *equal_pos = strchr(string_argv, '='); |
| | | int argv_length = (int)(equal_pos == 0 ? strlen(string_argv) : equal_pos - string_argv); |
| | | |
| | | int length = (int)strlen(string_ref); |
| | | |
| | | if (length == argv_length && !STRNCASECMP(string_argv, string_ref, length)) |
| | | { |
| | | bFound = true; |
| | | continue; |
| | | } |
| | | } |
| | | } |
| | | |
| | | return bFound; |
| | | } |
| | | |
| | | // This function wraps the CUDA Driver API into a template function |
| | | template <class T> |
| | | inline bool getCmdLineArgumentValue(const int argc, const char **argv, const char *string_ref, T *value) |
| | | { |
| | | bool bFound = false; |
| | | |
| | | if (argc >= 1) |
| | | { |
| | | for (int i=1; i < argc; i++) |
| | | { |
| | | int string_start = stringRemoveDelimiter('-', argv[i]); |
| | | const char *string_argv = &argv[i][string_start]; |
| | | int length = (int)strlen(string_ref); |
| | | |
| | | if (!STRNCASECMP(string_argv, string_ref, length)) |
| | | { |
| | | if (length+1 <= (int)strlen(string_argv)) |
| | | { |
| | | int auto_inc = (string_argv[length] == '=') ? 1 : 0; |
| | | *value = (T)atoi(&string_argv[length + auto_inc]); |
| | | } |
| | | |
| | | bFound = true; |
| | | i=argc; |
| | | } |
| | | } |
| | | } |
| | | |
| | | return bFound; |
| | | } |
| | | |
| | | inline int getCmdLineArgumentInt(const int argc, const char **argv, const char *string_ref) |
| | | { |
| | | bool bFound = false; |
| | | int value = -1; |
| | | |
| | | if (argc >= 1) |
| | | { |
| | | for (int i=1; i < argc; i++) |
| | | { |
| | | int string_start = stringRemoveDelimiter('-', argv[i]); |
| | | const char *string_argv = &argv[i][string_start]; |
| | | int length = (int)strlen(string_ref); |
| | | |
| | | if (!STRNCASECMP(string_argv, string_ref, length)) |
| | | { |
| | | if (length+1 <= (int)strlen(string_argv)) |
| | | { |
| | | int auto_inc = (string_argv[length] == '=') ? 1 : 0; |
| | | value = atoi(&string_argv[length + auto_inc]); |
| | | } |
| | | else |
| | | { |
| | | value = 0; |
| | | } |
| | | |
| | | bFound = true; |
| | | continue; |
| | | } |
| | | } |
| | | } |
| | | |
| | | if (bFound) |
| | | { |
| | | return value; |
| | | } |
| | | else |
| | | { |
| | | return 0; |
| | | } |
| | | } |
| | | |
| | | inline float getCmdLineArgumentFloat(const int argc, const char **argv, const char *string_ref) |
| | | { |
| | | bool bFound = false; |
| | | float value = -1; |
| | | |
| | | if (argc >= 1) |
| | | { |
| | | for (int i=1; i < argc; i++) |
| | | { |
| | | int string_start = stringRemoveDelimiter('-', argv[i]); |
| | | const char *string_argv = &argv[i][string_start]; |
| | | int length = (int)strlen(string_ref); |
| | | |
| | | if (!STRNCASECMP(string_argv, string_ref, length)) |
| | | { |
| | | if (length+1 <= (int)strlen(string_argv)) |
| | | { |
| | | int auto_inc = (string_argv[length] == '=') ? 1 : 0; |
| | | value = (float)atof(&string_argv[length + auto_inc]); |
| | | } |
| | | else |
| | | { |
| | | value = 0.f; |
| | | } |
| | | |
| | | bFound = true; |
| | | continue; |
| | | } |
| | | } |
| | | } |
| | | |
| | | if (bFound) |
| | | { |
| | | return value; |
| | | } |
| | | else |
| | | { |
| | | return 0; |
| | | } |
| | | } |
| | | |
| | | inline bool getCmdLineArgumentString(const int argc, const char **argv, |
| | | const char *string_ref, char **string_retval) |
| | | { |
| | | bool bFound = false; |
| | | |
| | | if (argc >= 1) |
| | | { |
| | | for (int i=1; i < argc; i++) |
| | | { |
| | | int string_start = stringRemoveDelimiter('-', argv[i]); |
| | | char *string_argv = (char *)&argv[i][string_start]; |
| | | int length = (int)strlen(string_ref); |
| | | |
| | | if (!STRNCASECMP(string_argv, string_ref, length)) |
| | | { |
| | | *string_retval = &string_argv[length+1]; |
| | | bFound = true; |
| | | continue; |
| | | } |
| | | } |
| | | } |
| | | |
| | | if (!bFound) |
| | | { |
| | | *string_retval = NULL; |
| | | } |
| | | |
| | | return bFound; |
| | | } |
| | | |
| | | ////////////////////////////////////////////////////////////////////////////// |
| | | //! Find the path for a file assuming that |
| | | //! files are found in the searchPath. |
| | | //! |
| | | //! @return the path if succeeded, otherwise 0 |
| | | //! @param filename name of the file |
| | | //! @param executable_path optional absolute path of the executable |
| | | ////////////////////////////////////////////////////////////////////////////// |
| | | inline char *sdkFindFilePath(const char *filename, const char *executable_path) |
| | | { |
| | | // <executable_name> defines a variable that is replaced with the name of the executable |
| | | |
| | | // Typical relative search paths to locate needed companion files (e.g. sample input data, or JIT source files) |
| | | // The origin for the relative search may be the .exe file, a .bat file launching an .exe, a browser .exe launching the .exe or .bat, etc |
| | | const char *searchPath[] = |
| | | { |
| | | "./", // same dir |
| | | "./common/", // "/common/" subdir |
| | | "./common/data/", // "/common/data/" subdir |
| | | "./data/", // "/data/" subdir |
| | | "./src/", // "/src/" subdir |
| | | "./src/<executable_name>/data/", // "/src/<executable_name>/data/" subdir |
| | | "./inc/", // "/inc/" subdir |
| | | "./0_Simple/", // "/0_Simple/" subdir |
| | | "./1_Utilities/", // "/1_Utilities/" subdir |
| | | "./2_Graphics/", // "/2_Graphics/" subdir |
| | | "./3_Imaging/", // "/3_Imaging/" subdir |
| | | "./4_Finance/", // "/4_Finance/" subdir |
| | | "./5_Simulations/", // "/5_Simulations/" subdir |
| | | "./6_Advanced/", // "/6_Advanced/" subdir |
| | | "./7_CUDALibraries/", // "/7_CUDALibraries/" subdir |
| | | "./8_Android/", // "/8_Android/" subdir |
| | | "./samples/", // "/samples/" subdir |
| | | |
| | | "./0_Simple/<executable_name>/data/", // "/0_Simple/<executable_name>/data/" subdir |
| | | "./1_Utilities/<executable_name>/data/", // "/1_Utilities/<executable_name>/data/" subdir |
| | | "./2_Graphics/<executable_name>/data/", // "/2_Graphics/<executable_name>/data/" subdir |
| | | "./3_Imaging/<executable_name>/data/", // "/3_Imaging/<executable_name>/data/" subdir |
| | | "./4_Finance/<executable_name>/data/", // "/4_Finance/<executable_name>/data/" subdir |
| | | "./5_Simulations/<executable_name>/data/", // "/5_Simulations/<executable_name>/data/" subdir |
| | | "./6_Advanced/<executable_name>/data/", // "/6_Advanced/<executable_name>/data/" subdir |
| | | "./7_CUDALibraries/<executable_name>/", // "/7_CUDALibraries/<executable_name>/" subdir |
| | | "./7_CUDALibraries/<executable_name>/data/", // "/7_CUDALibraries/<executable_name>/data/" subdir |
| | | |
| | | "../", // up 1 in tree |
| | | "../common/", // up 1 in tree, "/common/" subdir |
| | | "../common/data/", // up 1 in tree, "/common/data/" subdir |
| | | "../data/", // up 1 in tree, "/data/" subdir |
| | | "../src/", // up 1 in tree, "/src/" subdir |
| | | "../inc/", // up 1 in tree, "/inc/" subdir |
| | | |
| | | "../0_Simple/<executable_name>/data/", // up 1 in tree, "/0_Simple/<executable_name>/" subdir |
| | | "../1_Utilities/<executable_name>/data/", // up 1 in tree, "/1_Utilities/<executable_name>/" subdir |
| | | "../2_Graphics/<executable_name>/data/", // up 1 in tree, "/2_Graphics/<executable_name>/" subdir |
| | | "../3_Imaging/<executable_name>/data/", // up 1 in tree, "/3_Imaging/<executable_name>/" subdir |
| | | "../4_Finance/<executable_name>/data/", // up 1 in tree, "/4_Finance/<executable_name>/" subdir |
| | | "../5_Simulations/<executable_name>/data/", // up 1 in tree, "/5_Simulations/<executable_name>/" subdir |
| | | "../6_Advanced/<executable_name>/data/", // up 1 in tree, "/6_Advanced/<executable_name>/" subdir |
| | | "../7_CUDALibraries/<executable_name>/data/",// up 1 in tree, "/7_CUDALibraries/<executable_name>/" subdir |
| | | "../8_Android/<executable_name>/data/", // up 1 in tree, "/8_Android/<executable_name>/" subdir |
| | | "../samples/<executable_name>/data/", // up 1 in tree, "/samples/<executable_name>/" subdir |
| | | "../../", // up 2 in tree |
| | | "../../common/", // up 2 in tree, "/common/" subdir |
| | | "../../common/data/", // up 2 in tree, "/common/data/" subdir |
| | | "../../data/", // up 2 in tree, "/data/" subdir |
| | | "../../src/", // up 2 in tree, "/src/" subdir |
| | | "../../inc/", // up 2 in tree, "/inc/" subdir |
| | | "../../sandbox/<executable_name>/data/", // up 2 in tree, "/sandbox/<executable_name>/" subdir |
| | | "../../0_Simple/<executable_name>/data/", // up 2 in tree, "/0_Simple/<executable_name>/" subdir |
| | | "../../1_Utilities/<executable_name>/data/", // up 2 in tree, "/1_Utilities/<executable_name>/" subdir |
| | | "../../2_Graphics/<executable_name>/data/", // up 2 in tree, "/2_Graphics/<executable_name>/" subdir |
| | | "../../3_Imaging/<executable_name>/data/", // up 2 in tree, "/3_Imaging/<executable_name>/" subdir |
| | | "../../4_Finance/<executable_name>/data/", // up 2 in tree, "/4_Finance/<executable_name>/" subdir |
| | | "../../5_Simulations/<executable_name>/data/", // up 2 in tree, "/5_Simulations/<executable_name>/" subdir |
| | | "../../6_Advanced/<executable_name>/data/", // up 2 in tree, "/6_Advanced/<executable_name>/" subdir |
| | | "../../7_CUDALibraries/<executable_name>/data/", // up 2 in tree, "/7_CUDALibraries/<executable_name>/" subdir |
| | | "../../8_Android/<executable_name>/data/", // up 2 in tree, "/8_Android/<executable_name>/" subdir |
| | | "../../samples/<executable_name>/data/", // up 2 in tree, "/samples/<executable_name>/" subdir |
| | | "../../../", // up 3 in tree |
| | | "../../../src/<executable_name>/", // up 3 in tree, "/src/<executable_name>/" subdir |
| | | "../../../src/<executable_name>/data/", // up 3 in tree, "/src/<executable_name>/data/" subdir |
| | | "../../../src/<executable_name>/src/", // up 3 in tree, "/src/<executable_name>/src/" subdir |
| | | "../../../src/<executable_name>/inc/", // up 3 in tree, "/src/<executable_name>/inc/" subdir |
| | | "../../../sandbox/<executable_name>/", // up 3 in tree, "/sandbox/<executable_name>/" subdir |
| | | "../../../sandbox/<executable_name>/data/", // up 3 in tree, "/sandbox/<executable_name>/data/" subdir |
| | | "../../../sandbox/<executable_name>/src/", // up 3 in tree, "/sandbox/<executable_name>/src/" subdir |
| | | "../../../sandbox/<executable_name>/inc/", // up 3 in tree, "/sandbox/<executable_name>/inc/" subdir |
| | | "../../../0_Simple/<executable_name>/data/", // up 3 in tree, "/0_Simple/<executable_name>/" subdir |
| | | "../../../1_Utilities/<executable_name>/data/", // up 3 in tree, "/1_Utilities/<executable_name>/" subdir |
| | | "../../../2_Graphics/<executable_name>/data/", // up 3 in tree, "/2_Graphics/<executable_name>/" subdir |
| | | "../../../3_Imaging/<executable_name>/data/", // up 3 in tree, "/3_Imaging/<executable_name>/" subdir |
| | | "../../../4_Finance/<executable_name>/data/", // up 3 in tree, "/4_Finance/<executable_name>/" subdir |
| | | "../../../5_Simulations/<executable_name>/data/", // up 3 in tree, "/5_Simulations/<executable_name>/" subdir |
| | | "../../../6_Advanced/<executable_name>/data/", // up 3 in tree, "/6_Advanced/<executable_name>/" subdir |
| | | "../../../7_CUDALibraries/<executable_name>/data/", // up 3 in tree, "/7_CUDALibraries/<executable_name>/" subdir |
| | | "../../../8_Android/<executable_name>/data/", // up 3 in tree, "/8_Android/<executable_name>/" subdir |
| | | "../../../0_Simple/<executable_name>/", // up 3 in tree, "/0_Simple/<executable_name>/" subdir |
| | | "../../../1_Utilities/<executable_name>/", // up 3 in tree, "/1_Utilities/<executable_name>/" subdir |
| | | "../../../2_Graphics/<executable_name>/", // up 3 in tree, "/2_Graphics/<executable_name>/" subdir |
| | | "../../../3_Imaging/<executable_name>/", // up 3 in tree, "/3_Imaging/<executable_name>/" subdir |
| | | "../../../4_Finance/<executable_name>/", // up 3 in tree, "/4_Finance/<executable_name>/" subdir |
| | | "../../../5_Simulations/<executable_name>/", // up 3 in tree, "/5_Simulations/<executable_name>/" subdir |
| | | "../../../6_Advanced/<executable_name>/", // up 3 in tree, "/6_Advanced/<executable_name>/" subdir |
| | | "../../../7_CUDALibraries/<executable_name>/", // up 3 in tree, "/7_CUDALibraries/<executable_name>/" subdir |
| | | "../../../8_Android/<executable_name>/", // up 3 in tree, "/8_Android/<executable_name>/" subdir |
| | | "../../../samples/<executable_name>/data/", // up 3 in tree, "/samples/<executable_name>/" subdir |
| | | "../../../common/", // up 3 in tree, "../../../common/" subdir |
| | | "../../../common/data/", // up 3 in tree, "../../../common/data/" subdir |
| | | "../../../data/", // up 3 in tree, "../../../data/" subdir |
| | | "../../../../", // up 4 in tree |
| | | "../../../../src/<executable_name>/", // up 4 in tree, "/src/<executable_name>/" subdir |
| | | "../../../../src/<executable_name>/data/", // up 4 in tree, "/src/<executable_name>/data/" subdir |
| | | "../../../../src/<executable_name>/src/", // up 4 in tree, "/src/<executable_name>/src/" subdir |
| | | "../../../../src/<executable_name>/inc/", // up 4 in tree, "/src/<executable_name>/inc/" subdir |
| | | "../../../../sandbox/<executable_name>/", // up 4 in tree, "/sandbox/<executable_name>/" subdir |
| | | "../../../../sandbox/<executable_name>/data/", // up 4 in tree, "/sandbox/<executable_name>/data/" subdir |
| | | "../../../../sandbox/<executable_name>/src/", // up 4 in tree, "/sandbox/<executable_name>/src/" subdir |
| | | "../../../../sandbox/<executable_name>/inc/", // up 4 in tree, "/sandbox/<executable_name>/inc/" subdir |
| | | "../../../../0_Simple/<executable_name>/data/", // up 4 in tree, "/0_Simple/<executable_name>/" subdir |
| | | "../../../../1_Utilities/<executable_name>/data/", // up 4 in tree, "/1_Utilities/<executable_name>/" subdir |
| | | "../../../../2_Graphics/<executable_name>/data/", // up 4 in tree, "/2_Graphics/<executable_name>/" subdir |
| | | "../../../../3_Imaging/<executable_name>/data/", // up 4 in tree, "/3_Imaging/<executable_name>/" subdir |
| | | "../../../../4_Finance/<executable_name>/data/", // up 4 in tree, "/4_Finance/<executable_name>/" subdir |
| | | "../../../../5_Simulations/<executable_name>/data/",// up 4 in tree, "/5_Simulations/<executable_name>/" subdir |
| | | "../../../../6_Advanced/<executable_name>/data/", // up 4 in tree, "/6_Advanced/<executable_name>/" subdir |
| | | "../../../../7_CUDALibraries/<executable_name>/data/", // up 4 in tree, "/7_CUDALibraries/<executable_name>/" subdir |
| | | "../../../../8_Android/<executable_name>/data/", // up 4 in tree, "/8_Android/<executable_name>/" subdir |
| | | "../../../../0_Simple/<executable_name>/", // up 4 in tree, "/0_Simple/<executable_name>/" subdir |
| | | "../../../../1_Utilities/<executable_name>/", // up 4 in tree, "/1_Utilities/<executable_name>/" subdir |
| | | "../../../../2_Graphics/<executable_name>/", // up 4 in tree, "/2_Graphics/<executable_name>/" subdir |
| | | "../../../../3_Imaging/<executable_name>/", // up 4 in tree, "/3_Imaging/<executable_name>/" subdir |
| | | "../../../../4_Finance/<executable_name>/", // up 4 in tree, "/4_Finance/<executable_name>/" subdir |
| | | "../../../../5_Simulations/<executable_name>/",// up 4 in tree, "/5_Simulations/<executable_name>/" subdir |
| | | "../../../../6_Advanced/<executable_name>/", // up 4 in tree, "/6_Advanced/<executable_name>/" subdir |
| | | "../../../../7_CUDALibraries/<executable_name>/", // up 4 in tree, "/7_CUDALibraries/<executable_name>/" subdir |
| | | "../../../../8_Android/<executable_name>/", // up 4 in tree, "/8_Android/<executable_name>/" subdir |
| | | "../../../../samples/<executable_name>/data/", // up 4 in tree, "/samples/<executable_name>/" subdir |
| | | "../../../../common/", // up 4 in tree, "../../../common/" subdir |
| | | "../../../../common/data/", // up 4 in tree, "../../../common/data/" subdir |
| | | "../../../../data/", // up 4 in tree, "../../../data/" subdir |
| | | "../../../../../", // up 5 in tree |
| | | "../../../../../src/<executable_name>/", // up 5 in tree, "/src/<executable_name>/" subdir |
| | | "../../../../../src/<executable_name>/data/", // up 5 in tree, "/src/<executable_name>/data/" subdir |
| | | "../../../../../src/<executable_name>/src/", // up 5 in tree, "/src/<executable_name>/src/" subdir |
| | | "../../../../../src/<executable_name>/inc/", // up 5 in tree, "/src/<executable_name>/inc/" subdir |
| | | "../../../../../sandbox/<executable_name>/", // up 5 in tree, "/sandbox/<executable_name>/" subdir |
| | | "../../../../../sandbox/<executable_name>/data/", // up 5 in tree, "/sandbox/<executable_name>/data/" subdir |
| | | "../../../../../sandbox/<executable_name>/src/", // up 5 in tree, "/sandbox/<executable_name>/src/" subdir |
| | | "../../../../../sandbox/<executable_name>/inc/", // up 5 in tree, "/sandbox/<executable_name>/inc/" subdir |
| | | "../../../../../0_Simple/<executable_name>/data/", // up 5 in tree, "/0_Simple/<executable_name>/" subdir |
| | | "../../../../../1_Utilities/<executable_name>/data/", // up 5 in tree, "/1_Utilities/<executable_name>/" subdir |
| | | "../../../../../2_Graphics/<executable_name>/data/", // up 5 in tree, "/2_Graphics/<executable_name>/" subdir |
| | | "../../../../../3_Imaging/<executable_name>/data/", // up 5 in tree, "/3_Imaging/<executable_name>/" subdir |
| | | "../../../../../4_Finance/<executable_name>/data/", // up 5 in tree, "/4_Finance/<executable_name>/" subdir |
| | | "../../../../../5_Simulations/<executable_name>/data/",// up 5 in tree, "/5_Simulations/<executable_name>/" subdir |
| | | "../../../../../6_Advanced/<executable_name>/data/", // up 5 in tree, "/6_Advanced/<executable_name>/" subdir |
| | | "../../../../../7_CUDALibraries/<executable_name>/data/", // up 5 in tree, "/7_CUDALibraries/<executable_name>/" subdir |
| | | "../../../../../8_Android/<executable_name>/data/", // up 5 in tree, "/8_Android/<executable_name>/" subdir |
| | | "../../../../../samples/<executable_name>/data/", // up 5 in tree, "/samples/<executable_name>/" subdir |
| | | "../../../../../common/", // up 5 in tree, "../../../common/" subdir |
| | | "../../../../../common/data/", // up 5 in tree, "../../../common/data/" subdir |
| | | }; |
| | | |
| | | // Extract the executable name |
| | | std::string executable_name; |
| | | |
| | | if (executable_path != 0) |
| | | { |
| | | executable_name = std::string(executable_path); |
| | | |
| | | #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) |
| | | // Windows path delimiter |
| | | size_t delimiter_pos = executable_name.find_last_of('\\'); |
| | | executable_name.erase(0, delimiter_pos + 1); |
| | | |
| | | if (executable_name.rfind(".exe") != std::string::npos) |
| | | { |
| | | // we strip .exe, only if the .exe is found |
| | | executable_name.resize(executable_name.size() - 4); |
| | | } |
| | | |
| | | #else |
| | | // Linux & OSX path delimiter |
| | | size_t delimiter_pos = executable_name.find_last_of('/'); |
| | | executable_name.erase(0,delimiter_pos+1); |
| | | #endif |
| | | } |
| | | |
| | | // Loop over all search paths and return the first hit |
| | | for (unsigned int i = 0; i < sizeof(searchPath)/sizeof(char *); ++i) |
| | | { |
| | | std::string path(searchPath[i]); |
| | | size_t executable_name_pos = path.find("<executable_name>"); |
| | | |
| | | // If there is executable_name variable in the searchPath |
| | | // replace it with the value |
| | | if (executable_name_pos != std::string::npos) |
| | | { |
| | | if (executable_path != 0) |
| | | { |
| | | path.replace(executable_name_pos, strlen("<executable_name>"), executable_name); |
| | | } |
| | | else |
| | | { |
| | | // Skip this path entry if no executable argument is given |
| | | continue; |
| | | } |
| | | } |
| | | |
| | | #ifdef _DEBUG |
| | | printf("sdkFindFilePath <%s> in %s\n", filename, path.c_str()); |
| | | #endif |
| | | |
| | | // Test if the file exists |
| | | path.append(filename); |
| | | FILE *fp; |
| | | FOPEN(fp, path.c_str(), "rb"); |
| | | |
| | | if (fp != NULL) |
| | | { |
| | | fclose(fp); |
| | | // File found |
| | | // returning an allocated array here for backwards compatibility reasons |
| | | char *file_path = (char *) malloc(path.length() + 1); |
| | | STRCPY(file_path, path.length() + 1, path.c_str()); |
| | | return file_path; |
| | | } |
| | | |
| | | if (fp) |
| | | { |
| | | fclose(fp); |
| | | } |
| | | } |
| | | |
| | | // File not found |
| | | return 0; |
| | | } |
| | | |
| | | #endif |
| | |
| | | |
| | | /*
| | |  * create_batch_image allocates an array of `size` c_img entries and sets the
| | |  * data_ pointer of every entry to NULL.
| | |  *
| | |  * Returns the array as an opaque pointer, or NULL when the allocation fails
| | |  * (which is also the expected outcome for a negative size, since the byte
| | |  * count then wraps to a huge unsigned value).
| | |  * NOTE(review): the code that releases this array is not visible here;
| | |  * presumably the caller frees it -- confirm.
| | |  */
| | | void *create_batch_image(const int size){
| | |     c_img *imgs = (c_img*)malloc(size * sizeof(c_img));
| | |     if (imgs == NULL) {
| | |         /* Allocation failed: report it instead of writing through NULL. */
| | |         return NULL;
| | |     }
| | |     for(int i = 0; i < size; i++){
| | |         imgs[i].data_ = NULL;
| | |     }
| | |     return imgs;
| | | }
| | | int fill_images(void *imgs, const int size, const int index, void *data, const int w, const int h, const int c){ |
| | | if(!imgs || !data || size <= index) return -1; |
| | | c_img *images = (c_img*)imgs; |
| | | images[index].data_ = data; |
| | | images[index].data_ = (unsigned char*)data; |
| | | images[index].w_ = w; |
| | | images[index].h_ = h; |
| | | images[index].c_ = c; |
| | |
| | | } |
| | | return ret; |
| | | } |
| | | void *process(void *handle, void *imgs, const int size){ |
| | | void *process(void *handle, void *imgs, const int size, void *result){ |
| | | c_img *images = (c_img*)imgs; |
| | | c_fgRet *res = init_fgres(size); |
| | | c_fgRet *res = (c_fgRet *)result; |
| | | int ret = c_human_tracker_process(handle, images, size, res); |
| | | if (ret != 0) |
| | | return NULL; |
| | |
| | | // HumanTracker holds the native (cgo) state of one human tracker instance.
| | | //
| | | // The constructor builds this struct with a positional literal, so the field
| | | // order below must not be changed.
| | | type HumanTracker struct {
| | | // handle is the pointer returned by C.c_human_tracker_create; Free hands it
| | | // to C.c_human_tracker_release.
| | | handle unsafe.Pointer
| | | // result is the buffer returned by C.init_fgres for batchSize entries; it is
| | | // passed to C.process on each call and released with C.free in Free.
| | | result unsafe.Pointer
| | | // batchSize is the batch size given to C.c_human_tracker_create and
| | | // C.init_fgres, and passed as the image count to C.process.
| | | batchSize int
| | | }
| | | |
| | |
| | | } |
| | | p := C.c_human_tracker_create(C.int(gpu), C.int(batchSize), C.int(flag)) |
| | | if p != nil { |
| | | return &HumanTracker{p, batchSize} |
| | | res := C.init_fgres(C.int(batchSize)) |
| | | return &HumanTracker{p, res, batchSize} |
| | | } |
| | | return nil |
| | | } |
| | |
| | | // Free releases the native tracker handle and the result buffer.
| | | //
| | | // It is safe to call on a nil receiver and to call more than once: each
| | | // pointer is cleared right after it is released, so a repeated call cannot
| | | // release or free the same memory again.
| | | func (h *HumanTracker) Free() {
| | | 	if h == nil {
| | | 		return
| | | 	}
| | | 	if h.handle != nil {
| | | 		C.c_human_tracker_release(&h.handle)
| | | 		// Clear the handle ourselves in case the C side did not reset it.
| | | 		h.handle = nil
| | | 	}
| | | 	if h.result != nil {
| | | 		C.free(h.result)
| | | 		// Prevent a double free on a second call.
| | | 		h.result = nil
| | | 	}
| | | }
| | | |
| | |
| | | } |
| | | } |
| | | |
| | | cRet := C.process(h.handle, cImgs, C.int(h.batchSize)) |
| | | cRet := C.process(h.handle, cImgs, C.int(h.batchSize), h.result) |
| | | if cRet == nil { |
| | | return nil, errors.New("create C results error") |
| | | } |
| | | defer C.free(unsafe.Pointer(cRet)) |
| | | |
| | | var result []FgResult |
| | | p := uintptr(cRet) |
| | |
| | | "time" |
| | | |
| | | "track/gohumantrack" |
| | | |
| | | "track/goconv" |
| | | "basic.com/valib/goffmpeg.git" |
| | | ) |
| | | |
| | |
| | | flag.StringVar(&url1, "cam1", "", "url") |
| | | flag.StringVar(&url2, "cam2", "", "url") |
| | | } |
| | | func main() { |
| | | flag.Parse() |
| | | fmt.Println("url1: ", url1, " url2: ", url2) |
| | | if len(url1) == 0 || len(url2) == 0 { |
| | | fmt.Println("url null") |
| | | } |
| | | |
| | | tracker := gohumantrack.NewHumanTracker(0, 2, 0) |
| | | fmt.Println("version: ", tracker.GetVersion()) |
| | | |
| | | goffmpeg.InitFFmpeg("./runtime/libcffmpeg.so") |
| | | |
| | | cam1 := goffmpeg.New(false, false) |
| | | cam1.Run(url1) |
| | | cam1.BuildDecoder() |
| | | cam1.CloseStream() |
| | | |
| | | cam2 := goffmpeg.New(false, false) |
| | | cam2.Run(url2) |
| | | cam2.BuildDecoder() |
| | | cam2.CloseStream() |
| | | |
| | | for { |
| | | func run(cam1, cam2 *goffmpeg.GoFFMPEG, tracker *gohumantrack.HumanTracker) bool { |
| | | data1, ow1, oh1, _ := cam1.GetYUV() |
| | | data2, ow2, oh2, _ := cam2.GetYUV() |
| | | if ow1 > 0 && oh1 > 0 && ow2 > 0 && oh2 > 0 { |
| | | |
| | | bgr1 := goconv.YUV2BGR(data1, ow1, oh1) |
| | | bgr2 := goconv.YUV2BGR(data2, ow2, oh2) |
| | | if bgr1 == nil || bgr2 == nil{ |
| | | return false |
| | | } |
| | | var images []gohumantrack.ImageHumanTracker |
| | | img := gohumantrack.ImageHumanTracker{ |
| | | Data: data1, |
| | | Data: bgr1, |
| | | Width: ow1, |
| | | Height: oh1, |
| | | Channel: 3, |
| | | } |
| | | images = append(images, img) |
| | | img = gohumantrack.ImageHumanTracker{ |
| | | Data: data2, |
| | | Data: bgr2, |
| | | Width: ow2, |
| | | Height: oh2, |
| | | Channel: 3, |
| | |
| | | } else { |
| | | fmt.Println("process error: ", err) |
| | | } |
| | | } else { |
| | | // fmt.Println("cam1 size: ", ow1, "x", oh1, " cam2 size: ", ow2, "x", oh2) |
| | | return true |
| | | } |
| | | return false |
| | | } |
| | | |
| | | func main() { |
| | | flag.Parse() |
| | | fmt.Println("url1: ", url1, " url2: ", url2) |
| | | if len(url1) == 0 || len(url2) == 0 { |
| | | fmt.Println("url null") |
| | | } |
| | | |
| | | tracker := gohumantrack.NewHumanTracker(0, 2, 0) |
| | | fmt.Println("version: ", tracker.GetVersion()) |
| | | |
| | | goffmpeg.InitFFmpeg("./runtime/libcffmpeg.so") |
| | | |
| | | cam1 := goffmpeg.New(false, false) |
| | | cam1.Run(url1) |
| | | cam1.BuildDecoder() |
| | | cam1.CloseStream() |
| | | |
| | | cam2 := goffmpeg.New(false, false) |
| | | cam2.Run(url2) |
| | | cam2.BuildDecoder() |
| | | cam2.CloseStream() |
| | | |
| | | for { |
| | | if !run(cam1, cam2, tracker){ |
| | | time.Sleep(38 * time.Millisecond) |
| | | } |
| | | |