stydb
2019-12-03 6d6e6d425c4fe63a487ff27be9341671b2c1dd93
update
6个文件已添加
6个文件已修改
2957 ■■■■■ 已修改文件
.gitignore 1 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
go.mod 8 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
go.sum 22 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
goconv/conv.cpp 592 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
goconv/conv.h 23 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
goconv/goconv.go 258 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
goconv/inc/Exceptions.h 181 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
goconv/inc/helper_cuda.h 1261 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
goconv/inc/helper_string.h 526 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
gohumantrack/gohumantrack.go 19 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
main.go 66 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
runtime/libcffmpeg.so 补丁 | 查看 | 原始文档 | blame | 历史
.gitignore
@@ -1,2 +1,3 @@
*.avi
track
FPN_REID*
go.mod
@@ -2,4 +2,10 @@
go 1.12
require basic.com/valib/goffmpeg.git v0.0.0-20191129092141-7363d175bd62
require (
    basic.com/valib/godraw.git v0.0.0-20191122082247-26e9987cd183
    basic.com/valib/goffmpeg.git v0.0.0-20191203025021-783b80757681
    basic.com/valib/gogpu.git v0.0.0-20190711044327-62043b070865
    github.com/disintegration/imaging v1.6.2
    github.com/llgcode/draw2d v0.0.0-20190810100245-79e59b6b8fbc // indirect
)
go.sum
@@ -1,2 +1,20 @@
basic.com/valib/goffmpeg.git v0.0.0-20191129092141-7363d175bd62 h1:KzkPzJE76RkHeYBgAMfSiz1vzJaQRKkRDCXnw2XmxqA=
basic.com/valib/goffmpeg.git v0.0.0-20191129092141-7363d175bd62/go.mod h1:1x75Hawh/BjgPsQtuJ24px89gzk3uAslD8e0Xs6Z7GQ=
basic.com/valib/godraw.git v0.0.0-20191122082247-26e9987cd183 h1:QQ1L0Ev4vcSD23d99+rW5S/mnmdTAPAI2GZ7tkMgCE4=
basic.com/valib/godraw.git v0.0.0-20191122082247-26e9987cd183/go.mod h1:LntbWczdG87utrKx7rWYmIh1VZ+X2oPN7Q2IXb6oRE0=
basic.com/valib/goffmpeg.git v0.0.0-20191203025021-783b80757681 h1:n5zinCkvVghdKw0ZenxMo+lFjaXhHSr9ecICuQZLjNw=
basic.com/valib/goffmpeg.git v0.0.0-20191203025021-783b80757681/go.mod h1:1x75Hawh/BjgPsQtuJ24px89gzk3uAslD8e0Xs6Z7GQ=
basic.com/valib/gogpu.git v0.0.0-20190711044327-62043b070865 h1:3XvkNdRlJDXV45ie8U0uGA9ImJZtyTT0C/h+4Rizv0Y=
basic.com/valib/gogpu.git v0.0.0-20190711044327-62043b070865/go.mod h1:yxux5RP4A6a591vWljXxGlHdERVVyWDD3TwwQjuyogw=
github.com/disintegration/imaging v1.6.2 h1:w1LecBlG2Lnp8B3jk5zSuNqd7b4DXhcjwek1ei82L+c=
github.com/disintegration/imaging v1.6.2/go.mod h1:44/5580QXChDfwIclfc/PCwrr44amcmDAg8hxG0Ewe4=
github.com/go-gl/gl v0.0.0-20180407155706-68e253793080/go.mod h1:482civXOzJJCPzJ4ZOX/pwvXBWSnzD4OKMdH4ClKGbk=
github.com/go-gl/glfw v0.0.0-20180426074136-46a8d530c326/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g=
github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
github.com/llgcode/draw2d v0.0.0-20190810100245-79e59b6b8fbc h1:ZvNhCJfRtl03A0VOIfvO9W22/0b6dmn1APa4Q6j9oHM=
github.com/llgcode/draw2d v0.0.0-20190810100245-79e59b6b8fbc/go.mod h1:mVa0dA29Db2S4LVqDYLlsePDzRJLDfdhVZiI15uY0FA=
github.com/llgcode/ps v0.0.0-20150911083025-f1443b32eedb/go.mod h1:1l8ky+Ew27CMX29uG+a2hNOKpeNYEQjjtiALiBlFQbY=
golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs=
golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8 h1:hVwzHzIUGRjiF7EcUjqNxk3NCfkPxbDKRdnNE1Rpg0U=
golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
goconv/conv.cpp
New file
@@ -0,0 +1,592 @@
#include "conv.h"
#include <cmath>
#include <sys/time.h>
#include <npp.h>
#include <helper_cuda.h>
#include <helper_string.h>
#include "Exceptions.h"
// Alignment masks used as `(n + MASK) & ~MASK`, i.e. they round a row width up
// to the next multiple of 512 bytes when computing host-side plane pitches.
static const int MEMORY_ALGN_DEVICE = 511;
static const int HD_MEMORY_ALGN_DEVICE = 511;
// Integer division of x by d, rounded up (for non-negative x and positive d).
static inline int DivUp(int x, int d)
{
    const int numerator = x + d - 1;
    return numerator / d;
}
static int set_data(uint8_t *data, const int width, const int height, unsigned char *mY, unsigned char *mU, unsigned char *mV)
{
    uint8_t* yuv_data = (uint8_t*)data;
    if (!yuv_data)
    {
        return -1;
    }
    uint32_t    i, j;
    uint32_t    off;
    uint32_t    off_yuv;
    uint32_t    half_h;
    uint32_t    half_w;
    uint32_t    u_size;
    uint8_t*    yuv_ptr;
    uint8_t*    u_ptr;
    uint8_t*    v_ptr;
    int w = width;
    int h = height;
    //从这一句来看,即使是同一种格式,进来也要处理一下。
    size_t nPitch  = (w + HD_MEMORY_ALGN_DEVICE) & ~HD_MEMORY_ALGN_DEVICE;
    off     = 0;
    off_yuv = 0;
    for (i = 0; i < (uint32_t)h; i++)
    {
        memcpy(mY + off, yuv_data + off_yuv, w);
        off     += nPitch;
        off_yuv += w;
    }
    half_w = w >> 1;
    half_h = h >> 1;
    u_size = half_w * half_h;
    nPitch = (half_w + HD_MEMORY_ALGN_DEVICE) & ~HD_MEMORY_ALGN_DEVICE;
    off_yuv = w * h;
    off = 0;
    for (i = 0; i < half_h; i++)
    {
        yuv_ptr = yuv_data + off_yuv;
        u_ptr = mU + off;
        v_ptr = mV + off;
        for (j = 0; j < (uint32_t)w; j += 2)
        {
            *u_ptr++ = *yuv_ptr++;
            *v_ptr++ = *yuv_ptr++;
        }
        off_yuv += w;
        off += nPitch;
    }
    return 0;
}
/////////////handle
// GPU-backed converter for one fixed (source size, destination size, GPU) combination.
// Buffers are allocated lazily by the init_* helpers on first use and released in the
// destructor; every buffer handed back to callers stays owned by this object.
class convertor{
public:
    convertor(const int srcW, const int srcH, const int dstW, const int dstH, const int gpu);
    ~convertor();
    // Converts the planes uploaded by fill_yuv() to full-size BGR (host buffer owned by this object).
    int yuv2bgr(unsigned char **bgr, int *bgrLen);
    // in == NULL: resize the planes uploaded by fill_yuv() and convert them to BGR.
    // in != NULL: treat `in` as a packed width*height BGR image and resize it.
    int resize2bgr(unsigned char *in, unsigned char **data, int *data_len);
    // CPU nearest-neighbour resize of a packed frame passed in `in`.
    int resizeyuv(unsigned char *in, unsigned char **data, int *data_len);
    // Splits a packed frame into planes and uploads them to the GPU.
    int fill_yuv(const unsigned char *yuv);
private:
    void init_yuv();
    void init_resize();
    void init_resize_bgr();
    void init_resize_yuv();
private:
    // Source frame geometry and device-side source planes (set up by init_yuv).
    int width;
    int height;
    unsigned char aSamplingFactors[3];
    int nMCUBlocksH;
    int nMCUBlocksV;
    Npp8u   *apSrcImage[3];
    NppiSize aSrcSize[3];
    Npp32s   aSrcImageStep[3];
    size_t   aSrcPitch[3];
    // Pinned host staging planes filled by set_data().
    uint8_t *mY;
    uint8_t *mU;
    uint8_t *mV;
/////////////////////////// resized planes (set up by init_resize)
    int rWidth;
    int rHeight;
    // Scale factors; note that both init_resize() and init_resize_yuv() write them.
    float fx;
    float fy;
    Npp8u *apDstImage[3] = {0,0,0};
    Npp32s aDstImageStep[3];
    NppiSize aDstSize[3];
///////////////////////////// full-size BGR output (device image + pinned host copy)
    Npp8u *imgOrigin;
    size_t pitchOrigin;
    NppiSize sizeOrigin;
    unsigned char *bgrOrigin;
    int bgrOriginLen;
    size_t bgrOriginPitch;
//////////////////////////// resized BGR output of the YUV path (device image + pinned host copy)
    Npp8u *imgResize;
    size_t pitchResize;
    NppiSize sizeResize;
    unsigned char *bgrScale;
    int bgrScaleLen;
    size_t bgrScalePitch;
// resize only
//////////////////////////// buffers of the BGR -> BGR resize path (set up by init_resize_bgr)
    Npp8u *originBGR;
    int pitchOriginBGR;
    Npp8u *resizedBGR;
    int pitchResizedBGR;
    unsigned char *hostResizedBGR;
/////////////////////////// malloc'ed output buffer of resizeyuv()
    unsigned char *nv12;
    // One flag per lazy init_* helper; each helper runs its body only once.
    bool initialized_yuv, initialized_resize, initialized_resize_bgr, initialized_resize_yuv;
    int gpu_index;
};
// Records the source/destination geometry and the GPU index; no GPU or host
// buffers are allocated here (the init_* helpers do that lazily).
//
// Every pointer member is cleared so that the destructor can run safely on an
// object whose init_* helpers never ran. In particular apSrcImage[] used to be
// left uninitialised and was then handed to cudaFree() by the destructor when
// only the BGR/NV12 resize entry points had been used.
// The initialiser list follows the member declaration order.
convertor::convertor(const int srcW, const int srcH, const int dstW, const int dstH, const int gpu)
:width(srcW)
,height(srcH)
,nMCUBlocksH(0)
,nMCUBlocksV(0)
,mY(NULL)
,mU(NULL)
,mV(NULL)
,rWidth(dstW)
,rHeight(dstH)
,fx(-1)
,fy(-1)
,imgOrigin(0)
,pitchOrigin(0)
,bgrOrigin(NULL)
,bgrOriginLen(0)
,bgrOriginPitch(0)
,imgResize(0)
,pitchResize(0)
,bgrScale(NULL)
,bgrScaleLen(0)
,bgrScalePitch(0)
,originBGR(0)
,pitchOriginBGR(0)
,resizedBGR(0)
,pitchResizedBGR(0)
,hostResizedBGR(NULL)
,nv12(NULL)
,initialized_yuv(false)
,initialized_resize(false)
,initialized_resize_bgr(false)
,initialized_resize_yuv(false)
,gpu_index(gpu)
{
    for (int i = 0; i < 3; ++i)
    {
        aSamplingFactors[i] = 0;
        apSrcImage[i] = 0;
        apDstImage[i] = 0;
        aSrcImageStep[i] = 0;
        aDstImageStep[i] = 0;
        aSrcPitch[i] = 0;
        aSrcSize[i].width = 0;
        aSrcSize[i].height = 0;
        aDstSize[i].width = 0;
        aDstSize[i].height = 0;
    }
    sizeOrigin.width = 0;
    sizeOrigin.height = 0;
    sizeResize.width = 0;
    sizeResize.height = 0;
}
static void setGPUDevice(const int gpu){
    if (gpu >= 0){
        cudaSetDevice(gpu);
    }
}
// One-time setup for the YUV input path: derives the per-plane sizes from the
// sampling factors, allocates the three device source planes, the device and
// pinned-host buffers for the full-size BGR result, and the pinned host staging
// planes mY/mU/mV.
// NOTE(review): the flag is set before any allocation, so a failed allocation is
// not retried on later calls.
void convertor::init_yuv(){
    if (initialized_yuv) return;
    initialized_yuv = true;
    setGPUDevice(gpu_index);
    for(int i = 0; i < 3; i++){
        apSrcImage[i] = 0;
        apDstImage[i] = 0;
    }
    // Sampling factors packed one per byte: high nibble is used as the horizontal
    // count, low nibble as the vertical count. 34 == 0x22 (luma), 17 == 0x11 (chroma).
    aSamplingFactors[0] = 34;
    aSamplingFactors[1] = 17;
    aSamplingFactors[2] = 17;
    nMCUBlocksH = 0;
    nMCUBlocksV = 0;
    // The block counts are the maxima of the per-plane factors.
    for (int i = 0; i < 3; ++i)
    {
        nMCUBlocksV = std::max(nMCUBlocksV, aSamplingFactors[i] & 0x0f);
        nMCUBlocksH = std::max(nMCUBlocksH, aSamplingFactors[i] >> 4);
    }
    for (int i = 0; i < 3; ++i)
    {
        NppiSize oBlocks;
        NppiSize oBlocksPerMCU = { aSamplingFactors[i] >> 4, aSamplingFactors[i] & 0x0f };
        // Plane size in 8x8 blocks, rounded up to a whole number of blocks per MCU.
        oBlocks.width = (int)ceil((width   + 7) / 8 *
            static_cast<float>(oBlocksPerMCU.width) / nMCUBlocksH);
        oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width;
        oBlocks.height = (int)ceil((height + 7) / 8 *
            static_cast<float>(oBlocksPerMCU.height) / nMCUBlocksV);
        oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height;
        aSrcSize[i].width  = oBlocks.width  * 8;
        aSrcSize[i].height = oBlocks.height * 8;
        // Allocate Memory
        size_t nPitch;
        NPP_CHECK_CUDA(cudaMallocPitch((void**)&(apSrcImage[i]), &nPitch, aSrcSize[i].width, aSrcSize[i].height));
        aSrcPitch[i] = nPitch;
        aSrcImageStep[i] = static_cast<Npp32s>(nPitch);
    }
    // Full-size BGR result: pitched device image plus a tightly packed pinned host copy.
    NPP_CHECK_CUDA(cudaMallocPitch((void**)&imgOrigin, &pitchOrigin, width * 3, height));
    bgrOriginPitch = width * 3;
    bgrOriginLen = bgrOriginPitch * height;
    NPP_CHECK_CUDA(cudaHostAlloc((void**)&bgrOrigin, bgrOriginLen, cudaHostAllocDefault));
    sizeOrigin.width = width;
    sizeOrigin.height = height;
    // Host staging planes use a row pitch rounded up to a multiple of 512 bytes.
    uint32_t nPitch = (width + MEMORY_ALGN_DEVICE) & ~MEMORY_ALGN_DEVICE;
    NPP_CHECK_CUDA(cudaHostAlloc((void**)&mY, nPitch * height, cudaHostAllocDefault));
    nPitch = (width/2 + MEMORY_ALGN_DEVICE) & ~MEMORY_ALGN_DEVICE;
    NPP_CHECK_CUDA(cudaHostAlloc((void**)&mU, nPitch * height / 2, cudaHostAllocDefault));
    NPP_CHECK_CUDA(cudaHostAlloc((void**)&mV, nPitch * height / 2, cudaHostAllocDefault));
}
// One-time setup for the "resize YUV planes, then convert to BGR" path:
// allocates the three resized device planes and, only when the destination is
// strictly smaller than the source in both dimensions, the scale factors, the
// device BGR image and the pinned host BGR buffer.
// NOTE(review): reads aSamplingFactors / nMCUBlocksH / nMCUBlocksV, which are
// only assigned in init_yuv(); this helper therefore expects init_yuv() to have
// run first.
void convertor::init_resize(){
    if (initialized_resize) return;
    initialized_resize = true;
    setGPUDevice(gpu_index);
    NppiSize oDstImageSize;
    oDstImageSize.width  = std::max(1, rWidth);
    oDstImageSize.height = std::max(1, rHeight);
    sizeResize.width = oDstImageSize.width;
    sizeResize.height = oDstImageSize.height;
    for (int i=0; i < 3; ++i)
    {
        NppiSize oBlocks;
        // NOTE(review): nibble order is swapped relative to init_yuv(); the values
        // used (0x22, 0x11) have equal nibbles, so the result is the same.
        NppiSize oBlocksPerMCU = { aSamplingFactors[i] & 0x0f, aSamplingFactors[i] >> 4};
        oBlocks.width = (int)ceil((oDstImageSize.width + 7)/8  *
                                  static_cast<float>(oBlocksPerMCU.width)/nMCUBlocksH);
        oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width;
        oBlocks.height = (int)ceil((oDstImageSize.height+7)/8 *
                                   static_cast<float>(oBlocksPerMCU.height)/nMCUBlocksV);
        oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height;
        aDstSize[i].width = oBlocks.width * 8;
        aDstSize[i].height = oBlocks.height * 8;
        // Allocate Memory
        size_t nPitch;
        NPP_CHECK_CUDA(cudaMallocPitch((void**)&apDstImage[i], &nPitch, aDstSize[i].width, aDstSize[i].height));
        aDstImageStep[i] = static_cast<Npp32s>(nPitch);
    }
    // Scale factors (destination / source) are only set for a strict downscale.
    if (rWidth > 0 && rHeight > 0 && rWidth < width && rHeight < height){
        fx = (float)(rWidth) / (float)(width);
        fy = (float)(rHeight) / (float)(height);
    }
    // Device BGR image for the resized result; left unallocated otherwise.
    if (imgResize == 0){
        if (rWidth > 0 && rHeight > 0 && rWidth < width && rHeight < height){
            NPP_CHECK_CUDA(cudaMallocPitch((void**)&imgResize, &pitchResize, rWidth * 3, rHeight));
        }
    }
    // Tightly packed pinned host copy of the resized BGR image.
    if (!bgrScale){
        if (rWidth > 0 && rHeight > 0 && rWidth < width && rHeight < height){
            bgrScalePitch = rWidth * 3;
            bgrScaleLen = bgrScalePitch * rHeight;
            NPP_CHECK_CUDA(cudaHostAlloc((void**)&bgrScale, bgrScaleLen, cudaHostAllocDefault));
        }
    }
}
void convertor::init_resize_bgr(){
    if (initialized_resize_bgr) return;
    initialized_resize_bgr = true;
    setGPUDevice(gpu_index);
    if (originBGR == 0){
        originBGR = nppiMalloc_8u_C3(width, height, &pitchOriginBGR);
    }
    if (resizedBGR == 0){
        resizedBGR = nppiMalloc_8u_C3(rWidth, rHeight, &pitchResizedBGR);
    }
    if (hostResizedBGR == NULL){
        NPP_CHECK_CUDA(cudaHostAlloc((void**)&hostResizedBGR, rWidth * 3 * rHeight, cudaHostAllocDefault));
    }
}
void convertor::init_resize_yuv(){
    if (initialized_resize_yuv) return;
    initialized_resize_yuv = true;
    if (rWidth > 0 && rHeight > 0){
        fx = (float)(width) / (float)(rWidth);
        fy = (float)(height) / (float)(rHeight);
    }
    nv12 = (unsigned char*)malloc(rWidth*rHeight*3/2);
}
convertor::~convertor(){
    setGPUDevice(gpu_index);
    if(mY) cudaFreeHost(mY);
    if(mU) cudaFreeHost(mU);
    if(mV) cudaFreeHost(mV);
    for (int i = 0; i < 3; ++i)//内存释放
    {
        cudaFree(apSrcImage[i]);
        cudaFree(apDstImage[i]);
    }
    if (imgOrigin) cudaFree(imgOrigin);
    if (imgResize) cudaFree(imgResize);
    if (bgrOrigin) cudaFreeHost(bgrOrigin);
    if (bgrScale) cudaFreeHost(bgrScale);
    if (originBGR) nppiFree(originBGR);
    if (resizedBGR) nppiFree(resizedBGR);
    if (hostResizedBGR) cudaFreeHost(hostResizedBGR);
    if (nv12) free(nv12);
}
int convertor::fill_yuv(const unsigned char *yuv){
    init_yuv();
    int ret = set_data((uint8_t*)yuv, width, height, mY, mU, mV);
    if (ret < 0) return ret;
    setGPUDevice(gpu_index);
    NPP_CHECK_CUDA(cudaMemcpy(apSrcImage[0], mY, aSrcPitch[0] * height,     cudaMemcpyHostToDevice));
    NPP_CHECK_CUDA(cudaMemcpy(apSrcImage[1], mU, aSrcPitch[1] * height / 2, cudaMemcpyHostToDevice));
    NPP_CHECK_CUDA(cudaMemcpy(apSrcImage[2], mV, aSrcPitch[2] * height / 2, cudaMemcpyHostToDevice));
    return 0;
}
int convertor::yuv2bgr(unsigned char **bgr, int *bgrLen){
    *bgr = NULL;
    *bgrLen = 0;
    setGPUDevice(gpu_index);
    NPP_CHECK_NPP(nppiYUV420ToBGR_8u_P3C3R(apSrcImage, aSrcImageStep, imgOrigin, pitchOrigin, sizeOrigin));
    NPP_CHECK_CUDA(cudaMemcpy2D(bgrOrigin, bgrOriginPitch, imgOrigin, pitchOrigin, bgrOriginPitch, height, cudaMemcpyDeviceToHost));
    *bgr = bgrOrigin;
    *bgrLen = bgrOriginLen;
    return 0;
}
int convertor::resize2bgr(unsigned char *in, unsigned char **data, int *data_len){
    *data = NULL;
    *data_len = 0;
    if ((rWidth < 0 && rHeight < 0) || (rWidth > width && rHeight > height)){
        return -1;
    }
    setGPUDevice(gpu_index);
    if (!in){
        init_resize();
        NppiSize oDstImageSize;
        oDstImageSize.width  = std::max(1, rWidth);
        oDstImageSize.height = std::max(1, rHeight);
        for (int i = 0; i < 3; ++i)
        {
            NppiSize oBlocksPerMCU = { aSamplingFactors[i] & 0x0f, aSamplingFactors[i] >> 4};
            NppiSize oSrcImageSize = {(width * oBlocksPerMCU.width) / nMCUBlocksH, (height * oBlocksPerMCU.height)/nMCUBlocksV};
            NppiRect oSrcImageROI = {0,0,oSrcImageSize.width, oSrcImageSize.height};
            NppiRect oDstImageROI;
            NppiInterpolationMode eInterploationMode = NPPI_INTER_SUPER;
            NPP_CHECK_NPP(nppiGetResizeRect(oSrcImageROI, &oDstImageROI,
                                            fx,
                                            fy,
                                            0.0, 0.0, eInterploationMode));
            NPP_CHECK_NPP(nppiResizeSqrPixel_8u_C1R(apSrcImage[i], oSrcImageSize, aSrcImageStep[i], oSrcImageROI,
                                                    apDstImage[i], aDstImageStep[i], oDstImageROI ,
                                                    fx,
                                                    fy,
                                                    0.0, 0.0, eInterploationMode));
        }
        NPP_CHECK_NPP(nppiYUV420ToBGR_8u_P3C3R(apDstImage, aDstImageStep, imgResize, pitchResize, sizeResize));
        NPP_CHECK_CUDA(cudaMemcpy2D(bgrScale, bgrScalePitch, imgResize, pitchResize, bgrScalePitch, rHeight, cudaMemcpyDeviceToHost));
        *data = bgrScale;
        *data_len = bgrScaleLen;
    }else{
        init_resize_bgr();
        NppiSize oSrcSize;
        oSrcSize.width = width;
        oSrcSize.height = height;
        NPP_CHECK_CUDA(cudaMemcpy2D(originBGR, pitchOriginBGR, in, width*3, width*3, height, cudaMemcpyHostToDevice));
        NppiRect oSrcROI;
        oSrcROI.x = 0;
        oSrcROI.y = 0;
        oSrcROI.width = width;
        oSrcROI.height = height;
        NppiRect oDstROI;
        oDstROI.x = 0;
        oDstROI.y = 0;
        oDstROI.width = rWidth;
        oDstROI.height = rHeight;
        // Scale Factor
        double nXFactor = double(oDstROI.width) / double(oSrcROI.width);
        double nYFactor = double(oDstROI.height) / double(oSrcROI.height);
        // Scaled X/Y  Shift
        double nXShift = - oSrcROI.x * nXFactor ;
        double nYShift = - oSrcROI.y * nYFactor;
        int eInterpolation = NPPI_INTER_SUPER;
        if (nXFactor >= 1.f || nYFactor >= 1.f)
            eInterpolation = NPPI_INTER_LANCZOS;
        NppStatus ret = nppiResizeSqrPixel_8u_C3R(originBGR, oSrcSize, pitchOriginBGR, oSrcROI,
            resizedBGR, pitchResizedBGR, oDstROI, nXFactor, nYFactor, nXShift, nYShift, eInterpolation );
        if(ret != NPP_SUCCESS) {
            printf("imageResize_8u_C3R failed %d.\n", ret);
            return -2;
        }
        size_t pitch = rWidth * 3;
        *data_len = pitch * rHeight;
        NPP_CHECK_CUDA(cudaMemcpy2D(hostResizedBGR, pitch, resizedBGR, pitchResizedBGR, pitch, rHeight, cudaMemcpyDeviceToHost));
        *data = hostResizedBGR;
    }
    return 0;
}
// Nearest-neighbour scaler for a packed frame laid out as a srcWidth*srcHeight
// luma plane followed by half-height rows of interleaved chroma byte pairs
// (row stride srcWidth). `dst` receives the same layout at dstWidth*dstHeight
// and must hold at least dstWidth*dstHeight*3/2 bytes.
// Returns 0 on success, -1 on NULL buffers or non-positive dimensions.
//
// Fixes over the previous version:
//  - every destination row and column is produced; the loops used to stop at
//    `dh & ~7` / `dw & ~7`, leaving the trailing rows/columns of any size that
//    is not a multiple of 8 unwritten;
//  - the 16.16 fixed-point steps are computed in 64 bits, so `width << 16`
//    cannot overflow `int`;
//  - source coordinates are clamped and chroma writes are bounded, so odd
//    dimensions or large upscales cannot index outside either buffer
//    (chroma is only exact for even dimensions);
//  - the `register` storage class, no longer valid in current C++, is gone.
static int nv12_nearest_scale(uint8_t* __restrict src, uint8_t* __restrict dst,
                        int srcWidth, int srcHeight, int dstWidth, int dstHeight)
{
    if (!src || !dst || srcWidth <= 0 || srcHeight <= 0 || dstWidth <= 0 || dstHeight <= 0)
    {
        return -1;
    }
    const size_t sw = (size_t)srcWidth;
    const size_t sh = (size_t)srcHeight;
    const size_t dw = (size_t)dstWidth;
    const size_t dh = (size_t)dstHeight;

    // Source step per destination pixel in 16.16 fixed point; the +1 offsets the
    // truncation of the integer division.
    const uint64_t xStep = (((uint64_t)sw) << 16) / dw + 1;
    const uint64_t yStep = (((uint64_t)sh) << 16) / dh + 1;

    const uint8_t *srcChroma = src + sh * sw;   // start of the source chroma rows
    uint8_t *dstChroma = dst + dh * dw;         // start of the destination chroma rows
    const size_t srcChromaRows = sh / 2;
    const size_t dstChromaRows = dh / 2;

    for (size_t y = 0; y < dh; ++y)
    {
        size_t srcY = (size_t)((y * yStep) >> 16);
        if (srcY >= sh)
        {
            srcY = sh - 1;
        }
        const uint8_t *srcRow = src + srcY * sw;
        uint8_t *dstRow = dst + y * dw;

        // Chroma is written once per pair of destination rows (on the even row).
        const int chromaRow = ((y & 1) == 0) && (y / 2 < dstChromaRows) && (srcChromaRows > 0) && (sw >= 2);
        const uint8_t *srcChromaRow = NULL;
        uint8_t *dstChromaRow = NULL;
        if (chromaRow)
        {
            size_t srcChromaY = srcY / 2;
            if (srcChromaY >= srcChromaRows)
            {
                srcChromaY = srcChromaRows - 1;
            }
            srcChromaRow = srcChroma + srcChromaY * sw;
            dstChromaRow = dstChroma + (y / 2) * dw;
        }

        for (size_t x = 0; x < dw; ++x)
        {
            size_t srcX = (size_t)((x * xStep) >> 16);
            if (srcX >= sw)
            {
                srcX = sw - 1;
            }
            dstRow[x] = srcRow[srcX];

            // One chroma pair per two destination columns (on the even column).
            if (chromaRow && (x & 1) == 0 && x + 1 < dw)
            {
                size_t pair = srcX & ~(size_t)1;        // first byte of the source pair
                if (pair + 1 >= sw)
                {
                    pair = (sw - 2) & ~(size_t)1;       // keep both bytes inside the row
                }
                dstChromaRow[x]     = srcChromaRow[pair];
                dstChromaRow[x + 1] = srcChromaRow[pair + 1];
            }
        }
    }
    return 0;
}
int convertor::resizeyuv(unsigned char *in, unsigned char **data, int *data_len){
    init_resize_yuv();
    *data_len = rWidth*rHeight*3/2;
    *data = nv12;
    return nv12_nearest_scale(in, nv12, width, height, rWidth, rHeight);
}
convHandle conv_create(const int srcW, const int srcH, const int dstW, const int dstH, const int gpu){
    if (gpu < 0) return NULL;
    convertor *conv = new convertor(srcW, srcH, dstW, dstH, gpu);
    return conv;
}
void conv_destroy(convHandle h){
    if (!h) return;
    convertor *conv = (convertor*)h;
    delete conv;
}
int yuv2bgrandresize(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen, unsigned char **scaleBGR, int *scaleBGRLen){
    if (!h) return -2;
    convertor *conv = (convertor*)h;
    int ret = conv->fill_yuv((unsigned char*)yuv);
    if (ret != 0) return ret;
    ret = conv->yuv2bgr(bgr, bgrLen);
    if (ret != 0) return ret;
    ret = conv->resize2bgr(NULL, scaleBGR, scaleBGRLen);
    return ret;
}
int yuv2bgr(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen){
    if (!h) return -2;
    convertor *conv = (convertor*)h;
    int ret = conv->fill_yuv((unsigned char*)yuv);
    if (ret != 0) return ret;
    return conv->yuv2bgr(bgr, bgrLen);
}
int yuv2resizedbgr(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen){
    if (!h) return -2;
    convertor *conv = (convertor*)h;
    int ret = conv->fill_yuv((unsigned char*)yuv);
    if (ret != 0) return ret;
    ret = conv->resize2bgr(NULL, bgr, bgrLen);
    return ret;
}
int resizebgr(convHandle h, void *data, unsigned char **resized, int *len){
    if (!h) return -2;
    convertor *conv = (convertor*)h;
    return conv->resize2bgr((unsigned char*)data, resized, len);
}
int resizeyuv(convHandle h, void *data, unsigned char **resized, int *len){
    if (!h) return -2;
    convertor *conv = (convertor*)h;
    return conv->resizeyuv((unsigned char*)data, resized, len);
}
goconv/conv.h
New file
@@ -0,0 +1,23 @@
#ifndef __RESIZE_NPP_H__
#define __RESIZE_NPP_H__
#ifdef __cplusplus
extern "C"{
#endif
/* Opaque handle to a converter bound to one source size, destination size and GPU. */
typedef void* convHandle;
/* Creates a converter; returns NULL when gpu is negative. Release with conv_destroy(). */
convHandle conv_create(const int srcW, const int srcH, const int dstW, const int dstH, const int gpu);
/* Destroys a converter and every buffer it handed out; a NULL handle is ignored. */
void conv_destroy(convHandle h);
/*
 * All functions below return 0 on success, -2 when h is NULL and another
 * non-zero value on failure. Output pointers refer to buffers owned by the
 * handle: they must not be freed by the caller and are reused by later calls.
 */
/* Converts the frame to full-size BGR (bgr) and to resized BGR (scaleBGR). */
int yuv2bgrandresize(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen, unsigned char **scaleBGR, int *scaleBGRLen);
/* Converts the frame to full-size BGR. */
int yuv2bgr(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen);
/* Converts the frame to BGR at the destination size. */
int yuv2resizedbgr(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen);
/* Resizes a packed BGR image of the source size to the destination size. */
int resizebgr(convHandle h, void *data, unsigned char **resized, int *len);
/* Resizes a packed YUV frame of the source size to the destination size on the CPU. */
int resizeyuv(convHandle h, void *data, unsigned char **resized, int *len);
#ifdef __cplusplus
}
#endif
#endif //__RESIZE_NPP_H__
goconv/goconv.go
New file
@@ -0,0 +1,258 @@
package goconv
/*
#cgo CFLAGS: -I./ -I./inc -I/usr/local/cuda/include
#cgo CXXFLAGS: -I./ -I./inc -I/usr/local/cuda/include -std=c++11
#cgo LDFLAGS: -L/usr/local/cuda/lib64 -lnppig -lnppicc -lnppial -lnppisu -lcudart -ldl
#include <stdlib.h>
#include "conv.h"
*/
import "C"
import (
    "unsafe"
    "basic.com/valib/godraw.git"
    "basic.com/valib/gogpu.git"
    "github.com/disintegration/imaging"
)
const (
    // need is passed to gogpu.SatisfyGPU as the requirement when probing a GPU
    // (presumably an amount of free memory; TODO confirm the unit against gogpu).
    need     = 200
    // reserved is passed as SatisfyGPU's third argument when falling back to the
    // previously used GPU.
    reserved = 512
)
func gpuIndex(lastIndex int) int {
    indices := gogpu.RankGPU()
    if len(indices) == 0 {
        return -1
    }
    for _, v := range indices {
        if v != lastIndex {
            if gogpu.SatisfyGPU(v, need, need/2) {
                return v
            }
        }
    }
    if gogpu.SatisfyGPU(lastIndex, need, reserved) {
        return lastIndex
    }
    return -1
}
// convertor pairs a native converter handle with the geometry it was created for.
type convertor struct {
    width   int // source width
    height  int // source height
    rWidth  int // destination width
    rHeight int // destination height
    conv    C.convHandle // handle returned by C.conv_create
}
// convts caches every converter created by find, one per distinct geometry.
// It is accessed without any locking.
var convts []*convertor
func find(w, h, rw, rh int) *convertor {
    for _, v := range convts {
        if v.width == w && v.height == h && v.rWidth == rw && v.rHeight == rh {
            return v
        }
    }
    gpu := gpuIndex(0)
    if gpu < 0 {
        return nil
    }
    cw := C.conv_create(C.int(w), C.int(h), C.int(rw), C.int(rh), C.int(gpu))
    if cw == nil {
        return nil
    }
    c := &convertor{w, h, rw, rh, cw}
    convts = append(convts, c)
    return c
}
// YUV2BGR yuv->bgr
func YUV2BGR(yuv []byte, w, h int) []byte {
    cw := find(w, h, -1, -1)
    if cw == nil {
        return yuv2bgr(yuv, w, h)
    }
    var bgr *C.uchar
    var bgrLen C.int
    ret := C.yuv2bgr(cw.conv, unsafe.Pointer(&yuv[0]), &bgr, &bgrLen)
    if ret != 0 {
        return nil
    }
    const maxLen = 0x7fffffff
    goBGRLen := int(bgrLen)
    if goBGRLen > 0 {
        return (*[maxLen]byte)(unsafe.Pointer(bgr))[:goBGRLen:goBGRLen]
    }
    return nil
}
// YUV2ResizedBGR yuv -> resized bgr
func YUV2ResizedBGR(yuv []byte, w, h, rw, rh int) []byte {
    cw := find(w, h, rw, rh)
    if cw == nil {
        bgr := yuv2bgr(yuv, w, h)
        return bgresize(bgr, w, h, rw, rh)
    }
    var bgr *C.uchar
    var bgrLen C.int
    ret := C.yuv2resizedbgr(cw.conv, unsafe.Pointer(&yuv[0]), &bgr, &bgrLen)
    if ret != 0 {
        return nil
    }
    const maxLen = 0x7fffffff
    goBGRLen := int(bgrLen)
    if goBGRLen > 0 {
        return (*[maxLen]byte)(unsafe.Pointer(bgr))[:goBGRLen:goBGRLen]
    }
    return nil
}
// ResizeBGR resize
func ResizeBGR(bgrO []byte, w, h, rw, rh int) []byte {
    if (rw < 0 && rh < 0) || (rw > w && rh > h) {
        return bgrO
    }
    cw := find(w, h, rw, rh)
    if cw == nil {
        return bgresize(bgrO, w, h, rw, rh)
    }
    var bgr *C.uchar
    var bgrLen C.int
    ret := C.resizebgr(cw.conv, unsafe.Pointer(&bgrO[0]), &bgr, &bgrLen)
    if ret != 0 {
        return nil
    }
    const maxLen = 0x7fffffff
    goBGRLen := int(bgrLen)
    if goBGRLen > 0 {
        return (*[maxLen]byte)(unsafe.Pointer(bgr))[:goBGRLen:goBGRLen]
    }
    return nil
}
// ResizeYUV yuv
func ResizeYUV(yuv []byte, w, h, rw, rh int) []byte {
    if (rw < 0 && rh < 0) || (rw > w && rh > h) {
        return yuv
    }
    cw := find(w, h, rw, rh)
    if cw == nil {
        return yuv
    }
    var resized *C.uchar
    var resizedLen C.int
    ret := C.resizeyuv(cw.conv, unsafe.Pointer(&yuv[0]), &resized, &resizedLen)
    if ret != 0 {
        return nil
    }
    const maxLen = 0x7fffffff
    goResizedLen := int(resizedLen)
    if goResizedLen > 0 {
        return (*[maxLen]byte)(unsafe.Pointer(resized))[:goResizedLen:goResizedLen]
    }
    return nil
}
// YUV2BGRandResize conv and resize
func YUV2BGRandResize(yuv []byte, w, h, rw, rh int) ([]byte, []byte) {
    cw := find(w, h, rw, rh)
    if cw == nil {
        origin := yuv2bgr(yuv, w, h)
        resized := bgresize(origin, w, h, rw, rh)
        return origin, resized
    }
    var bgr *C.uchar
    var bgrLen C.int
    var scale *C.uchar
    var scaleLen C.int
    ret := C.yuv2bgrandresize(cw.conv, unsafe.Pointer(&yuv[0]), &bgr, &bgrLen, &scale, &scaleLen)
    if ret != 0 {
        return nil, nil
    }
    var out, resized []byte
    const maxLen = 0x7fffffff
    goBGRLen, goScaleLen := int(bgrLen), int(scaleLen)
    if goBGRLen > 0 {
        out = (*[maxLen]byte)(unsafe.Pointer(bgr))[:goBGRLen:goBGRLen]
    }
    if goScaleLen > 0 {
        resized = (*[maxLen]byte)(unsafe.Pointer(scale))[:goScaleLen:goScaleLen]
    }
    return out, resized
}
// Free free
func Free() {
    for _, v := range convts {
        if v.conv != nil {
            C.conv_destroy(v.conv)
        }
    }
}
// yuv2bgr is the pure-Go fallback used when no GPU converter is available.
// It converts a packed w x h frame (luma plane followed by rows of interleaved
// chroma pairs, U first) into packed BGR bytes, three per pixel.
//
// It now agrees with the native path: the first byte of each chroma pair is
// read as U and the second as V, and the channels are emitted in B, G, R order.
// The previous version read the pair as V,U and appended R,G,B, so the fallback
// produced different colours from the GPU path for the same frame.
// nil is returned for non-positive dimensions or an input that is too short,
// instead of panicking with an index out of range.
func yuv2bgr(yuv []byte, w, h int) []byte {
    if w <= 0 || h <= 0 {
        return nil
    }
    start := w * h
    // Highest chroma byte read below: last chroma row, last pair (one extra
    // byte when the width is odd).
    required := start + ((h-1)/2+1)*w + (w & 0x01)
    if len(yuv) < required {
        return nil
    }

    clamp := func(c int32) byte {
        if c > 255 {
            return 255
        }
        if c < 0 {
            return 0
        }
        return byte(c)
    }

    data := make([]byte, 0, w*h*3)
    for i := 0; i < h; i++ {
        for j := 0; j < w; j++ {
            // Each chroma pair is shared by a 2x2 block of luma samples.
            index := i/2*w + j - (j & 0x01)
            y := int32(yuv[j+i*w])
            u := int32(yuv[start+index])
            v := int32(yuv[start+index+1])
            r := y + (140*(v-128))/100
            g := y - (34*(u-128)+71*(v-128))/100
            b := y + (177*(u-128))/100
            data = append(data, clamp(b), clamp(g), clamp(r))
        }
    }
    return data
}
func bgresize(bgr []byte, w, h, rw, rh int) []byte {
    img, err := godraw.ToImage(bgr, w, h)
    if err != nil {
        return nil
    }
    dstImg := imaging.Resize(img, rw, rh, imaging.NearestNeighbor)
    return godraw.Image2BGR(dstImg)
}
goconv/inc/Exceptions.h
New file
@@ -0,0 +1,181 @@
/**
 * Copyright 1993-2015 NVIDIA Corporation.  All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 *
 */
#ifndef NV_UTIL_NPP_EXCEPTIONS_H
#define NV_UTIL_NPP_EXCEPTIONS_H
#include <string>
#include <sstream>
#include <iostream>
/// All npp related C++ classes are put into the npp namespace.
namespace npp
{
    /// Exception base class.
    ///     This exception base class will be used for everything C++ throught
    /// the NPP project.
    ///     The exception contains a string message, as well as data fields for a string
    /// containing the name of the file as well as the line number where the exception was thrown.
    ///     The easiest way of throwing exceptions and providing filename and line number is
    /// to use one of the ASSERT macros defined for that purpose.
    class Exception
    {
        public:
            /// Constructor.
            /// \param rMessage A message with information as to why the exception was thrown.
            /// \param rFileName The name of the file where the exception was thrown.
            /// \param nLineNumber Line number in the file where the exception was thrown.
            explicit
            Exception(const std::string &rMessage = "", const std::string &rFileName = "", unsigned int nLineNumber = 0)
                : sMessage_(rMessage), sFileName_(rFileName), nLineNumber_(nLineNumber)
            { };
            Exception(const Exception &rException)
                : sMessage_(rException.sMessage_), sFileName_(rException.sFileName_), nLineNumber_(rException.nLineNumber_)
            { };
            virtual
            ~Exception()
            { };
            /// Get the exception's message.
            const
            std::string &
            message()
            const
            {
                return sMessage_;
            }
            /// Get the exception's file info.
            const
            std::string &
            fileName()
            const
            {
                return sFileName_;
            }
            /// Get the exceptions's line info.
            unsigned int
            lineNumber()
            const
            {
                return nLineNumber_;
            }
            /// Create a clone of this exception.
            ///      This creates a new Exception object on the heap. It is
            /// the responsibility of the user of this function to free this memory
            /// (delete x).
            virtual
            Exception *
            clone()
            const
            {
                return new Exception(*this);
            }
            /// Create a single string with all the exceptions information.
            ///     The virtual toString() method is used by the operator<<()
            /// so that all exceptions derived from this base-class can print
            /// their full information correctly even if a reference to their
            /// exact type is not had at the time of printing (i.e. the basic
            /// operator<<() is used).
            virtual
            std::string
            toString()
            const
            {
                std::ostringstream oOutputString;
                oOutputString << fileName() << ":" << lineNumber() << ": " << message();
                return oOutputString.str();
            }
        private:
            std::string sMessage_;      ///< Message regarding the cause of the exception.
            std::string sFileName_;     ///< Name of the file where the exception was thrown.
            unsigned int nLineNumber_;  ///< Line number in the file where the exception was thrown
    };
    /// Output stream inserter for Exception.
    /// \param rOutputStream The stream the exception information is written to.
    /// \param rException The exception that's being written.
    /// \return Reference to the output stream being used.
    static std::ostream &
    operator << (std::ostream &rOutputStream, const Exception &rException)
    {
        rOutputStream << rException.toString();
        return rOutputStream;
    }
    /// Basic assert macro.
    ///     This macro should be used to enforce any kind of pre or post conditions.
    /// Unlike the C-runtime assert macro, this macro does not abort execution, but throws
    /// a C++ exception. The exception is automatically filled with information about the failing
    /// condition, the filename and line number where the exception was thrown.
    /// \note The macro is written in such a way that omitting a semicolon after its usage
    ///     causes a compiler error. The correct way to invoke this macro is:
    /// NPP_ASSERT(n < MAX);
#define NPP_ASSERT(C) do {if (!(C)) throw npp::Exception(#C " assertion faild!", __FILE__, __LINE__);} while(false)
    // ASSERT macro.
    //  Same functionality as the basic assert macro with the added ability to pass
    //  a message M. M should be a string literal.
    //  Note: Never use code inside ASSERT() that causes a side effect; ASSERT macros
    //      may get compiled out in release mode.
#define NPP_ASSERT_MSG(C, M) do {if (!(C)) throw npp::Exception(#C " assertion faild! Message: " M, __FILE__, __LINE__);} while(false)
#ifdef _DEBUG
    /// Basic debug assert macro.
    ///     This macro is identical in every respect to NPP_ASSERT(C) but it does get compiled to a
    /// no-op in release builds. It is therefore of utmost importance to not put statements into
    /// this macro that cause side effects required for correct program execution.
#define NPP_DEBUG_ASSERT(C) do {if (!(C)) throw npp::Exception(#C " debug assertion faild!", __FILE__, __LINE__);} while(false)
#else
#define NPP_DEBUG_ASSERT(C)
#endif
    /// ASSERT for null-pointer test.
    /// It is safe to put code with side effects into this macro. Also: This macro never
    /// gets compiled to a no-op because resource allocation may fail based on external causes not under
    /// control of a software developer.
#define NPP_ASSERT_NOT_NULL(P) do {if ((P) == 0) throw npp::Exception(#P " not null assertion faild!", __FILE__, __LINE__);} while(false)
    /// Macro for flagging methods as not implemented.
    /// The macro throws an exception with a message that an implementation was missing
#define NPP_NOT_IMPLEMENTED() do {throw npp::Exception("Implementation missing!", __FILE__, __LINE__);} while(false)
    /// Macro for checking error return code of CUDA (runtime) calls.
    /// This macro never gets disabled.
    /// The expression S is evaluated once and its result stored. A result other
    /// than cudaSuccess is first written to std::cout as a number, after which
    /// NPP_ASSERT throws an npp::Exception.
#define NPP_CHECK_CUDA(S) do {cudaError_t eCUDAResult; \
        eCUDAResult = S; \
        if (eCUDAResult != cudaSuccess) std::cout << "NPP_CHECK_CUDA - eCUDAResult = " << eCUDAResult << std::endl; \
        NPP_ASSERT(eCUDAResult == cudaSuccess);} while (false)
    /// Macro for checking error return code for NPP calls.
    /// The expression S is evaluated once and its result stored. A result other
    /// than NPP_SUCCESS is written to std::cout, both as the text produced by
    /// _cudaGetErrorEnum() and as a number, after which NPP_ASSERT throws an
    /// npp::Exception.
#define NPP_CHECK_NPP(S) do {NppStatus eStatusNPP; \
        eStatusNPP = S; \
        if (eStatusNPP != NPP_SUCCESS) std::cout << "NPP_CHECK_NPP - eStatusNPP = " << _cudaGetErrorEnum(eStatusNPP) << "("<< eStatusNPP << ")" << std::endl; \
        NPP_ASSERT(eStatusNPP == NPP_SUCCESS);} while (false)
    /// Macro for checking error return codes from cuFFT calls.
#define NPP_CHECK_CUFFT(S) do {cufftResult eCUFFTResult; \
        eCUFFTResult = S; \
        if (eCUFFTResult != NPP_SUCCESS) std::cout << "NPP_CHECK_CUFFT - eCUFFTResult = " << eCUFFTResult << std::endl; \
        NPP_ASSERT(eCUFFTResult == CUFFT_SUCCESS);} while (false)
} // npp namespace
#endif // NV_UTIL_NPP_EXCEPTIONS_H
goconv/inc/helper_cuda.h
New file
@@ -0,0 +1,1261 @@
/**
 * Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 *
 */
////////////////////////////////////////////////////////////////////////////////
// These are CUDA Helper functions for initialization and error checking
#ifndef HELPER_CUDA_H
#define HELPER_CUDA_H
#pragma once
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <helper_string.h>
#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
// Note: your SDK sample is required to include the proper header files. Please
// refer to the CUDA examples for the needed CUDA headers, which may change depending
// on which CUDA functions are used.
// CUDA Runtime error messages
#ifdef __DRIVER_TYPES_H__
// Translates a cudaError_t value into the spelling of its enumerator.
// Returns a string literal naming the matching enumerator, or "<unknown>" when
// the value matches none of the cases listed in the switch.
static const char *_cudaGetErrorEnum(cudaError_t error)
{
    switch (error)
    {
        case cudaSuccess:
            return "cudaSuccess";
        case cudaErrorMissingConfiguration:
            return "cudaErrorMissingConfiguration";
        case cudaErrorMemoryAllocation:
            return "cudaErrorMemoryAllocation";
        case cudaErrorInitializationError:
            return "cudaErrorInitializationError";
        case cudaErrorLaunchFailure:
            return "cudaErrorLaunchFailure";
        case cudaErrorPriorLaunchFailure:
            return "cudaErrorPriorLaunchFailure";
        case cudaErrorLaunchTimeout:
            return "cudaErrorLaunchTimeout";
        case cudaErrorLaunchOutOfResources:
            return "cudaErrorLaunchOutOfResources";
        case cudaErrorInvalidDeviceFunction:
            return "cudaErrorInvalidDeviceFunction";
        case cudaErrorInvalidConfiguration:
            return "cudaErrorInvalidConfiguration";
        case cudaErrorInvalidDevice:
            return "cudaErrorInvalidDevice";
        case cudaErrorInvalidValue:
            return "cudaErrorInvalidValue";
        case cudaErrorInvalidPitchValue:
            return "cudaErrorInvalidPitchValue";
        case cudaErrorInvalidSymbol:
            return "cudaErrorInvalidSymbol";
        case cudaErrorMapBufferObjectFailed:
            return "cudaErrorMapBufferObjectFailed";
        case cudaErrorUnmapBufferObjectFailed:
            return "cudaErrorUnmapBufferObjectFailed";
        case cudaErrorInvalidHostPointer:
            return "cudaErrorInvalidHostPointer";
        case cudaErrorInvalidDevicePointer:
            return "cudaErrorInvalidDevicePointer";
        case cudaErrorInvalidTexture:
            return "cudaErrorInvalidTexture";
        case cudaErrorInvalidTextureBinding:
            return "cudaErrorInvalidTextureBinding";
        case cudaErrorInvalidChannelDescriptor:
            return "cudaErrorInvalidChannelDescriptor";
        case cudaErrorInvalidMemcpyDirection:
            return "cudaErrorInvalidMemcpyDirection";
        case cudaErrorAddressOfConstant:
            return "cudaErrorAddressOfConstant";
        case cudaErrorTextureFetchFailed:
            return "cudaErrorTextureFetchFailed";
        case cudaErrorTextureNotBound:
            return "cudaErrorTextureNotBound";
        case cudaErrorSynchronizationError:
            return "cudaErrorSynchronizationError";
        case cudaErrorInvalidFilterSetting:
            return "cudaErrorInvalidFilterSetting";
        case cudaErrorInvalidNormSetting:
            return "cudaErrorInvalidNormSetting";
        case cudaErrorMixedDeviceExecution:
            return "cudaErrorMixedDeviceExecution";
        case cudaErrorCudartUnloading:
            return "cudaErrorCudartUnloading";
        case cudaErrorUnknown:
            return "cudaErrorUnknown";
        case cudaErrorNotYetImplemented:
            return "cudaErrorNotYetImplemented";
        case cudaErrorMemoryValueTooLarge:
            return "cudaErrorMemoryValueTooLarge";
        case cudaErrorInvalidResourceHandle:
            return "cudaErrorInvalidResourceHandle";
        case cudaErrorNotReady:
            return "cudaErrorNotReady";
        case cudaErrorInsufficientDriver:
            return "cudaErrorInsufficientDriver";
        case cudaErrorSetOnActiveProcess:
            return "cudaErrorSetOnActiveProcess";
        case cudaErrorInvalidSurface:
            return "cudaErrorInvalidSurface";
        case cudaErrorNoDevice:
            return "cudaErrorNoDevice";
        case cudaErrorECCUncorrectable:
            return "cudaErrorECCUncorrectable";
        case cudaErrorSharedObjectSymbolNotFound:
            return "cudaErrorSharedObjectSymbolNotFound";
        case cudaErrorSharedObjectInitFailed:
            return "cudaErrorSharedObjectInitFailed";
        case cudaErrorUnsupportedLimit:
            return "cudaErrorUnsupportedLimit";
        case cudaErrorDuplicateVariableName:
            return "cudaErrorDuplicateVariableName";
        case cudaErrorDuplicateTextureName:
            return "cudaErrorDuplicateTextureName";
        case cudaErrorDuplicateSurfaceName:
            return "cudaErrorDuplicateSurfaceName";
        case cudaErrorDevicesUnavailable:
            return "cudaErrorDevicesUnavailable";
        case cudaErrorInvalidKernelImage:
            return "cudaErrorInvalidKernelImage";
        case cudaErrorNoKernelImageForDevice:
            return "cudaErrorNoKernelImageForDevice";
        case cudaErrorIncompatibleDriverContext:
            return "cudaErrorIncompatibleDriverContext";
        case cudaErrorPeerAccessAlreadyEnabled:
            return "cudaErrorPeerAccessAlreadyEnabled";
        case cudaErrorPeerAccessNotEnabled:
            return "cudaErrorPeerAccessNotEnabled";
        case cudaErrorDeviceAlreadyInUse:
            return "cudaErrorDeviceAlreadyInUse";
        case cudaErrorProfilerDisabled:
            return "cudaErrorProfilerDisabled";
        case cudaErrorProfilerNotInitialized:
            return "cudaErrorProfilerNotInitialized";
        case cudaErrorProfilerAlreadyStarted:
            return "cudaErrorProfilerAlreadyStarted";
        case cudaErrorProfilerAlreadyStopped:
            return "cudaErrorProfilerAlreadyStopped";
        /* Since CUDA 4.0*/
        case cudaErrorAssert:
            return "cudaErrorAssert";
        case cudaErrorTooManyPeers:
            return "cudaErrorTooManyPeers";
        case cudaErrorHostMemoryAlreadyRegistered:
            return "cudaErrorHostMemoryAlreadyRegistered";
        case cudaErrorHostMemoryNotRegistered:
            return "cudaErrorHostMemoryNotRegistered";
        /* Since CUDA 5.0 */
        case cudaErrorOperatingSystem:
            return "cudaErrorOperatingSystem";
        case cudaErrorPeerAccessUnsupported:
            return "cudaErrorPeerAccessUnsupported";
        case cudaErrorLaunchMaxDepthExceeded:
            return "cudaErrorLaunchMaxDepthExceeded";
        case cudaErrorLaunchFileScopedTex:
            return "cudaErrorLaunchFileScopedTex";
        case cudaErrorLaunchFileScopedSurf:
            return "cudaErrorLaunchFileScopedSurf";
        case cudaErrorSyncDepthExceeded:
            return "cudaErrorSyncDepthExceeded";
        case cudaErrorLaunchPendingCountExceeded:
            return "cudaErrorLaunchPendingCountExceeded";
        case cudaErrorNotPermitted:
            return "cudaErrorNotPermitted";
        case cudaErrorNotSupported:
            return "cudaErrorNotSupported";
        /* Since CUDA 6.0 */
        case cudaErrorHardwareStackError:
            return "cudaErrorHardwareStackError";
        case cudaErrorIllegalInstruction:
            return "cudaErrorIllegalInstruction";
        case cudaErrorMisalignedAddress:
            return "cudaErrorMisalignedAddress";
        case cudaErrorInvalidAddressSpace:
            return "cudaErrorInvalidAddressSpace";
        case cudaErrorInvalidPc:
            return "cudaErrorInvalidPc";
        case cudaErrorIllegalAddress:
            return "cudaErrorIllegalAddress";
        /* Since CUDA 6.5*/
        case cudaErrorInvalidPtx:
            return "cudaErrorInvalidPtx";
        case cudaErrorInvalidGraphicsContext:
            return "cudaErrorInvalidGraphicsContext";
        case cudaErrorStartupFailure:
            return "cudaErrorStartupFailure";
        case cudaErrorApiFailureBase:
            return "cudaErrorApiFailureBase";
    }
    // Reached only when no case above matched.
    return "<unknown>";
}
#endif
#ifdef __cuda_cuda_h__
// CUDA Driver API errors
// Translates a CUresult value into the spelling of its enumerator.
// Returns a string literal naming the matching enumerator, or "<unknown>" when
// the value matches none of the cases listed in the switch.
static const char *_cudaGetErrorEnum(CUresult error)
{
    switch (error)
    {
        case CUDA_SUCCESS:
            return "CUDA_SUCCESS";
        case CUDA_ERROR_INVALID_VALUE:
            return "CUDA_ERROR_INVALID_VALUE";
        case CUDA_ERROR_OUT_OF_MEMORY:
            return "CUDA_ERROR_OUT_OF_MEMORY";
        case CUDA_ERROR_NOT_INITIALIZED:
            return "CUDA_ERROR_NOT_INITIALIZED";
        case CUDA_ERROR_DEINITIALIZED:
            return "CUDA_ERROR_DEINITIALIZED";
        case CUDA_ERROR_PROFILER_DISABLED:
            return "CUDA_ERROR_PROFILER_DISABLED";
        case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
            return "CUDA_ERROR_PROFILER_NOT_INITIALIZED";
        case CUDA_ERROR_PROFILER_ALREADY_STARTED:
            return "CUDA_ERROR_PROFILER_ALREADY_STARTED";
        case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
            return "CUDA_ERROR_PROFILER_ALREADY_STOPPED";
        case CUDA_ERROR_NO_DEVICE:
            return "CUDA_ERROR_NO_DEVICE";
        case CUDA_ERROR_INVALID_DEVICE:
            return "CUDA_ERROR_INVALID_DEVICE";
        case CUDA_ERROR_INVALID_IMAGE:
            return "CUDA_ERROR_INVALID_IMAGE";
        case CUDA_ERROR_INVALID_CONTEXT:
            return "CUDA_ERROR_INVALID_CONTEXT";
        case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
            return "CUDA_ERROR_CONTEXT_ALREADY_CURRENT";
        case CUDA_ERROR_MAP_FAILED:
            return "CUDA_ERROR_MAP_FAILED";
        case CUDA_ERROR_UNMAP_FAILED:
            return "CUDA_ERROR_UNMAP_FAILED";
        case CUDA_ERROR_ARRAY_IS_MAPPED:
            return "CUDA_ERROR_ARRAY_IS_MAPPED";
        case CUDA_ERROR_ALREADY_MAPPED:
            return "CUDA_ERROR_ALREADY_MAPPED";
        case CUDA_ERROR_NO_BINARY_FOR_GPU:
            return "CUDA_ERROR_NO_BINARY_FOR_GPU";
        case CUDA_ERROR_ALREADY_ACQUIRED:
            return "CUDA_ERROR_ALREADY_ACQUIRED";
        case CUDA_ERROR_NOT_MAPPED:
            return "CUDA_ERROR_NOT_MAPPED";
        case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
            return "CUDA_ERROR_NOT_MAPPED_AS_ARRAY";
        case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
            return "CUDA_ERROR_NOT_MAPPED_AS_POINTER";
        case CUDA_ERROR_ECC_UNCORRECTABLE:
            return "CUDA_ERROR_ECC_UNCORRECTABLE";
        case CUDA_ERROR_UNSUPPORTED_LIMIT:
            return "CUDA_ERROR_UNSUPPORTED_LIMIT";
        case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
            return "CUDA_ERROR_CONTEXT_ALREADY_IN_USE";
        case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
            return "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED";
        case CUDA_ERROR_INVALID_PTX:
            return "CUDA_ERROR_INVALID_PTX";
        case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT:
            return "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT";
        case CUDA_ERROR_INVALID_SOURCE:
            return "CUDA_ERROR_INVALID_SOURCE";
        case CUDA_ERROR_FILE_NOT_FOUND:
            return "CUDA_ERROR_FILE_NOT_FOUND";
        case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
            return "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND";
        case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
            return "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED";
        case CUDA_ERROR_OPERATING_SYSTEM:
            return "CUDA_ERROR_OPERATING_SYSTEM";
        case CUDA_ERROR_INVALID_HANDLE:
            return "CUDA_ERROR_INVALID_HANDLE";
        case CUDA_ERROR_NOT_FOUND:
            return "CUDA_ERROR_NOT_FOUND";
        case CUDA_ERROR_NOT_READY:
            return "CUDA_ERROR_NOT_READY";
        case CUDA_ERROR_ILLEGAL_ADDRESS:
            return "CUDA_ERROR_ILLEGAL_ADDRESS";
        case CUDA_ERROR_LAUNCH_FAILED:
            return "CUDA_ERROR_LAUNCH_FAILED";
        case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
            return "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES";
        case CUDA_ERROR_LAUNCH_TIMEOUT:
            return "CUDA_ERROR_LAUNCH_TIMEOUT";
        case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING:
            return "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING";
        case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
            return "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED";
        case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
            return "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED";
        case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
            return "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE";
        case CUDA_ERROR_CONTEXT_IS_DESTROYED:
            return "CUDA_ERROR_CONTEXT_IS_DESTROYED";
        case CUDA_ERROR_ASSERT:
            return "CUDA_ERROR_ASSERT";
        case CUDA_ERROR_TOO_MANY_PEERS:
            return "CUDA_ERROR_TOO_MANY_PEERS";
        case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
            return "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED";
        case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
            return "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED";
        case CUDA_ERROR_HARDWARE_STACK_ERROR:
            return "CUDA_ERROR_HARDWARE_STACK_ERROR";
        case CUDA_ERROR_ILLEGAL_INSTRUCTION:
            return "CUDA_ERROR_ILLEGAL_INSTRUCTION";
        case CUDA_ERROR_MISALIGNED_ADDRESS:
            return "CUDA_ERROR_MISALIGNED_ADDRESS";
        case CUDA_ERROR_INVALID_ADDRESS_SPACE:
            return "CUDA_ERROR_INVALID_ADDRESS_SPACE";
        case CUDA_ERROR_INVALID_PC:
            return "CUDA_ERROR_INVALID_PC";
        case CUDA_ERROR_NOT_PERMITTED:
            return "CUDA_ERROR_NOT_PERMITTED";
        case CUDA_ERROR_NOT_SUPPORTED:
            return "CUDA_ERROR_NOT_SUPPORTED";
        case CUDA_ERROR_UNKNOWN:
            return "CUDA_ERROR_UNKNOWN";
    }
    // Reached only when no case above matched.
    return "<unknown>";
}
#endif
#ifdef CUBLAS_API_H_
// cuBLAS API errors
// Translates a cublasStatus_t value into the spelling of its enumerator.
// Returns a string literal naming the matching enumerator, or "<unknown>" when
// the value matches none of the cases listed in the switch.
static const char *_cudaGetErrorEnum(cublasStatus_t error)
{
    switch (error)
    {
        case CUBLAS_STATUS_SUCCESS:
            return "CUBLAS_STATUS_SUCCESS";
        case CUBLAS_STATUS_NOT_INITIALIZED:
            return "CUBLAS_STATUS_NOT_INITIALIZED";
        case CUBLAS_STATUS_ALLOC_FAILED:
            return "CUBLAS_STATUS_ALLOC_FAILED";
        case CUBLAS_STATUS_INVALID_VALUE:
            return "CUBLAS_STATUS_INVALID_VALUE";
        case CUBLAS_STATUS_ARCH_MISMATCH:
            return "CUBLAS_STATUS_ARCH_MISMATCH";
        case CUBLAS_STATUS_MAPPING_ERROR:
            return "CUBLAS_STATUS_MAPPING_ERROR";
        case CUBLAS_STATUS_EXECUTION_FAILED:
            return "CUBLAS_STATUS_EXECUTION_FAILED";
        case CUBLAS_STATUS_INTERNAL_ERROR:
            return "CUBLAS_STATUS_INTERNAL_ERROR";
        case CUBLAS_STATUS_NOT_SUPPORTED:
            return "CUBLAS_STATUS_NOT_SUPPORTED";
        case CUBLAS_STATUS_LICENSE_ERROR:
            return "CUBLAS_STATUS_LICENSE_ERROR";
    }
    // Reached only when no case above matched.
    return "<unknown>";
}
#endif
#ifdef _CUFFT_H_
// cuFFT API errors
// Translates a cufftResult value into the spelling of its enumerator.
// Returns a string literal naming the matching enumerator, or "<unknown>" when
// the value matches none of the cases listed in the switch.
static const char *_cudaGetErrorEnum(cufftResult error)
{
    switch (error)
    {
        case CUFFT_SUCCESS:
            return "CUFFT_SUCCESS";
        case CUFFT_INVALID_PLAN:
            return "CUFFT_INVALID_PLAN";
        case CUFFT_ALLOC_FAILED:
            return "CUFFT_ALLOC_FAILED";
        case CUFFT_INVALID_TYPE:
            return "CUFFT_INVALID_TYPE";
        case CUFFT_INVALID_VALUE:
            return "CUFFT_INVALID_VALUE";
        case CUFFT_INTERNAL_ERROR:
            return "CUFFT_INTERNAL_ERROR";
        case CUFFT_EXEC_FAILED:
            return "CUFFT_EXEC_FAILED";
        case CUFFT_SETUP_FAILED:
            return "CUFFT_SETUP_FAILED";
        case CUFFT_INVALID_SIZE:
            return "CUFFT_INVALID_SIZE";
        case CUFFT_UNALIGNED_DATA:
            return "CUFFT_UNALIGNED_DATA";
        case CUFFT_INCOMPLETE_PARAMETER_LIST:
            return "CUFFT_INCOMPLETE_PARAMETER_LIST";
        case CUFFT_INVALID_DEVICE:
            return "CUFFT_INVALID_DEVICE";
        case CUFFT_PARSE_ERROR:
            return "CUFFT_PARSE_ERROR";
        case CUFFT_NO_WORKSPACE:
            return "CUFFT_NO_WORKSPACE";
        case CUFFT_NOT_IMPLEMENTED:
            return "CUFFT_NOT_IMPLEMENTED";
        case CUFFT_LICENSE_ERROR:
            return "CUFFT_LICENSE_ERROR";
    }
    // Reached only when no case above matched.
    return "<unknown>";
}
#endif
#ifdef CUSPARSEAPI
// cuSPARSE API errors
// Translates a cusparseStatus_t value into the spelling of its enumerator.
// Returns a string literal naming the matching enumerator, or "<unknown>" when
// the value matches none of the cases listed in the switch.
static const char *_cudaGetErrorEnum(cusparseStatus_t error)
{
    switch (error)
    {
        case CUSPARSE_STATUS_SUCCESS:
            return "CUSPARSE_STATUS_SUCCESS";
        case CUSPARSE_STATUS_NOT_INITIALIZED:
            return "CUSPARSE_STATUS_NOT_INITIALIZED";
        case CUSPARSE_STATUS_ALLOC_FAILED:
            return "CUSPARSE_STATUS_ALLOC_FAILED";
        case CUSPARSE_STATUS_INVALID_VALUE:
            return "CUSPARSE_STATUS_INVALID_VALUE";
        case CUSPARSE_STATUS_ARCH_MISMATCH:
            return "CUSPARSE_STATUS_ARCH_MISMATCH";
        case CUSPARSE_STATUS_MAPPING_ERROR:
            return "CUSPARSE_STATUS_MAPPING_ERROR";
        case CUSPARSE_STATUS_EXECUTION_FAILED:
            return "CUSPARSE_STATUS_EXECUTION_FAILED";
        case CUSPARSE_STATUS_INTERNAL_ERROR:
            return "CUSPARSE_STATUS_INTERNAL_ERROR";
        case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
            return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
    }
    // Reached only when no case above matched.
    return "<unknown>";
}
#endif
#ifdef CUSOLVER_COMMON_H_
//cuSOLVER API errors
static const char *_cudaGetErrorEnum(cusolverStatus_t error)
{
   switch(error)
   {
       case CUSOLVER_STATUS_SUCCESS:
           return "CUSOLVER_STATUS_SUCCESS";
       case CUSOLVER_STATUS_NOT_INITIALIZED:
           return "CUSOLVER_STATUS_NOT_INITIALIZED";
       case CUSOLVER_STATUS_ALLOC_FAILED:
           return "CUSOLVER_STATUS_ALLOC_FAILED";
       case CUSOLVER_STATUS_INVALID_VALUE:
           return "CUSOLVER_STATUS_INVALID_VALUE";
       case CUSOLVER_STATUS_ARCH_MISMATCH:
           return "CUSOLVER_STATUS_ARCH_MISMATCH";
       case CUSOLVER_STATUS_MAPPING_ERROR:
           return "CUSOLVER_STATUS_MAPPING_ERROR";
       case CUSOLVER_STATUS_EXECUTION_FAILED:
           return "CUSOLVER_STATUS_EXECUTION_FAILED";
       case CUSOLVER_STATUS_INTERNAL_ERROR:
           return "CUSOLVER_STATUS_INTERNAL_ERROR";
       case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
           return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
       case CUSOLVER_STATUS_NOT_SUPPORTED :
           return "CUSOLVER_STATUS_NOT_SUPPORTED ";
       case CUSOLVER_STATUS_ZERO_PIVOT:
           return "CUSOLVER_STATUS_ZERO_PIVOT";
       case CUSOLVER_STATUS_INVALID_LICENSE:
           return "CUSOLVER_STATUS_INVALID_LICENSE";
    }
    return "<unknown>";
}
#endif
#ifdef CURAND_H_
// cuRAND API errors
// Translates a curandStatus_t value into the spelling of its enumerator.
// Returns a string literal naming the matching enumerator, or "<unknown>" when
// the value matches none of the cases listed in the switch.
static const char *_cudaGetErrorEnum(curandStatus_t error)
{
    switch (error)
    {
        case CURAND_STATUS_SUCCESS:
            return "CURAND_STATUS_SUCCESS";
        case CURAND_STATUS_VERSION_MISMATCH:
            return "CURAND_STATUS_VERSION_MISMATCH";
        case CURAND_STATUS_NOT_INITIALIZED:
            return "CURAND_STATUS_NOT_INITIALIZED";
        case CURAND_STATUS_ALLOCATION_FAILED:
            return "CURAND_STATUS_ALLOCATION_FAILED";
        case CURAND_STATUS_TYPE_ERROR:
            return "CURAND_STATUS_TYPE_ERROR";
        case CURAND_STATUS_OUT_OF_RANGE:
            return "CURAND_STATUS_OUT_OF_RANGE";
        case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
            return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
        case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
            return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
        case CURAND_STATUS_LAUNCH_FAILURE:
            return "CURAND_STATUS_LAUNCH_FAILURE";
        case CURAND_STATUS_PREEXISTING_FAILURE:
            return "CURAND_STATUS_PREEXISTING_FAILURE";
        case CURAND_STATUS_INITIALIZATION_FAILED:
            return "CURAND_STATUS_INITIALIZATION_FAILED";
        case CURAND_STATUS_ARCH_MISMATCH:
            return "CURAND_STATUS_ARCH_MISMATCH";
        case CURAND_STATUS_INTERNAL_ERROR:
            return "CURAND_STATUS_INTERNAL_ERROR";
    }
    // Reached only when no case above matched.
    return "<unknown>";
}
#endif
#ifdef NV_NPPIDEFS_H
// NPP API errors
// Translates an NppStatus value into a descriptive enumerator-style name.
// Returns a string literal for the matching case, or "<unknown>" when the value
// matches none of the cases compiled in. Which cases exist depends on the value
// (NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4) tested by the
// preprocessor conditionals below.
static const char *_cudaGetErrorEnum(NppStatus error)
{
    switch (error)
    {
        case NPP_NOT_SUPPORTED_MODE_ERROR:
            return "NPP_NOT_SUPPORTED_MODE_ERROR";
        case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR:
            return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR";
        case NPP_RESIZE_NO_OPERATION_ERROR:
            return "NPP_RESIZE_NO_OPERATION_ERROR";
        case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY:
            return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY";
        // Computed version value <= 0x5000: the shorter enumerator spellings are
        // used; several of them are reported with the longer spelling that the
        // #else branch uses.
#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
        case NPP_BAD_ARG_ERROR:
            return "NPP_BAD_ARGUMENT_ERROR";
        case NPP_COEFF_ERROR:
            return "NPP_COEFFICIENT_ERROR";
        case NPP_RECT_ERROR:
            return "NPP_RECTANGLE_ERROR";
        case NPP_QUAD_ERROR:
            return "NPP_QUADRANGLE_ERROR";
        case NPP_MEM_ALLOC_ERR:
            return "NPP_MEMORY_ALLOCATION_ERROR";
        case NPP_HISTO_NUMBER_OF_LEVELS_ERROR:
            return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
        case NPP_INVALID_INPUT:
            return "NPP_INVALID_INPUT";
        case NPP_POINTER_ERROR:
            return "NPP_POINTER_ERROR";
        case NPP_WARNING:
            return "NPP_WARNING";
        case NPP_ODD_ROI_WARNING:
            return "NPP_ODD_ROI_WARNING";
#else
            // These are for CUDA 5.5 or higher
        case NPP_BAD_ARGUMENT_ERROR:
            return "NPP_BAD_ARGUMENT_ERROR";
        case NPP_COEFFICIENT_ERROR:
            return "NPP_COEFFICIENT_ERROR";
        case NPP_RECTANGLE_ERROR:
            return "NPP_RECTANGLE_ERROR";
        case NPP_QUADRANGLE_ERROR:
            return "NPP_QUADRANGLE_ERROR";
        case NPP_MEMORY_ALLOCATION_ERR:
            return "NPP_MEMORY_ALLOCATION_ERROR";
        case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR:
            return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
        case NPP_INVALID_HOST_POINTER_ERROR:
            return "NPP_INVALID_HOST_POINTER_ERROR";
        case NPP_INVALID_DEVICE_POINTER_ERROR:
            return "NPP_INVALID_DEVICE_POINTER_ERROR";
#endif
        case NPP_LUT_NUMBER_OF_LEVELS_ERROR:
            return "NPP_LUT_NUMBER_OF_LEVELS_ERROR";
        case NPP_TEXTURE_BIND_ERROR:
            return "NPP_TEXTURE_BIND_ERROR";
        case NPP_WRONG_INTERSECTION_ROI_ERROR:
            return "NPP_WRONG_INTERSECTION_ROI_ERROR";
        case NPP_NOT_EVEN_STEP_ERROR:
            return "NPP_NOT_EVEN_STEP_ERROR";
        case NPP_INTERPOLATION_ERROR:
            return "NPP_INTERPOLATION_ERROR";
        case NPP_RESIZE_FACTOR_ERROR:
            return "NPP_RESIZE_FACTOR_ERROR";
        case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR:
            return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR";
        // Same version split as above, this time for the memory/mirror codes.
        // NOTE(review): NPP_MEMCPY_ERR is reported as "NPP_MEMCPY_ERROR" while its
        // neighbours keep the "_ERR" suffix - confirm whether that is intended.
#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
        case NPP_MEMFREE_ERR:
            return "NPP_MEMFREE_ERR";
        case NPP_MEMSET_ERR:
            return "NPP_MEMSET_ERR";
        case NPP_MEMCPY_ERR:
            return "NPP_MEMCPY_ERROR";
        case NPP_MIRROR_FLIP_ERR:
            return "NPP_MIRROR_FLIP_ERR";
#else
        case NPP_MEMFREE_ERROR:
            return "NPP_MEMFREE_ERROR";
        case NPP_MEMSET_ERROR:
            return "NPP_MEMSET_ERROR";
        case NPP_MEMCPY_ERROR:
            return "NPP_MEMCPY_ERROR";
        case NPP_MIRROR_FLIP_ERROR:
            return "NPP_MIRROR_FLIP_ERROR";
#endif
        case NPP_ALIGNMENT_ERROR:
            return "NPP_ALIGNMENT_ERROR";
        case NPP_STEP_ERROR:
            return "NPP_STEP_ERROR";
        case NPP_SIZE_ERROR:
            return "NPP_SIZE_ERROR";
        case NPP_NULL_POINTER_ERROR:
            return "NPP_NULL_POINTER_ERROR";
        case NPP_CUDA_KERNEL_EXECUTION_ERROR:
            return "NPP_CUDA_KERNEL_EXECUTION_ERROR";
        case NPP_NOT_IMPLEMENTED_ERROR:
            return "NPP_NOT_IMPLEMENTED_ERROR";
        case NPP_ERROR:
            return "NPP_ERROR";
        case NPP_SUCCESS:
            return "NPP_SUCCESS";
        case NPP_WRONG_INTERSECTION_QUAD_WARNING:
            return "NPP_WRONG_INTERSECTION_QUAD_WARNING";
        case NPP_MISALIGNED_DST_ROI_WARNING:
            return "NPP_MISALIGNED_DST_ROI_WARNING";
        case NPP_AFFINE_QUAD_INCORRECT_WARNING:
            return "NPP_AFFINE_QUAD_INCORRECT_WARNING";
        case NPP_DOUBLE_SIZE_WARNING:
            return "NPP_DOUBLE_SIZE_WARNING";
        case NPP_WRONG_INTERSECTION_ROI_WARNING:
            return "NPP_WRONG_INTERSECTION_ROI_WARNING";
#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000
        /* These are 6.0 or higher */
        case NPP_LUT_PALETTE_BITSIZE_ERROR:
            return "NPP_LUT_PALETTE_BITSIZE_ERROR";
        case NPP_ZC_MODE_NOT_SUPPORTED_ERROR:
            return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR";
        case NPP_QUALITY_INDEX_ERROR:
            return "NPP_QUALITY_INDEX_ERROR";
        case NPP_CHANNEL_ORDER_ERROR:
            return "NPP_CHANNEL_ORDER_ERROR";
        case NPP_ZERO_MASK_VALUE_ERROR:
            return "NPP_ZERO_MASK_VALUE_ERROR";
        case NPP_NUMBER_OF_CHANNELS_ERROR:
            return "NPP_NUMBER_OF_CHANNELS_ERROR";
        case NPP_COI_ERROR:
            return "NPP_COI_ERROR";
        case NPP_DIVISOR_ERROR:
            return "NPP_DIVISOR_ERROR";
        case NPP_CHANNEL_ERROR:
            return "NPP_CHANNEL_ERROR";
        case NPP_STRIDE_ERROR:
            return "NPP_STRIDE_ERROR";
        case NPP_ANCHOR_ERROR:
            return "NPP_ANCHOR_ERROR";
        case NPP_MASK_SIZE_ERROR:
            return "NPP_MASK_SIZE_ERROR";
        case NPP_MOMENT_00_ZERO_ERROR:
            return "NPP_MOMENT_00_ZERO_ERROR";
        case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR:
            return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR";
        case NPP_THRESHOLD_ERROR:
            return "NPP_THRESHOLD_ERROR";
        case NPP_CONTEXT_MATCH_ERROR:
            return "NPP_CONTEXT_MATCH_ERROR";
        case NPP_FFT_FLAG_ERROR:
            return "NPP_FFT_FLAG_ERROR";
        case NPP_FFT_ORDER_ERROR:
            return "NPP_FFT_ORDER_ERROR";
        case NPP_SCALE_RANGE_ERROR:
            return "NPP_SCALE_RANGE_ERROR";
        case NPP_DATA_TYPE_ERROR:
            return "NPP_DATA_TYPE_ERROR";
        case NPP_OUT_OFF_RANGE_ERROR:
            return "NPP_OUT_OFF_RANGE_ERROR";
        case NPP_DIVIDE_BY_ZERO_ERROR:
            return "NPP_DIVIDE_BY_ZERO_ERROR";
        case NPP_RANGE_ERROR:
            return "NPP_RANGE_ERROR";
        case NPP_NO_MEMORY_ERROR:
            return "NPP_NO_MEMORY_ERROR";
        case NPP_ERROR_RESERVED:
            return "NPP_ERROR_RESERVED";
        case NPP_NO_OPERATION_WARNING:
            return "NPP_NO_OPERATION_WARNING";
        case NPP_DIVIDE_BY_ZERO_WARNING:
            return "NPP_DIVIDE_BY_ZERO_WARNING";
#endif
    }
    // Reached only when no compiled-in case matched.
    return "<unknown>";
}
#endif
// DEVICE_RESET is the statement the error helpers in this header run right
// before calling exit(). When __DRIVER_TYPES_H__ is defined it expands to a
// cudaDeviceReset() call (the macro carries its own trailing semicolon);
// otherwise it expands to nothing. A DEVICE_RESET defined earlier is kept as is.
#ifdef __DRIVER_TYPES_H__
#ifndef DEVICE_RESET
#define DEVICE_RESET cudaDeviceReset();
#endif
#else
#ifndef DEVICE_RESET
#define DEVICE_RESET
#endif
#endif
template< typename T >
void check(T result, char const *const func, const char *const file, int const line)
{
    if (result)
    {
        fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n",
                file, line, static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
        DEVICE_RESET
        // Make sure we call CUDA Device Reset before exiting
        exit(EXIT_FAILURE);
    }
}
#ifdef __DRIVER_TYPES_H__
// This will output the proper CUDA error strings in the event that a CUDA host call returns an error.
// Forwards the value of val, its source text, and the call site's file and line to check().
#define checkCudaErrors(val)           check ( (val), #val, __FILE__, __LINE__ )
// This will output the proper error string when calling cudaGetLastError.
// Forwards msg and the call site's file and line to __getLastCudaError().
#define getLastCudaError(msg)      __getLastCudaError (msg, __FILE__, __LINE__)
inline void __getLastCudaError(const char *errorMessage, const char *file, const int line)
{
    cudaError_t err = cudaGetLastError();
    if (cudaSuccess != err)
    {
        fprintf(stderr, "%s(%i) : getLastCudaError() CUDA error : %s : (%d) %s.\n",
                file, line, errorMessage, (int)err, cudaGetErrorString(err));
        DEVICE_RESET
        exit(EXIT_FAILURE);
    }
}
#endif
#ifndef MAX
// Larger of a and b. Each argument is parenthesized so that operands containing
// lower-precedence operators (e.g. MAX(x & mask, y)) are compared as written.
// The selected argument is evaluated twice, so avoid arguments with side effects.
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
// Float to int conversion: rounds to the nearest integer, with halfway cases
// rounded away from zero. The 0.5 offset is applied in double precision and the
// result is then truncated toward zero by the cast.
inline int ftoi(float value)
{
    const double shifted = (value >= 0) ? value + 0.5 : value - 0.5;
    return (int)shifted;
}
// Beginning of GPU Architecture definitions
// Maps a compute capability (major.minor) to the number of cores per SM.
//
// @param major  SM major version
// @param minor  SM minor version
// @return cores per SM for a known version; for an unknown version a notice is
//         printed and the value of the last table entry is returned so callers
//         can keep running.
inline int _ConvertSMVer2Cores(int major, int minor)
{
    // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
    typedef struct
    {
        int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
        int Cores;
    } sSMtoCores;

    static const sSMtoCores nGpuArchCoresPerSM[] =
    {
        { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
        { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
        { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
        { 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
        { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
        { 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
        { 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
        { 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
        { 0x53, 128}, // Maxwell Generation (SM 5.3) GM20x class
        { 0x60, 64 }, // Pascal Generation (SM 6.0) GP100 class
        { 0x61, 128}, // Pascal Generation (SM 6.1) GP10x class
        { 0x62, 128}, // Pascal Generation (SM 6.2) GP10x class
        { 0x70, 64 }, // Volta Generation (SM 7.0) GV100 class
        { 0x72, 64 }, // Volta Generation (SM 7.2) GV11b class
        { 0x75, 64 }, // Turing Generation (SM 7.5) TU10x class
        { 0x80, 64 }, // Ampere Generation (SM 8.0) GA100 class
        { 0x86, 128}, // Ampere Generation (SM 8.6) GA10x class
        { 0x87, 128}, // Ampere Generation (SM 8.7) GA10x class
        { 0x89, 128}, // Ada Generation (SM 8.9) AD10x class
        { 0x90, 128}  // Hopper Generation (SM 9.0) GH100 class
    };
    const int entryCount = (int)(sizeof(nGpuArchCoresPerSM) / sizeof(nGpuArchCoresPerSM[0]));
    const int smVersion  = (major << 4) + minor;

    for (int index = 0; index < entryCount; ++index)
    {
        if (nGpuArchCoresPerSM[index].SM == smVersion)
        {
            return nGpuArchCoresPerSM[index].Cores;
        }
    }

    // If we don't find the values, we default to the last known entry to run properly
    const int fallbackCores = nGpuArchCoresPerSM[entryCount - 1].Cores;
    printf("MapSMtoCores for SM %d.%d is undefined.  Default to use %d Cores/SM\n", major, minor, fallbackCores);
    return fallbackCores;
}
// end of GPU Architecture definitions
#ifdef __CUDA_RUNTIME_H__
// General GPU Device CUDA Initialization
inline int gpuDeviceInit(int devID)
{
    int device_count;
    checkCudaErrors(cudaGetDeviceCount(&device_count));
    if (device_count == 0)
    {
        fprintf(stderr, "gpuDeviceInit() CUDA error: no devices supporting CUDA.\n");
        exit(EXIT_FAILURE);
    }
    if (devID < 0)
    {
        devID = 0;
    }
    if (devID > device_count-1)
    {
        fprintf(stderr, "\n");
        fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", device_count);
        fprintf(stderr, ">> gpuDeviceInit (-device=%d) is not a valid GPU device. <<\n", devID);
        fprintf(stderr, "\n");
        return -devID;
    }
    cudaDeviceProp deviceProp;
    checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
    if (deviceProp.computeMode == cudaComputeModeProhibited)
    {
        fprintf(stderr, "Error: device is running in <Compute Mode Prohibited>, no threads can use ::cudaSetDevice().\n");
        return -1;
    }
    if (deviceProp.major < 1)
    {
        fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n");
        exit(EXIT_FAILURE);
    }
    checkCudaErrors(cudaSetDevice(devID));
    printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, deviceProp.name);
    return devID;
}
// This function returns the best GPU (with maximum GFLOPS)
inline int gpuGetMaxGflopsDeviceId()
{
    int current_device     = 0, sm_per_multiproc  = 0;
    int max_perf_device    = 0;
    int device_count       = 0, best_SM_arch      = 0;
    int devices_prohibited = 0;
    unsigned long long max_compute_perf = 0;
    cudaDeviceProp deviceProp;
    cudaGetDeviceCount(&device_count);
    checkCudaErrors(cudaGetDeviceCount(&device_count));
    if (device_count == 0)
    {
        fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: no devices supporting CUDA.\n");
        exit(EXIT_FAILURE);
    }
    // Find the best major SM Architecture GPU device
    while (current_device < device_count)
    {
        cudaGetDeviceProperties(&deviceProp, current_device);
        // If this GPU is not running on Compute Mode prohibited, then we can add it to the list
        if (deviceProp.computeMode != cudaComputeModeProhibited)
        {
            if (deviceProp.major > 0 && deviceProp.major < 9999)
            {
                best_SM_arch = MAX(best_SM_arch, deviceProp.major);
            }
        }
        else
        {
            devices_prohibited++;
        }
        current_device++;
    }
    if (devices_prohibited == device_count)
    {
        fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: all devices have compute mode prohibited.\n");
        exit(EXIT_FAILURE);
    }
    // Find the best CUDA capable GPU device
    current_device = 0;
    while (current_device < device_count)
    {
        cudaGetDeviceProperties(&deviceProp, current_device);
        // If this GPU is not running on Compute Mode prohibited, then we can add it to the list
        if (deviceProp.computeMode != cudaComputeModeProhibited)
        {
            if (deviceProp.major == 9999 && deviceProp.minor == 9999)
            {
                sm_per_multiproc = 1;
            }
            else
            {
                sm_per_multiproc = _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor);
            }
            unsigned long long compute_perf  = (unsigned long long) deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate;
            if (compute_perf  > max_compute_perf)
            {
                // If we find GPU with SM major > 2, search only these
                if (best_SM_arch > 2)
                {
                    // If our device==dest_SM_arch, choose this, or else pass
                    if (deviceProp.major == best_SM_arch)
                    {
                        max_compute_perf  = compute_perf;
                        max_perf_device   = current_device;
                    }
                }
                else
                {
                    max_compute_perf  = compute_perf;
                    max_perf_device   = current_device;
                }
            }
        }
        ++current_device;
    }
    return max_perf_device;
}
// Initialization code to find the best CUDA Device
inline int findCudaDevice(int argc, const char **argv)
{
    cudaDeviceProp deviceProp;
    int devID = 0;
    // If the command-line has a device number specified, use it
    if (checkCmdLineFlag(argc, argv, "device"))
    {
        devID = getCmdLineArgumentInt(argc, argv, "device=");
        if (devID < 0)
        {
            printf("Invalid command line parameter\n ");
            exit(EXIT_FAILURE);
        }
        else
        {
            devID = gpuDeviceInit(devID);
            if (devID < 0)
            {
                printf("exiting...\n");
                exit(EXIT_FAILURE);
            }
        }
    }
    else
    {
        // Otherwise pick the device with highest Gflops/s
        devID = gpuGetMaxGflopsDeviceId();
        checkCudaErrors(cudaSetDevice(devID));
        checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
        printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID, deviceProp.name, deviceProp.major, deviceProp.minor);
    }
    return devID;
}
// General check for CUDA GPU SM Capabilities
inline bool checkCudaCapabilities(int major_version, int minor_version)
{
    cudaDeviceProp deviceProp;
    deviceProp.major = 0;
    deviceProp.minor = 0;
    int dev;
    checkCudaErrors(cudaGetDevice(&dev));
    checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));
    if ((deviceProp.major > major_version) ||
        (deviceProp.major == major_version && deviceProp.minor >= minor_version))
    {
        printf("  Device %d: <%16s >, Compute SM %d.%d detected\n", dev, deviceProp.name, deviceProp.major, deviceProp.minor);
        return true;
    }
    else
    {
        printf("  No GPU device was found that can support CUDA compute capability %d.%d.\n", major_version, minor_version);
        return false;
    }
}
#endif
// end of CUDA Helper Functions
#endif
goconv/inc/helper_string.h
New file
@@ -0,0 +1,526 @@
/**
 * Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 *
 */
// These are helper functions for the SDK samples (string parsing, timers, etc)
#ifndef STRING_HELPER_H
#define STRING_HELPER_H
#include <stdio.h>
#include <stdlib.h>
#include <fstream>
#include <string>
// Portability layer: each macro below is defined only if the including code
// has not already defined it, and maps one name onto the platform's string /
// file routine. On Windows the *_s and _str*icmp variants are used; on other
// platforms the plain C library functions are used.
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#ifndef _CRT_SECURE_NO_DEPRECATE
#define _CRT_SECURE_NO_DEPRECATE
#endif
// Case-insensitive comparison (whole string / first n characters)
#ifndef STRCASECMP
#define STRCASECMP  _stricmp
#endif
#ifndef STRNCASECMP
#define STRNCASECMP _strnicmp
#endif
// String copy; nLength is the destination size and is forwarded to strcpy_s
#ifndef STRCPY
#define STRCPY(sFilePath, nLength, sPath) strcpy_s(sFilePath, nLength, sPath)
#endif
// File open; fopen_s stores the stream in fHandle and the macro yields its result code
#ifndef FOPEN
#define FOPEN(fHandle,filename,mode) fopen_s(&fHandle, filename, mode)
#endif
// Failure test for the value FOPEN yields here (non-zero result code)
#ifndef FOPEN_FAIL
#define FOPEN_FAIL(result) (result != 0)
#endif
#ifndef SSCANF
#define SSCANF sscanf_s
#endif
#ifndef SPRINTF
#define SPRINTF sprintf_s
#endif
#else // Linux Includes
#include <string.h>
#include <strings.h>
// Case-insensitive comparison (whole string / first n characters)
#ifndef STRCASECMP
#define STRCASECMP  strcasecmp
#endif
#ifndef STRNCASECMP
#define STRNCASECMP strncasecmp
#endif
// String copy; nLength is accepted for signature parity but NOT used by strcpy
#ifndef STRCPY
#define STRCPY(sFilePath, nLength, sPath) strcpy(sFilePath, sPath)
#endif
// File open; assigns the stream to fHandle and yields that pointer
#ifndef FOPEN
#define FOPEN(fHandle,filename,mode) (fHandle = fopen(filename, mode))
#endif
// Failure test for the value FOPEN yields here (NULL stream pointer)
#ifndef FOPEN_FAIL
#define FOPEN_FAIL(result) (result == NULL)
#endif
#ifndef SSCANF
#define SSCANF sscanf
#endif
#ifndef SPRINTF
#define SPRINTF sprintf
#endif
#endif
// EXIT_WAIVED defaults to 2 unless it is already defined
#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
// CUDA Utility Helper Functions
// Returns the index of the first character of `string` that follows its
// leading run of `delimiter` characters (e.g. 2 for "--device=1" with '-').
// Returns 0 when the string is empty or consists only of delimiters, so the
// caller then sees the string unchanged.
//
// The previous bound (`>= strlen(string) - 1`) was off by one and also
// returned 0 for one-character names such as "-n" or "-?", which therefore
// could never match their flag.
inline int stringRemoveDelimiter(char delimiter, const char *string)
{
    int string_start = 0;

    // Stop at the terminator even if the delimiter itself is '\0'
    while (string[string_start] != '\0' && string[string_start] == delimiter)
    {
        string_start++;
    }

    // Nothing left after the delimiters: report "no prefix to skip"
    if (string[string_start] == '\0')
    {
        return 0;
    }

    return string_start;
}
// Locates the extension of `filename`, i.e. the text after its last '.'.
//
// @param filename   NUL-terminated name to inspect (not modified)
// @param extension  receives a pointer into `filename` just past the last
//                   '.', or NULL when there is no extension
// @return index of the first extension character within `filename`, or 0
//         when there is none. A name with no '.' or whose only '.' is the
//         first character (e.g. ".bashrc") has no extension.
//
// Uses strrchr instead of the former backwards scan, which read before the
// start of an empty string and reported no extension for names whose dot
// sits at index 1 (e.g. "a.txt").
inline int getFileExtension(char *filename, char **extension)
{
    char *last_dot = strrchr(filename, '.');

    if (last_dot == NULL || last_dot == filename)
    {
        *extension = NULL;
        return 0;
    }

    *extension = last_dot + 1;
    return (int)(*extension - filename);
}
inline bool checkCmdLineFlag(const int argc, const char **argv, const char *string_ref)
{
    bool bFound = false;
    if (argc >= 1)
    {
        for (int i=1; i < argc; i++)
        {
            int string_start = stringRemoveDelimiter('-', argv[i]);
            const char *string_argv = &argv[i][string_start];
            const char *equal_pos = strchr(string_argv, '=');
            int argv_length = (int)(equal_pos == 0 ? strlen(string_argv) : equal_pos - string_argv);
            int length = (int)strlen(string_ref);
            if (length == argv_length && !STRNCASECMP(string_argv, string_ref, length))
            {
                bFound = true;
                continue;
            }
        }
    }
    return bFound;
}
// This function wraps the CUDA Driver API into a template function
template <class T>
inline bool getCmdLineArgumentValue(const int argc, const char **argv, const char *string_ref, T *value)
{
    bool bFound = false;
    if (argc >= 1)
    {
        for (int i=1; i < argc; i++)
        {
            int string_start = stringRemoveDelimiter('-', argv[i]);
            const char *string_argv = &argv[i][string_start];
            int length = (int)strlen(string_ref);
            if (!STRNCASECMP(string_argv, string_ref, length))
            {
                if (length+1 <= (int)strlen(string_argv))
                {
                    int auto_inc = (string_argv[length] == '=') ? 1 : 0;
                    *value = (T)atoi(&string_argv[length + auto_inc]);
                }
                bFound = true;
                i=argc;
            }
        }
    }
    return bFound;
}
inline int getCmdLineArgumentInt(const int argc, const char **argv, const char *string_ref)
{
    bool bFound = false;
    int value = -1;
    if (argc >= 1)
    {
        for (int i=1; i < argc; i++)
        {
            int string_start = stringRemoveDelimiter('-', argv[i]);
            const char *string_argv = &argv[i][string_start];
            int length = (int)strlen(string_ref);
            if (!STRNCASECMP(string_argv, string_ref, length))
            {
                if (length+1 <= (int)strlen(string_argv))
                {
                    int auto_inc = (string_argv[length] == '=') ? 1 : 0;
                    value = atoi(&string_argv[length + auto_inc]);
                }
                else
                {
                    value = 0;
                }
                bFound = true;
                continue;
            }
        }
    }
    if (bFound)
    {
        return value;
    }
    else
    {
        return 0;
    }
}
inline float getCmdLineArgumentFloat(const int argc, const char **argv, const char *string_ref)
{
    bool bFound = false;
    float value = -1;
    if (argc >= 1)
    {
        for (int i=1; i < argc; i++)
        {
            int string_start = stringRemoveDelimiter('-', argv[i]);
            const char *string_argv = &argv[i][string_start];
            int length = (int)strlen(string_ref);
            if (!STRNCASECMP(string_argv, string_ref, length))
            {
                if (length+1 <= (int)strlen(string_argv))
                {
                    int auto_inc = (string_argv[length] == '=') ? 1 : 0;
                    value = (float)atof(&string_argv[length + auto_inc]);
                }
                else
                {
                    value = 0.f;
                }
                bFound = true;
                continue;
            }
        }
    }
    if (bFound)
    {
        return value;
    }
    else
    {
        return 0;
    }
}
inline bool getCmdLineArgumentString(const int argc, const char **argv,
                                     const char *string_ref, char **string_retval)
{
    bool bFound = false;
    if (argc >= 1)
    {
        for (int i=1; i < argc; i++)
        {
            int string_start = stringRemoveDelimiter('-', argv[i]);
            char *string_argv = (char *)&argv[i][string_start];
            int length = (int)strlen(string_ref);
            if (!STRNCASECMP(string_argv, string_ref, length))
            {
                *string_retval = &string_argv[length+1];
                bFound = true;
                continue;
            }
        }
    }
    if (!bFound)
    {
        *string_retval = NULL;
    }
    return bFound;
}
//////////////////////////////////////////////////////////////////////////////
//! Find the path for a file assuming that
//! files are found in the searchPath.
//!
//! @return the path if succeeded, otherwise 0
//! @param filename         name of the file
//! @param executable_path  optional absolute path of the executable
//////////////////////////////////////////////////////////////////////////////
inline char *sdkFindFilePath(const char *filename, const char *executable_path)
{
    // <executable_name> defines a variable that is replaced with the name of the executable
    // Typical relative search paths to locate needed companion files (e.g. sample input data, or JIT source files)
    // The origin for the relative search may be the .exe file, a .bat file launching an .exe, a browser .exe launching the .exe or .bat, etc
    const char *searchPath[] =
    {
        "./",                                       // same dir
        "./common/",                                // "/common/" subdir
        "./common/data/",                           // "/common/data/" subdir
        "./data/",                                  // "/data/" subdir
        "./src/",                                   // "/src/" subdir
        "./src/<executable_name>/data/",            // "/src/<executable_name>/data/" subdir
        "./inc/",                                   // "/inc/" subdir
        "./0_Simple/",                              // "/0_Simple/" subdir
        "./1_Utilities/",                           // "/1_Utilities/" subdir
        "./2_Graphics/",                            // "/2_Graphics/" subdir
        "./3_Imaging/",                             // "/3_Imaging/" subdir
        "./4_Finance/",                             // "/4_Finance/" subdir
        "./5_Simulations/",                         // "/5_Simulations/" subdir
        "./6_Advanced/",                            // "/6_Advanced/" subdir
        "./7_CUDALibraries/",                       // "/7_CUDALibraries/" subdir
        "./8_Android/",                             // "/8_Android/" subdir
        "./samples/",                               // "/samples/" subdir
        "./0_Simple/<executable_name>/data/",        // "/0_Simple/<executable_name>/data/" subdir
        "./1_Utilities/<executable_name>/data/",     // "/1_Utilities/<executable_name>/data/" subdir
        "./2_Graphics/<executable_name>/data/",      // "/2_Graphics/<executable_name>/data/" subdir
        "./3_Imaging/<executable_name>/data/",       // "/3_Imaging/<executable_name>/data/" subdir
        "./4_Finance/<executable_name>/data/",       // "/4_Finance/<executable_name>/data/" subdir
        "./5_Simulations/<executable_name>/data/",   // "/5_Simulations/<executable_name>/data/" subdir
        "./6_Advanced/<executable_name>/data/",      // "/6_Advanced/<executable_name>/data/" subdir
        "./7_CUDALibraries/<executable_name>/",      // "/7_CUDALibraries/<executable_name>/" subdir
        "./7_CUDALibraries/<executable_name>/data/", // "/7_CUDALibraries/<executable_name>/data/" subdir
        "../",                                      // up 1 in tree
        "../common/",                               // up 1 in tree, "/common/" subdir
        "../common/data/",                          // up 1 in tree, "/common/data/" subdir
        "../data/",                                 // up 1 in tree, "/data/" subdir
        "../src/",                                  // up 1 in tree, "/src/" subdir
        "../inc/",                                  // up 1 in tree, "/inc/" subdir
        "../0_Simple/<executable_name>/data/",       // up 1 in tree, "/0_Simple/<executable_name>/" subdir
        "../1_Utilities/<executable_name>/data/",    // up 1 in tree, "/1_Utilities/<executable_name>/" subdir
        "../2_Graphics/<executable_name>/data/",     // up 1 in tree, "/2_Graphics/<executable_name>/" subdir
        "../3_Imaging/<executable_name>/data/",      // up 1 in tree, "/3_Imaging/<executable_name>/" subdir
        "../4_Finance/<executable_name>/data/",      // up 1 in tree, "/4_Finance/<executable_name>/" subdir
        "../5_Simulations/<executable_name>/data/",  // up 1 in tree, "/5_Simulations/<executable_name>/" subdir
        "../6_Advanced/<executable_name>/data/",     // up 1 in tree, "/6_Advanced/<executable_name>/" subdir
        "../7_CUDALibraries/<executable_name>/data/",// up 1 in tree, "/7_CUDALibraries/<executable_name>/" subdir
        "../8_Android/<executable_name>/data/",      // up 1 in tree, "/8_Android/<executable_name>/" subdir
        "../samples/<executable_name>/data/",        // up 1 in tree, "/samples/<executable_name>/" subdir
        "../../",                                        // up 2 in tree
        "../../common/",                                 // up 2 in tree, "/common/" subdir
        "../../common/data/",                            // up 2 in tree, "/common/data/" subdir
        "../../data/",                                   // up 2 in tree, "/data/" subdir
        "../../src/",                                    // up 2 in tree, "/src/" subdir
        "../../inc/",                                    // up 2 in tree, "/inc/" subdir
        "../../sandbox/<executable_name>/data/",         // up 2 in tree, "/sandbox/<executable_name>/" subdir
        "../../0_Simple/<executable_name>/data/",        // up 2 in tree, "/0_Simple/<executable_name>/" subdir
        "../../1_Utilities/<executable_name>/data/",     // up 2 in tree, "/1_Utilities/<executable_name>/" subdir
        "../../2_Graphics/<executable_name>/data/",      // up 2 in tree, "/2_Graphics/<executable_name>/" subdir
        "../../3_Imaging/<executable_name>/data/",       // up 2 in tree, "/3_Imaging/<executable_name>/" subdir
        "../../4_Finance/<executable_name>/data/",       // up 2 in tree, "/4_Finance/<executable_name>/" subdir
        "../../5_Simulations/<executable_name>/data/",   // up 2 in tree, "/5_Simulations/<executable_name>/" subdir
        "../../6_Advanced/<executable_name>/data/",      // up 2 in tree, "/6_Advanced/<executable_name>/" subdir
        "../../7_CUDALibraries/<executable_name>/data/", // up 2 in tree, "/7_CUDALibraries/<executable_name>/" subdir
        "../../8_Android/<executable_name>/data/",       // up 2 in tree, "/8_Android/<executable_name>/" subdir
        "../../samples/<executable_name>/data/",         // up 2 in tree, "/samples/<executable_name>/" subdir
        "../../../",                                        // up 3 in tree
        "../../../src/<executable_name>/",                  // up 3 in tree, "/src/<executable_name>/" subdir
        "../../../src/<executable_name>/data/",             // up 3 in tree, "/src/<executable_name>/data/" subdir
        "../../../src/<executable_name>/src/",              // up 3 in tree, "/src/<executable_name>/src/" subdir
        "../../../src/<executable_name>/inc/",              // up 3 in tree, "/src/<executable_name>/inc/" subdir
        "../../../sandbox/<executable_name>/",              // up 3 in tree, "/sandbox/<executable_name>/" subdir
        "../../../sandbox/<executable_name>/data/",         // up 3 in tree, "/sandbox/<executable_name>/data/" subdir
        "../../../sandbox/<executable_name>/src/",          // up 3 in tree, "/sandbox/<executable_name>/src/" subdir
        "../../../sandbox/<executable_name>/inc/",          // up 3 in tree, "/sandbox/<executable_name>/inc/" subdir
        "../../../0_Simple/<executable_name>/data/",        // up 3 in tree, "/0_Simple/<executable_name>/" subdir
        "../../../1_Utilities/<executable_name>/data/",     // up 3 in tree, "/1_Utilities/<executable_name>/" subdir
        "../../../2_Graphics/<executable_name>/data/",      // up 3 in tree, "/2_Graphics/<executable_name>/" subdir
        "../../../3_Imaging/<executable_name>/data/",       // up 3 in tree, "/3_Imaging/<executable_name>/" subdir
        "../../../4_Finance/<executable_name>/data/",       // up 3 in tree, "/4_Finance/<executable_name>/" subdir
        "../../../5_Simulations/<executable_name>/data/",   // up 3 in tree, "/5_Simulations/<executable_name>/" subdir
        "../../../6_Advanced/<executable_name>/data/",      // up 3 in tree, "/6_Advanced/<executable_name>/" subdir
        "../../../7_CUDALibraries/<executable_name>/data/", // up 3 in tree, "/7_CUDALibraries/<executable_name>/" subdir
        "../../../8_Android/<executable_name>/data/",       // up 3 in tree, "/8_Android/<executable_name>/" subdir
        "../../../0_Simple/<executable_name>/",        // up 3 in tree, "/0_Simple/<executable_name>/" subdir
        "../../../1_Utilities/<executable_name>/",     // up 3 in tree, "/1_Utilities/<executable_name>/" subdir
        "../../../2_Graphics/<executable_name>/",      // up 3 in tree, "/2_Graphics/<executable_name>/" subdir
        "../../../3_Imaging/<executable_name>/",       // up 3 in tree, "/3_Imaging/<executable_name>/" subdir
        "../../../4_Finance/<executable_name>/",       // up 3 in tree, "/4_Finance/<executable_name>/" subdir
        "../../../5_Simulations/<executable_name>/",   // up 3 in tree, "/5_Simulations/<executable_name>/" subdir
        "../../../6_Advanced/<executable_name>/",      // up 3 in tree, "/6_Advanced/<executable_name>/" subdir
        "../../../7_CUDALibraries/<executable_name>/", // up 3 in tree, "/7_CUDALibraries/<executable_name>/" subdir
        "../../../8_Android/<executable_name>/",       // up 3 in tree, "/8_Android/<executable_name>/" subdir
        "../../../samples/<executable_name>/data/",         // up 3 in tree, "/samples/<executable_name>/" subdir
        "../../../common/",                                 // up 3 in tree, "../../../common/" subdir
        "../../../common/data/",                            // up 3 in tree, "../../../common/data/" subdir
        "../../../data/",                                   // up 3 in tree, "../../../data/" subdir
        "../../../../",                                // up 4 in tree
        "../../../../src/<executable_name>/",          // up 4 in tree, "/src/<executable_name>/" subdir
        "../../../../src/<executable_name>/data/",     // up 4 in tree, "/src/<executable_name>/data/" subdir
        "../../../../src/<executable_name>/src/",      // up 4 in tree, "/src/<executable_name>/src/" subdir
        "../../../../src/<executable_name>/inc/",      // up 4 in tree, "/src/<executable_name>/inc/" subdir
        "../../../../sandbox/<executable_name>/",      // up 4 in tree, "/sandbox/<executable_name>/" subdir
        "../../../../sandbox/<executable_name>/data/", // up 4 in tree, "/sandbox/<executable_name>/data/" subdir
        "../../../../sandbox/<executable_name>/src/",  // up 4 in tree, "/sandbox/<executable_name>/src/" subdir
        "../../../../sandbox/<executable_name>/inc/",   // up 4 in tree, "/sandbox/<executable_name>/inc/" subdir
        "../../../../0_Simple/<executable_name>/data/",     // up 4 in tree, "/0_Simple/<executable_name>/" subdir
        "../../../../1_Utilities/<executable_name>/data/",  // up 4 in tree, "/1_Utilities/<executable_name>/" subdir
        "../../../../2_Graphics/<executable_name>/data/",   // up 4 in tree, "/2_Graphics/<executable_name>/" subdir
        "../../../../3_Imaging/<executable_name>/data/",    // up 4 in tree, "/3_Imaging/<executable_name>/" subdir
        "../../../../4_Finance/<executable_name>/data/",    // up 4 in tree, "/4_Finance/<executable_name>/" subdir
        "../../../../5_Simulations/<executable_name>/data/",// up 4 in tree, "/5_Simulations/<executable_name>/" subdir
        "../../../../6_Advanced/<executable_name>/data/",   // up 4 in tree, "/6_Advanced/<executable_name>/" subdir
        "../../../../7_CUDALibraries/<executable_name>/data/", // up 4 in tree, "/7_CUDALibraries/<executable_name>/" subdir
        "../../../../8_Android/<executable_name>/data/",    // up 4 in tree, "/8_Android/<executable_name>/" subdir
        "../../../../0_Simple/<executable_name>/",     // up 4 in tree, "/0_Simple/<executable_name>/" subdir
        "../../../../1_Utilities/<executable_name>/",  // up 4 in tree, "/1_Utilities/<executable_name>/" subdir
        "../../../../2_Graphics/<executable_name>/",   // up 4 in tree, "/2_Graphics/<executable_name>/" subdir
        "../../../../3_Imaging/<executable_name>/",    // up 4 in tree, "/3_Imaging/<executable_name>/" subdir
        "../../../../4_Finance/<executable_name>/",    // up 4 in tree, "/4_Finance/<executable_name>/" subdir
        "../../../../5_Simulations/<executable_name>/",// up 4 in tree, "/5_Simulations/<executable_name>/" subdir
        "../../../../6_Advanced/<executable_name>/",   // up 4 in tree, "/6_Advanced/<executable_name>/" subdir
        "../../../../7_CUDALibraries/<executable_name>/", // up 4 in tree, "/7_CUDALibraries/<executable_name>/" subdir
        "../../../../8_Android/<executable_name>/",    // up 4 in tree, "/8_Android/<executable_name>/" subdir
        "../../../../samples/<executable_name>/data/",      // up 4 in tree, "/samples/<executable_name>/" subdir
        "../../../../common/",                              // up 4 in tree, "../../../common/" subdir
        "../../../../common/data/",                         // up 4 in tree, "../../../common/data/" subdir
        "../../../../data/",                                // up 4 in tree, "../../../data/" subdir
        "../../../../../",                                // up 5 in tree
        "../../../../../src/<executable_name>/",          // up 5 in tree, "/src/<executable_name>/" subdir
        "../../../../../src/<executable_name>/data/",     // up 5 in tree, "/src/<executable_name>/data/" subdir
        "../../../../../src/<executable_name>/src/",      // up 5 in tree, "/src/<executable_name>/src/" subdir
        "../../../../../src/<executable_name>/inc/",      // up 5 in tree, "/src/<executable_name>/inc/" subdir
        "../../../../../sandbox/<executable_name>/",      // up 5 in tree, "/sandbox/<executable_name>/" subdir
        "../../../../../sandbox/<executable_name>/data/", // up 5 in tree, "/sandbox/<executable_name>/data/" subdir
        "../../../../../sandbox/<executable_name>/src/",  // up 5 in tree, "/sandbox/<executable_name>/src/" subdir
        "../../../../../sandbox/<executable_name>/inc/",   // up 5 in tree, "/sandbox/<executable_name>/inc/" subdir
        "../../../../../0_Simple/<executable_name>/data/",     // up 5 in tree, "/0_Simple/<executable_name>/" subdir
        "../../../../../1_Utilities/<executable_name>/data/",  // up 5 in tree, "/1_Utilities/<executable_name>/" subdir
        "../../../../../2_Graphics/<executable_name>/data/",   // up 5 in tree, "/2_Graphics/<executable_name>/" subdir
        "../../../../../3_Imaging/<executable_name>/data/",    // up 5 in tree, "/3_Imaging/<executable_name>/" subdir
        "../../../../../4_Finance/<executable_name>/data/",    // up 5 in tree, "/4_Finance/<executable_name>/" subdir
        "../../../../../5_Simulations/<executable_name>/data/",// up 5 in tree, "/5_Simulations/<executable_name>/" subdir
        "../../../../../6_Advanced/<executable_name>/data/",   // up 5 in tree, "/6_Advanced/<executable_name>/" subdir
        "../../../../../7_CUDALibraries/<executable_name>/data/", // up 5 in tree, "/7_CUDALibraries/<executable_name>/" subdir
        "../../../../../8_Android/<executable_name>/data/",    // up 5 in tree, "/8_Android/<executable_name>/" subdir
        "../../../../../samples/<executable_name>/data/",      // up 5 in tree, "/samples/<executable_name>/" subdir
        "../../../../../common/",                         // up 5 in tree, "../../../common/" subdir
        "../../../../../common/data/",                    // up 5 in tree, "../../../common/data/" subdir
    };
    // Extract the executable name
    std::string executable_name;
    if (executable_path != 0)
    {
        executable_name = std::string(executable_path);
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
        // Windows path delimiter
        size_t delimiter_pos = executable_name.find_last_of('\\');
        executable_name.erase(0, delimiter_pos + 1);
        if (executable_name.rfind(".exe") != std::string::npos)
        {
            // we strip .exe, only if the .exe is found
            executable_name.resize(executable_name.size() - 4);
        }
#else
        // Linux & OSX path delimiter
        size_t delimiter_pos = executable_name.find_last_of('/');
        executable_name.erase(0,delimiter_pos+1);
#endif
    }
    // Loop over all search paths and return the first hit
    for (unsigned int i = 0; i < sizeof(searchPath)/sizeof(char *); ++i)
    {
        std::string path(searchPath[i]);
        size_t executable_name_pos = path.find("<executable_name>");
        // If there is executable_name variable in the searchPath
        // replace it with the value
        if (executable_name_pos != std::string::npos)
        {
            if (executable_path != 0)
            {
                path.replace(executable_name_pos, strlen("<executable_name>"), executable_name);
            }
            else
            {
                // Skip this path entry if no executable argument is given
                continue;
            }
        }
#ifdef _DEBUG
        printf("sdkFindFilePath <%s> in %s\n", filename, path.c_str());
#endif
        // Test if the file exists
        path.append(filename);
        FILE *fp;
        FOPEN(fp, path.c_str(), "rb");
        if (fp != NULL)
        {
            fclose(fp);
            // File found
            // returning an allocated array here for backwards compatibility reasons
            char *file_path = (char *) malloc(path.length() + 1);
            STRCPY(file_path, path.length() + 1, path.c_str());
            return file_path;
        }
        if (fp)
        {
            fclose(fp);
        }
    }
    // File not found
    return 0;
}
#endif
gohumantrack/gohumantrack.go
@@ -17,12 +17,15 @@
// create_batch_image allocates an array of `size` c_img entries with malloc
// and sets every entry's data_ pointer to NULL.
//
// size: number of entries to allocate; must be positive.
// Returns the array as an opaque pointer, or NULL when size is not positive
// or the allocation fails (the entries are never touched in that case).
void *create_batch_image(const int size){
    // A non-positive count has nothing to initialise; report it as failure.
    if(size <= 0) return NULL;
    c_img *imgs = (c_img*)malloc((size_t)size * sizeof(c_img));
    // Without this check the loop below would write through a NULL pointer.
    if(!imgs) return NULL;
    for(int i = 0; i < size; i++){
        imgs[i].data_ = NULL;
    }
    return imgs;
}
int fill_images(void *imgs, const int size, const int index, void *data, const int w, const int h, const int c){
    if(!imgs || !data || size <= index) return -1;
    c_img *images = (c_img*)imgs;
    images[index].data_ = data;
    images[index].data_ = (unsigned char*)data;
    images[index].w_ = w;
    images[index].h_ = h;
    images[index].c_ = c;
@@ -36,9 +39,9 @@
    }
    return ret;
}
void *process(void *handle, void *imgs, const int size){
void *process(void *handle, void *imgs, const int size, void *result){
    c_img *images = (c_img*)imgs;
    c_fgRet *res = init_fgres(size);
    c_fgRet *res = (c_fgRet *)result;
    int ret = c_human_tracker_process(handle, images, size, res);
    if (ret != 0)
        return NULL;
@@ -95,6 +98,7 @@
// HumanTracker wraps a native human-tracker instance together with the
// C-side result buffer that is reused for every processing call.
type HumanTracker struct {
    // handle is the pointer returned by C.c_human_tracker_create; it is
    // passed to C.c_human_tracker_release in Free.
    handle    unsafe.Pointer
    // result is the buffer returned by C.init_fgres(batchSize) at
    // construction; it is handed to C.process and released with C.free in Free.
    result       unsafe.Pointer
    // batchSize is the batch size given at construction and forwarded to
    // C.process as the image count.
    batchSize int
}
@@ -105,7 +109,8 @@
    }
    p := C.c_human_tracker_create(C.int(gpu), C.int(batchSize), C.int(flag))
    if p != nil {
        return &HumanTracker{p, batchSize}
        res := C.init_fgres(C.int(batchSize))
        return &HumanTracker{p, res, batchSize}
    }
    return nil
}
@@ -114,6 +119,9 @@
func (h *HumanTracker) Free() {
    if h.handle != nil {
        C.c_human_tracker_release(&h.handle)
    }
    if h.result != nil {
        C.free(h.result)
    }
}
@@ -146,11 +154,10 @@
        }
    }
    cRet := C.process(h.handle, cImgs, C.int(h.batchSize))
    cRet := C.process(h.handle, cImgs, C.int(h.batchSize), h.result)
    if cRet == nil {
        return nil, errors.New("create C results error")
    }
    defer C.free(unsafe.Pointer(cRet))
    var result []FgResult
    p := uintptr(cRet)
main.go
@@ -6,7 +6,7 @@
    "time"
    "track/gohumantrack"
    "track/goconv"
    "basic.com/valib/goffmpeg.git"
)
@@ -19,43 +19,26 @@
    flag.StringVar(&url1, "cam1", "", "url")
    flag.StringVar(&url2, "cam2", "", "url")
}
func main() {
    flag.Parse()
    fmt.Println("url1: ", url1, " url2: ", url2)
    if len(url1) == 0 || len(url2) == 0 {
        fmt.Println("url null")
    }
    tracker := gohumantrack.NewHumanTracker(0, 2, 0)
    fmt.Println("version: ", tracker.GetVersion())
    goffmpeg.InitFFmpeg("./runtime/libcffmpeg.so")
    cam1 := goffmpeg.New(false, false)
    cam1.Run(url1)
    cam1.BuildDecoder()
    cam1.CloseStream()
    cam2 := goffmpeg.New(false, false)
    cam2.Run(url2)
    cam2.BuildDecoder()
    cam2.CloseStream()
    for {
func run(cam1, cam2 *goffmpeg.GoFFMPEG, tracker *gohumantrack.HumanTracker) bool {
        data1, ow1, oh1, _ := cam1.GetYUV()
        data2, ow2, oh2, _ := cam2.GetYUV()
        if ow1 > 0 && oh1 > 0 && ow2 > 0 && oh2 > 0 {
        bgr1 := goconv.YUV2BGR(data1, ow1, oh1)
        bgr2 := goconv.YUV2BGR(data2, ow2, oh2)
        if bgr1 == nil || bgr2 == nil{
            return false
        }
            var images []gohumantrack.ImageHumanTracker
            img := gohumantrack.ImageHumanTracker{
                Data:    data1,
            Data:    bgr1,
                Width:   ow1,
                Height:  oh1,
                Channel: 3,
            }
            images = append(images, img)
            img = gohumantrack.ImageHumanTracker{
                Data:    data2,
            Data:    bgr2,
                Width:   ow2,
                Height:  oh2,
                Channel: 3,
@@ -79,8 +62,35 @@
            } else {
                fmt.Println("process error: ", err)
            }
        } else {
            //            fmt.Println("cam1 size: ", ow1, "x", oh1, " cam2 size: ", ow2, "x", oh2)
        return true
    }
    return false
}
func main() {
    flag.Parse()
    fmt.Println("url1: ", url1, " url2: ", url2)
    if len(url1) == 0 || len(url2) == 0 {
        fmt.Println("url null")
    }
    tracker := gohumantrack.NewHumanTracker(0, 2, 0)
    fmt.Println("version: ", tracker.GetVersion())
    goffmpeg.InitFFmpeg("./runtime/libcffmpeg.so")
    cam1 := goffmpeg.New(false, false)
    cam1.Run(url1)
    cam1.BuildDecoder()
    cam1.CloseStream()
    cam2 := goffmpeg.New(false, false)
    cam2.Run(url2)
    cam2.BuildDecoder()
    cam2.CloseStream()
    for {
        if !run(cam1, cam2, tracker){
            time.Sleep(38 * time.Millisecond)
        }
runtime/libcffmpeg.so
Binary files differ