From 6d6e6d425c4fe63a487ff27be9341671b2c1dd93 Mon Sep 17 00:00:00 2001
From: stydb <suntianyu0923@gmail.com>
Date: 星期二, 03 十二月 2019 11:12:36 +0800
Subject: [PATCH] update
---
goconv/inc/helper_string.h | 526 ++++++++++
goconv/goconv.go | 258 +++++
.gitignore | 1
go.sum | 22
goconv/inc/helper_cuda.h | 1261 +++++++++++++++++++++++++
goconv/inc/Exceptions.h | 181 +++
go.mod | 8
goconv/conv.cpp | 592 +++++++++++
main.go | 90 +
gohumantrack/gohumantrack.go | 19
runtime/libcffmpeg.so | 0
goconv/conv.h | 23
12 files changed, 2,932 insertions(+), 49 deletions(-)
diff --git a/.gitignore b/.gitignore
index 553d97f..3dee989 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
*.avi
track
+FPN_REID*
diff --git a/go.mod b/go.mod
index 5af2311..2a00f22 100644
--- a/go.mod
+++ b/go.mod
@@ -2,4 +2,10 @@
go 1.12
-require basic.com/valib/goffmpeg.git v0.0.0-20191129092141-7363d175bd62
+require (
+ basic.com/valib/godraw.git v0.0.0-20191122082247-26e9987cd183
+ basic.com/valib/goffmpeg.git v0.0.0-20191203025021-783b80757681
+ basic.com/valib/gogpu.git v0.0.0-20190711044327-62043b070865
+ github.com/disintegration/imaging v1.6.2
+ github.com/llgcode/draw2d v0.0.0-20190810100245-79e59b6b8fbc // indirect
+)
diff --git a/go.sum b/go.sum
index 4bb58e5..f28b48d 100644
--- a/go.sum
+++ b/go.sum
@@ -1,2 +1,20 @@
-basic.com/valib/goffmpeg.git v0.0.0-20191129092141-7363d175bd62 h1:KzkPzJE76RkHeYBgAMfSiz1vzJaQRKkRDCXnw2XmxqA=
-basic.com/valib/goffmpeg.git v0.0.0-20191129092141-7363d175bd62/go.mod h1:1x75Hawh/BjgPsQtuJ24px89gzk3uAslD8e0Xs6Z7GQ=
+basic.com/valib/godraw.git v0.0.0-20191122082247-26e9987cd183 h1:QQ1L0Ev4vcSD23d99+rW5S/mnmdTAPAI2GZ7tkMgCE4=
+basic.com/valib/godraw.git v0.0.0-20191122082247-26e9987cd183/go.mod h1:LntbWczdG87utrKx7rWYmIh1VZ+X2oPN7Q2IXb6oRE0=
+basic.com/valib/goffmpeg.git v0.0.0-20191203025021-783b80757681 h1:n5zinCkvVghdKw0ZenxMo+lFjaXhHSr9ecICuQZLjNw=
+basic.com/valib/goffmpeg.git v0.0.0-20191203025021-783b80757681/go.mod h1:1x75Hawh/BjgPsQtuJ24px89gzk3uAslD8e0Xs6Z7GQ=
+basic.com/valib/gogpu.git v0.0.0-20190711044327-62043b070865 h1:3XvkNdRlJDXV45ie8U0uGA9ImJZtyTT0C/h+4Rizv0Y=
+basic.com/valib/gogpu.git v0.0.0-20190711044327-62043b070865/go.mod h1:yxux5RP4A6a591vWljXxGlHdERVVyWDD3TwwQjuyogw=
+github.com/disintegration/imaging v1.6.2 h1:w1LecBlG2Lnp8B3jk5zSuNqd7b4DXhcjwek1ei82L+c=
+github.com/disintegration/imaging v1.6.2/go.mod h1:44/5580QXChDfwIclfc/PCwrr44amcmDAg8hxG0Ewe4=
+github.com/go-gl/gl v0.0.0-20180407155706-68e253793080/go.mod h1:482civXOzJJCPzJ4ZOX/pwvXBWSnzD4OKMdH4ClKGbk=
+github.com/go-gl/glfw v0.0.0-20180426074136-46a8d530c326/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
+github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g=
+github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
+github.com/jung-kurt/gofpdf v1.0.0/go.mod h1:7Id9E/uU8ce6rXgefFLlgrJj/GYY22cpxn+r32jIOes=
+github.com/llgcode/draw2d v0.0.0-20190810100245-79e59b6b8fbc h1:ZvNhCJfRtl03A0VOIfvO9W22/0b6dmn1APa4Q6j9oHM=
+github.com/llgcode/draw2d v0.0.0-20190810100245-79e59b6b8fbc/go.mod h1:mVa0dA29Db2S4LVqDYLlsePDzRJLDfdhVZiI15uY0FA=
+github.com/llgcode/ps v0.0.0-20150911083025-f1443b32eedb/go.mod h1:1l8ky+Ew27CMX29uG+a2hNOKpeNYEQjjtiALiBlFQbY=
+golang.org/x/image v0.0.0-20180708004352-c73c2afc3b81/go.mod h1:ux5Hcp/YLpHSI86hEcLt0YII63i6oz57MZXIpbrjZUs=
+golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8 h1:hVwzHzIUGRjiF7EcUjqNxk3NCfkPxbDKRdnNE1Rpg0U=
+golang.org/x/image v0.0.0-20191009234506-e7c1f5e7dbb8/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
diff --git a/goconv/conv.cpp b/goconv/conv.cpp
new file mode 100644
index 0000000..098a396
--- /dev/null
+++ b/goconv/conv.cpp
@@ -0,0 +1,592 @@
+#include "conv.h"
+
+#include <cmath>
+#include <sys/time.h>
+
+#include <npp.h>
+#include <helper_cuda.h>
+#include <helper_string.h>
+#include "Exceptions.h"
+
+
+static const int MEMORY_ALGN_DEVICE = 511;
+static const int HD_MEMORY_ALGN_DEVICE = 511;
+
+static inline int DivUp(int x, int d)
+{
+ return (x + d - 1) / d;
+}
+
+static int set_data(uint8_t *data, const int width, const int height, unsigned char *mY, unsigned char *mU, unsigned char *mV)
+{
+ uint8_t* yuv_data = (uint8_t*)data;
+ if (!yuv_data)
+ {
+ return -1;
+ }
+
+ uint32_t i, j;
+ uint32_t off;
+ uint32_t off_yuv;
+ uint32_t half_h;
+ uint32_t half_w;
+ uint32_t u_size;
+ uint8_t* yuv_ptr;
+ uint8_t* u_ptr;
+ uint8_t* v_ptr;
+
+ int w = width;
+ int h = height;
+
+ //浠庤繖涓�鍙ユ潵鐪嬶紝鍗充娇鏄悓涓�绉嶆牸寮忥紝杩涙潵涔熻澶勭悊涓�涓嬨��
+ size_t nPitch = (w + HD_MEMORY_ALGN_DEVICE) & ~HD_MEMORY_ALGN_DEVICE;
+ off = 0;
+ off_yuv = 0;
+ for (i = 0; i < (uint32_t)h; i++)
+ {
+ memcpy(mY + off, yuv_data + off_yuv, w);
+ off += nPitch;
+ off_yuv += w;
+ }
+
+ half_w = w >> 1;
+ half_h = h >> 1;
+ u_size = half_w * half_h;
+ nPitch = (half_w + HD_MEMORY_ALGN_DEVICE) & ~HD_MEMORY_ALGN_DEVICE;
+
+ off_yuv = w * h;
+ off = 0;
+ for (i = 0; i < half_h; i++)
+ {
+ yuv_ptr = yuv_data + off_yuv;
+ u_ptr = mU + off;
+ v_ptr = mV + off;
+ for (j = 0; j < (uint32_t)w; j += 2)
+ {
+ *u_ptr++ = *yuv_ptr++;
+ *v_ptr++ = *yuv_ptr++;
+ }
+ off_yuv += w;
+ off += nPitch;
+ }
+
+ return 0;
+}
+
+/////////////handle
+class convertor{
+public:
+ convertor(const int srcW, const int srcH, const int dstW, const int dstH, const int gpu);
+ ~convertor();
+ int yuv2bgr(unsigned char **bgr, int *bgrLen);
+ int resize2bgr(unsigned char *in, unsigned char **data, int *data_len);
+ int resizeyuv(unsigned char *in, unsigned char **data, int *data_len);
+ int fill_yuv(const unsigned char *yuv);
+private:
+ void init_yuv();
+ void init_resize();
+ void init_resize_bgr();
+ void init_resize_yuv();
+private:
+ int width;
+ int height;
+
+ unsigned char aSamplingFactors[3];
+ int nMCUBlocksH;
+ int nMCUBlocksV;
+
+ Npp8u *apSrcImage[3];
+ NppiSize aSrcSize[3];
+ Npp32s aSrcImageStep[3];
+ size_t aSrcPitch[3];
+
+ uint8_t *mY;
+ uint8_t *mU;
+ uint8_t *mV;
+
+///////////////////////////
+ int rWidth;
+ int rHeight;
+ float fx;
+ float fy;
+
+ Npp8u *apDstImage[3] = {0,0,0};
+ Npp32s aDstImageStep[3];
+ NppiSize aDstSize[3];
+
+/////////////////////////////
+ Npp8u *imgOrigin;
+ size_t pitchOrigin;
+ NppiSize sizeOrigin;
+
+ unsigned char *bgrOrigin;
+ int bgrOriginLen;
+ size_t bgrOriginPitch;
+
+////////////////////////////
+ Npp8u *imgResize;
+ size_t pitchResize;
+ NppiSize sizeResize;
+
+ unsigned char *bgrScale;
+ int bgrScaleLen;
+ size_t bgrScalePitch;
+
+// resize only
+////////////////////////////
+ Npp8u *originBGR;
+ int pitchOriginBGR;
+ Npp8u *resizedBGR;
+ int pitchResizedBGR;
+ unsigned char *hostResizedBGR;
+
+///////////////////////////
+ unsigned char *nv12;
+
+ bool initialized_yuv, initialized_resize, initialized_resize_bgr, initialized_resize_yuv;
+ int gpu_index;
+};
+
+
+convertor::convertor(const int srcW, const int srcH, const int dstW, const int dstH, const int gpu)
+:width(srcW)
+,height(srcH)
+,rWidth(dstW)
+,rHeight(dstH)
+,fx(-1)
+,fy(-1)
+,mY(NULL)
+,mU(NULL)
+,mV(NULL)
+,imgOrigin(0)
+,imgResize(0)
+,bgrOrigin(NULL)
+,bgrOriginLen(0)
+,bgrScale(NULL)
+,bgrScaleLen(0)
+,originBGR(0)
+,pitchOriginBGR(0)
+,resizedBGR(0)
+,pitchResizedBGR(0)
+,hostResizedBGR(NULL)
+,nv12(NULL)
+,initialized_yuv(false)
+,initialized_resize(false)
+,initialized_resize_bgr(false)
+,initialized_resize_yuv(false)
+,gpu_index(gpu)
+{}
+
+static void setGPUDevice(const int gpu){
+ if (gpu >= 0){
+ cudaSetDevice(gpu);
+ }
+}
+
+void convertor::init_yuv(){
+ if (initialized_yuv) return;
+ initialized_yuv = true;
+
+ setGPUDevice(gpu_index);
+
+ for(int i = 0; i < 3; i++){
+ apSrcImage[i] = 0;
+ apDstImage[i] = 0;
+ }
+
+ aSamplingFactors[0] = 34;
+ aSamplingFactors[1] = 17;
+ aSamplingFactors[2] = 17;
+
+ nMCUBlocksH = 0;
+ nMCUBlocksV = 0;
+
+ for (int i = 0; i < 3; ++i)
+ {
+ nMCUBlocksV = std::max(nMCUBlocksV, aSamplingFactors[i] & 0x0f);
+ nMCUBlocksH = std::max(nMCUBlocksH, aSamplingFactors[i] >> 4);
+ }
+
+ for (int i = 0; i < 3; ++i)
+ {
+ NppiSize oBlocks;
+ NppiSize oBlocksPerMCU = { aSamplingFactors[i] >> 4, aSamplingFactors[i] & 0x0f };
+
+ oBlocks.width = (int)ceil((width + 7) / 8 *
+ static_cast<float>(oBlocksPerMCU.width) / nMCUBlocksH);
+ oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width;
+
+ oBlocks.height = (int)ceil((height + 7) / 8 *
+ static_cast<float>(oBlocksPerMCU.height) / nMCUBlocksV);
+ oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height;
+
+ aSrcSize[i].width = oBlocks.width * 8;
+ aSrcSize[i].height = oBlocks.height * 8;
+
+ // Allocate Memory
+ size_t nPitch;
+ NPP_CHECK_CUDA(cudaMallocPitch((void**)&(apSrcImage[i]), &nPitch, aSrcSize[i].width, aSrcSize[i].height));
+ aSrcPitch[i] = nPitch;
+ aSrcImageStep[i] = static_cast<Npp32s>(nPitch);
+ }
+
+ NPP_CHECK_CUDA(cudaMallocPitch((void**)&imgOrigin, &pitchOrigin, width * 3, height));
+
+ bgrOriginPitch = width * 3;
+ bgrOriginLen = bgrOriginPitch * height;
+ NPP_CHECK_CUDA(cudaHostAlloc((void**)&bgrOrigin, bgrOriginLen, cudaHostAllocDefault));
+
+ sizeOrigin.width = width;
+ sizeOrigin.height = height;
+
+ uint32_t nPitch = (width + MEMORY_ALGN_DEVICE) & ~MEMORY_ALGN_DEVICE;
+ NPP_CHECK_CUDA(cudaHostAlloc((void**)&mY, nPitch * height, cudaHostAllocDefault));
+ nPitch = (width/2 + MEMORY_ALGN_DEVICE) & ~MEMORY_ALGN_DEVICE;
+ NPP_CHECK_CUDA(cudaHostAlloc((void**)&mU, nPitch * height / 2, cudaHostAllocDefault));
+ NPP_CHECK_CUDA(cudaHostAlloc((void**)&mV, nPitch * height / 2, cudaHostAllocDefault));
+
+}
+
+void convertor::init_resize(){
+ if (initialized_resize) return;
+ initialized_resize = true;
+
+ setGPUDevice(gpu_index);
+
+ NppiSize oDstImageSize;
+ oDstImageSize.width = std::max(1, rWidth);
+ oDstImageSize.height = std::max(1, rHeight);
+
+ sizeResize.width = oDstImageSize.width;
+ sizeResize.height = oDstImageSize.height;
+
+ for (int i=0; i < 3; ++i)
+ {
+ NppiSize oBlocks;
+ NppiSize oBlocksPerMCU = { aSamplingFactors[i] & 0x0f, aSamplingFactors[i] >> 4};
+
+ oBlocks.width = (int)ceil((oDstImageSize.width + 7)/8 *
+ static_cast<float>(oBlocksPerMCU.width)/nMCUBlocksH);
+ oBlocks.width = DivUp(oBlocks.width, oBlocksPerMCU.width) * oBlocksPerMCU.width;
+
+ oBlocks.height = (int)ceil((oDstImageSize.height+7)/8 *
+ static_cast<float>(oBlocksPerMCU.height)/nMCUBlocksV);
+ oBlocks.height = DivUp(oBlocks.height, oBlocksPerMCU.height) * oBlocksPerMCU.height;
+
+ aDstSize[i].width = oBlocks.width * 8;
+ aDstSize[i].height = oBlocks.height * 8;
+
+ // Allocate Memory
+ size_t nPitch;
+ NPP_CHECK_CUDA(cudaMallocPitch((void**)&apDstImage[i], &nPitch, aDstSize[i].width, aDstSize[i].height));
+ aDstImageStep[i] = static_cast<Npp32s>(nPitch);
+ }
+
+ if (rWidth > 0 && rHeight > 0 && rWidth < width && rHeight < height){
+ fx = (float)(rWidth) / (float)(width);
+ fy = (float)(rHeight) / (float)(height);
+ }
+
+ if (imgResize == 0){
+ if (rWidth > 0 && rHeight > 0 && rWidth < width && rHeight < height){
+ NPP_CHECK_CUDA(cudaMallocPitch((void**)&imgResize, &pitchResize, rWidth * 3, rHeight));
+ }
+ }
+ if (!bgrScale){
+ if (rWidth > 0 && rHeight > 0 && rWidth < width && rHeight < height){
+ bgrScalePitch = rWidth * 3;
+ bgrScaleLen = bgrScalePitch * rHeight;
+ NPP_CHECK_CUDA(cudaHostAlloc((void**)&bgrScale, bgrScaleLen, cudaHostAllocDefault));
+ }
+ }
+}
+
+void convertor::init_resize_bgr(){
+ if (initialized_resize_bgr) return;
+ initialized_resize_bgr = true;
+
+ setGPUDevice(gpu_index);
+ if (originBGR == 0){
+ originBGR = nppiMalloc_8u_C3(width, height, &pitchOriginBGR);
+ }
+ if (resizedBGR == 0){
+ resizedBGR = nppiMalloc_8u_C3(rWidth, rHeight, &pitchResizedBGR);
+ }
+ if (hostResizedBGR == NULL){
+ NPP_CHECK_CUDA(cudaHostAlloc((void**)&hostResizedBGR, rWidth * 3 * rHeight, cudaHostAllocDefault));
+ }
+}
+
+void convertor::init_resize_yuv(){
+ if (initialized_resize_yuv) return;
+ initialized_resize_yuv = true;
+
+ if (rWidth > 0 && rHeight > 0){
+ fx = (float)(width) / (float)(rWidth);
+ fy = (float)(height) / (float)(rHeight);
+ }
+
+ nv12 = (unsigned char*)malloc(rWidth*rHeight*3/2);
+}
+
+convertor::~convertor(){
+ setGPUDevice(gpu_index);
+
+ if(mY) cudaFreeHost(mY);
+ if(mU) cudaFreeHost(mU);
+ if(mV) cudaFreeHost(mV);
+
+ for (int i = 0; i < 3; ++i)//鍐呭瓨閲婃斁
+ {
+ cudaFree(apSrcImage[i]);
+ cudaFree(apDstImage[i]);
+ }
+
+ if (imgOrigin) cudaFree(imgOrigin);
+ if (imgResize) cudaFree(imgResize);
+
+ if (bgrOrigin) cudaFreeHost(bgrOrigin);
+ if (bgrScale) cudaFreeHost(bgrScale);
+
+ if (originBGR) nppiFree(originBGR);
+ if (resizedBGR) nppiFree(resizedBGR);
+ if (hostResizedBGR) cudaFreeHost(hostResizedBGR);
+
+ if (nv12) free(nv12);
+}
+
+int convertor::fill_yuv(const unsigned char *yuv){
+ init_yuv();
+ int ret = set_data((uint8_t*)yuv, width, height, mY, mU, mV);
+ if (ret < 0) return ret;
+
+ setGPUDevice(gpu_index);
+
+ NPP_CHECK_CUDA(cudaMemcpy(apSrcImage[0], mY, aSrcPitch[0] * height, cudaMemcpyHostToDevice));
+ NPP_CHECK_CUDA(cudaMemcpy(apSrcImage[1], mU, aSrcPitch[1] * height / 2, cudaMemcpyHostToDevice));
+ NPP_CHECK_CUDA(cudaMemcpy(apSrcImage[2], mV, aSrcPitch[2] * height / 2, cudaMemcpyHostToDevice));
+ return 0;
+}
+
+int convertor::yuv2bgr(unsigned char **bgr, int *bgrLen){
+
+ *bgr = NULL;
+ *bgrLen = 0;
+
+ setGPUDevice(gpu_index);
+
+ NPP_CHECK_NPP(nppiYUV420ToBGR_8u_P3C3R(apSrcImage, aSrcImageStep, imgOrigin, pitchOrigin, sizeOrigin));
+
+ NPP_CHECK_CUDA(cudaMemcpy2D(bgrOrigin, bgrOriginPitch, imgOrigin, pitchOrigin, bgrOriginPitch, height, cudaMemcpyDeviceToHost));
+ *bgr = bgrOrigin;
+ *bgrLen = bgrOriginLen;
+
+ return 0;
+}
+
+int convertor::resize2bgr(unsigned char *in, unsigned char **data, int *data_len){
+ *data = NULL;
+ *data_len = 0;
+
+ if ((rWidth < 0 && rHeight < 0) || (rWidth > width && rHeight > height)){
+ return -1;
+ }
+
+ setGPUDevice(gpu_index);
+
+ if (!in){
+
+ init_resize();
+
+ NppiSize oDstImageSize;
+ oDstImageSize.width = std::max(1, rWidth);
+ oDstImageSize.height = std::max(1, rHeight);
+ for (int i = 0; i < 3; ++i)
+ {
+ NppiSize oBlocksPerMCU = { aSamplingFactors[i] & 0x0f, aSamplingFactors[i] >> 4};
+ NppiSize oSrcImageSize = {(width * oBlocksPerMCU.width) / nMCUBlocksH, (height * oBlocksPerMCU.height)/nMCUBlocksV};
+ NppiRect oSrcImageROI = {0,0,oSrcImageSize.width, oSrcImageSize.height};
+ NppiRect oDstImageROI;
+ NppiInterpolationMode eInterploationMode = NPPI_INTER_SUPER;
+ NPP_CHECK_NPP(nppiGetResizeRect(oSrcImageROI, &oDstImageROI,
+ fx,
+ fy,
+ 0.0, 0.0, eInterploationMode));
+ NPP_CHECK_NPP(nppiResizeSqrPixel_8u_C1R(apSrcImage[i], oSrcImageSize, aSrcImageStep[i], oSrcImageROI,
+ apDstImage[i], aDstImageStep[i], oDstImageROI ,
+ fx,
+ fy,
+ 0.0, 0.0, eInterploationMode));
+ }
+ NPP_CHECK_NPP(nppiYUV420ToBGR_8u_P3C3R(apDstImage, aDstImageStep, imgResize, pitchResize, sizeResize));
+ NPP_CHECK_CUDA(cudaMemcpy2D(bgrScale, bgrScalePitch, imgResize, pitchResize, bgrScalePitch, rHeight, cudaMemcpyDeviceToHost));
+ *data = bgrScale;
+ *data_len = bgrScaleLen;
+ }else{
+
+ init_resize_bgr();
+
+ NppiSize oSrcSize;
+ oSrcSize.width = width;
+ oSrcSize.height = height;
+
+ NPP_CHECK_CUDA(cudaMemcpy2D(originBGR, pitchOriginBGR, in, width*3, width*3, height, cudaMemcpyHostToDevice));
+
+ NppiRect oSrcROI;
+ oSrcROI.x = 0;
+ oSrcROI.y = 0;
+ oSrcROI.width = width;
+ oSrcROI.height = height;
+
+
+ NppiRect oDstROI;
+ oDstROI.x = 0;
+ oDstROI.y = 0;
+ oDstROI.width = rWidth;
+ oDstROI.height = rHeight;
+
+ // Scale Factor
+ double nXFactor = double(oDstROI.width) / double(oSrcROI.width);
+ double nYFactor = double(oDstROI.height) / double(oSrcROI.height);
+
+ // Scaled X/Y Shift
+ double nXShift = - oSrcROI.x * nXFactor ;
+ double nYShift = - oSrcROI.y * nYFactor;
+ int eInterpolation = NPPI_INTER_SUPER;
+ if (nXFactor >= 1.f || nYFactor >= 1.f)
+ eInterpolation = NPPI_INTER_LANCZOS;
+
+ NppStatus ret = nppiResizeSqrPixel_8u_C3R(originBGR, oSrcSize, pitchOriginBGR, oSrcROI,
+ resizedBGR, pitchResizedBGR, oDstROI, nXFactor, nYFactor, nXShift, nYShift, eInterpolation );
+
+ if(ret != NPP_SUCCESS) {
+ printf("imageResize_8u_C3R failed %d.\n", ret);
+ return -2;
+ }
+ size_t pitch = rWidth * 3;
+ *data_len = pitch * rHeight;
+ NPP_CHECK_CUDA(cudaMemcpy2D(hostResizedBGR, pitch, resizedBGR, pitchResizedBGR, pitch, rHeight, cudaMemcpyDeviceToHost));
+ *data = hostResizedBGR;
+ }
+ return 0;
+}
+
+static int nv12_nearest_scale(uint8_t* __restrict src, uint8_t* __restrict dst,
+ int srcWidth, int srcHeight, int dstWidth, int dstHeight)
+{
+ register int sw = srcWidth; //register keyword is for local var to accelorate
+ register int sh = srcHeight;
+ register int dw = dstWidth;
+ register int dh = dstHeight;
+ register int y, x;
+ unsigned long int srcy, srcx, src_index, dst_index;
+ unsigned long int xrIntFloat_16 = (sw << 16) / dw + 1; //better than float division
+ unsigned long int yrIntFloat_16 = (sh << 16) / dh + 1;
+
+ uint8_t* dst_uv = dst + dh * dw; //memory start pointer of dest uv
+ uint8_t* src_uv = src + sh * sw; //memory start pointer of source uv
+ uint8_t* dst_uv_yScanline;
+ uint8_t* src_uv_yScanline;
+ uint8_t* dst_y_slice = dst; //memory start pointer of dest y
+ uint8_t* src_y_slice;
+ uint8_t* sp;
+ uint8_t* dp;
+
+ for (y = 0; y < (dh & ~7); ++y) //'dh & ~7' is to generate faster assembly code
+ {
+ srcy = (y * yrIntFloat_16) >> 16;
+ src_y_slice = src + srcy * sw;
+
+ if((y & 1) == 0)
+ {
+ dst_uv_yScanline = dst_uv + (y / 2) * dw;
+ src_uv_yScanline = src_uv + (srcy / 2) * sw;
+ }
+
+ for(x = 0; x < (dw & ~7); ++x)
+ {
+ srcx = (x * xrIntFloat_16) >> 16;
+ dst_y_slice[x] = src_y_slice[srcx];
+
+ if((y & 1) == 0) //y is even
+ {
+ if((x & 1) == 0) //x is even
+ {
+ src_index = (srcx / 2) * 2;
+
+ sp = dst_uv_yScanline + x;
+ dp = src_uv_yScanline + src_index;
+ *sp = *dp;
+ ++sp;
+ ++dp;
+ *sp = *dp;
+ }
+ }
+ }
+ dst_y_slice += dw;
+ }
+ return 0;
+}
+
+int convertor::resizeyuv(unsigned char *in, unsigned char **data, int *data_len){
+
+ init_resize_yuv();
+
+ *data_len = rWidth*rHeight*3/2;
+ *data = nv12;
+
+ return nv12_nearest_scale(in, nv12, width, height, rWidth, rHeight);
+}
+
+convHandle conv_create(const int srcW, const int srcH, const int dstW, const int dstH, const int gpu){
+ if (gpu < 0) return NULL;
+
+ convertor *conv = new convertor(srcW, srcH, dstW, dstH, gpu);
+ return conv;
+}
+
+void conv_destroy(convHandle h){
+ if (!h) return;
+ convertor *conv = (convertor*)h;
+ delete conv;
+}
+
+int yuv2bgrandresize(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen, unsigned char **scaleBGR, int *scaleBGRLen){
+ if (!h) return -2;
+ convertor *conv = (convertor*)h;
+ int ret = conv->fill_yuv((unsigned char*)yuv);
+ if (ret != 0) return ret;
+ ret = conv->yuv2bgr(bgr, bgrLen);
+ if (ret != 0) return ret;
+ ret = conv->resize2bgr(NULL, scaleBGR, scaleBGRLen);
+ return ret;
+}
+
+int yuv2bgr(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen){
+ if (!h) return -2;
+ convertor *conv = (convertor*)h;
+ int ret = conv->fill_yuv((unsigned char*)yuv);
+ if (ret != 0) return ret;
+ return conv->yuv2bgr(bgr, bgrLen);
+}
+
+int yuv2resizedbgr(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen){
+ if (!h) return -2;
+ convertor *conv = (convertor*)h;
+ int ret = conv->fill_yuv((unsigned char*)yuv);
+ if (ret != 0) return ret;
+ ret = conv->resize2bgr(NULL, bgr, bgrLen);
+ return ret;
+}
+
+int resizebgr(convHandle h, void *data, unsigned char **resized, int *len){
+ if (!h) return -2;
+ convertor *conv = (convertor*)h;
+ return conv->resize2bgr((unsigned char*)data, resized, len);
+}
+
+int resizeyuv(convHandle h, void *data, unsigned char **resized, int *len){
+ if (!h) return -2;
+ convertor *conv = (convertor*)h;
+ return conv->resizeyuv((unsigned char*)data, resized, len);
+}
diff --git a/goconv/conv.h b/goconv/conv.h
new file mode 100644
index 0000000..a55d9a8
--- /dev/null
+++ b/goconv/conv.h
@@ -0,0 +1,23 @@
+#ifndef __RESIZE_NPP_H__
+#define __RESIZE_NPP_H__
+
+#ifdef __cplusplus
+extern "C"{
+#endif
+
+typedef void* convHandle;
+convHandle conv_create(const int srcW, const int srcH, const int dstW, const int dstH, const int gpu);
+
+void conv_destroy(convHandle h);
+int yuv2bgrandresize(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen, unsigned char **scaleBGR, int *scaleBGRLen);
+int yuv2bgr(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen);
+int yuv2resizedbgr(convHandle h, void *yuv, unsigned char **bgr, int *bgrLen);
+
+int resizebgr(convHandle h, void *data, unsigned char **resized, int *len);
+int resizeyuv(convHandle h, void *data, unsigned char **resized, int *len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //__RESIZE_NPP_H__
diff --git a/goconv/goconv.go b/goconv/goconv.go
new file mode 100644
index 0000000..db38a94
--- /dev/null
+++ b/goconv/goconv.go
@@ -0,0 +1,258 @@
+package goconv
+
+/*
+#cgo CFLAGS: -I./ -I./inc -I/usr/local/cuda/include
+#cgo CXXFLAGS: -I./ -I./inc -I/usr/local/cuda/include -std=c++11
+#cgo LDFLAGS: -L/usr/local/cuda/lib64 -lnppig -lnppicc -lnppial -lnppisu -lcudart -ldl
+#include <stdlib.h>
+#include "conv.h"
+*/
+import "C"
+import (
+ "unsafe"
+
+ "basic.com/valib/godraw.git"
+ "basic.com/valib/gogpu.git"
+ "github.com/disintegration/imaging"
+)
+
+const (
+ need = 200
+ reserved = 512
+)
+
+func gpuIndex(lastIndex int) int {
+ indices := gogpu.RankGPU()
+ if len(indices) == 0 {
+ return -1
+ }
+
+ for _, v := range indices {
+ if v != lastIndex {
+ if gogpu.SatisfyGPU(v, need, need/2) {
+ return v
+ }
+ }
+ }
+
+ if gogpu.SatisfyGPU(lastIndex, need, reserved) {
+ return lastIndex
+ }
+ return -1
+}
+
+type convertor struct {
+ width int
+ height int
+ rWidth int
+ rHeight int
+ conv C.convHandle
+}
+
+var convts []*convertor
+
+func find(w, h, rw, rh int) *convertor {
+ for _, v := range convts {
+ if v.width == w && v.height == h && v.rWidth == rw && v.rHeight == rh {
+ return v
+ }
+ }
+ gpu := gpuIndex(0)
+ if gpu < 0 {
+ return nil
+ }
+ cw := C.conv_create(C.int(w), C.int(h), C.int(rw), C.int(rh), C.int(gpu))
+ if cw == nil {
+ return nil
+ }
+ c := &convertor{w, h, rw, rh, cw}
+ convts = append(convts, c)
+ return c
+}
+
+// YUV2BGR yuv->bgr
+func YUV2BGR(yuv []byte, w, h int) []byte {
+
+ cw := find(w, h, -1, -1)
+ if cw == nil {
+ return yuv2bgr(yuv, w, h)
+ }
+ var bgr *C.uchar
+ var bgrLen C.int
+ ret := C.yuv2bgr(cw.conv, unsafe.Pointer(&yuv[0]), &bgr, &bgrLen)
+ if ret != 0 {
+ return nil
+ }
+ const maxLen = 0x7fffffff
+ goBGRLen := int(bgrLen)
+ if goBGRLen > 0 {
+ return (*[maxLen]byte)(unsafe.Pointer(bgr))[:goBGRLen:goBGRLen]
+ }
+ return nil
+}
+
+// YUV2ResizedBGR yuv -> resized bgr
+func YUV2ResizedBGR(yuv []byte, w, h, rw, rh int) []byte {
+
+ cw := find(w, h, rw, rh)
+ if cw == nil {
+ bgr := yuv2bgr(yuv, w, h)
+ return bgresize(bgr, w, h, rw, rh)
+ }
+ var bgr *C.uchar
+ var bgrLen C.int
+ ret := C.yuv2resizedbgr(cw.conv, unsafe.Pointer(&yuv[0]), &bgr, &bgrLen)
+ if ret != 0 {
+ return nil
+ }
+ const maxLen = 0x7fffffff
+ goBGRLen := int(bgrLen)
+ if goBGRLen > 0 {
+ return (*[maxLen]byte)(unsafe.Pointer(bgr))[:goBGRLen:goBGRLen]
+ }
+ return nil
+
+}
+
+// ResizeBGR resize
+func ResizeBGR(bgrO []byte, w, h, rw, rh int) []byte {
+ if (rw < 0 && rh < 0) || (rw > w && rh > h) {
+ return bgrO
+ }
+
+ cw := find(w, h, rw, rh)
+ if cw == nil {
+ return bgresize(bgrO, w, h, rw, rh)
+ }
+
+ var bgr *C.uchar
+ var bgrLen C.int
+ ret := C.resizebgr(cw.conv, unsafe.Pointer(&bgrO[0]), &bgr, &bgrLen)
+ if ret != 0 {
+ return nil
+ }
+ const maxLen = 0x7fffffff
+ goBGRLen := int(bgrLen)
+ if goBGRLen > 0 {
+ return (*[maxLen]byte)(unsafe.Pointer(bgr))[:goBGRLen:goBGRLen]
+ }
+ return nil
+}
+
+// ResizeYUV yuv
+func ResizeYUV(yuv []byte, w, h, rw, rh int) []byte {
+ if (rw < 0 && rh < 0) || (rw > w && rh > h) {
+ return yuv
+ }
+
+ cw := find(w, h, rw, rh)
+ if cw == nil {
+ return yuv
+ }
+
+ var resized *C.uchar
+ var resizedLen C.int
+ ret := C.resizeyuv(cw.conv, unsafe.Pointer(&yuv[0]), &resized, &resizedLen)
+ if ret != 0 {
+ return nil
+ }
+
+ const maxLen = 0x7fffffff
+ goResizedLen := int(resizedLen)
+ if goResizedLen > 0 {
+ return (*[maxLen]byte)(unsafe.Pointer(resized))[:goResizedLen:goResizedLen]
+ }
+ return nil
+}
+
+// YUV2BGRandResize conv and resize
+func YUV2BGRandResize(yuv []byte, w, h, rw, rh int) ([]byte, []byte) {
+ cw := find(w, h, rw, rh)
+ if cw == nil {
+ origin := yuv2bgr(yuv, w, h)
+ resized := bgresize(origin, w, h, rw, rh)
+ return origin, resized
+ }
+
+ var bgr *C.uchar
+ var bgrLen C.int
+ var scale *C.uchar
+ var scaleLen C.int
+
+ ret := C.yuv2bgrandresize(cw.conv, unsafe.Pointer(&yuv[0]), &bgr, &bgrLen, &scale, &scaleLen)
+
+ if ret != 0 {
+ return nil, nil
+ }
+ var out, resized []byte
+
+ const maxLen = 0x7fffffff
+ goBGRLen, goScaleLen := int(bgrLen), int(scaleLen)
+ if goBGRLen > 0 {
+ out = (*[maxLen]byte)(unsafe.Pointer(bgr))[:goBGRLen:goBGRLen]
+ }
+ if goScaleLen > 0 {
+ resized = (*[maxLen]byte)(unsafe.Pointer(scale))[:goScaleLen:goScaleLen]
+ }
+ return out, resized
+
+}
+
+// Free free
+func Free() {
+ for _, v := range convts {
+ if v.conv != nil {
+ C.conv_destroy(v.conv)
+ }
+ }
+}
+
+func yuv2bgr(yuv []byte, w, h int) []byte {
+
+ data := make([]byte, 0, w*h*3)
+ start := w * h
+ for i := 0; i < h; i++ {
+ for j := 0; j < w; j++ {
+
+ index := i/2*w + j - (j & 0x01)
+
+ y := int32(yuv[j+i*w])
+ v := int32(yuv[start+index])
+ u := int32(yuv[start+index+1])
+
+ r := y + (140*(v-128))/100
+ g := y - (34*(u-128)+71*(v-128))/100
+ b := y + (177*(u-128))/100
+
+ if r > 255 {
+ r = 255
+ }
+ if r < 0 {
+ r = 0
+ }
+ if g > 255 {
+ g = 255
+ }
+ if g < 0 {
+ g = 0
+ }
+ if b > 255 {
+ b = 255
+ }
+ if b < 0 {
+ b = 0
+ }
+ data = append(data, byte(r), byte(g), byte(b))
+ }
+ }
+ return data
+}
+
+func bgresize(bgr []byte, w, h, rw, rh int) []byte {
+ img, err := godraw.ToImage(bgr, w, h)
+ if err != nil {
+ return nil
+ }
+ dstImg := imaging.Resize(img, rw, rh, imaging.NearestNeighbor)
+ return godraw.Image2BGR(dstImg)
+}
diff --git a/goconv/inc/Exceptions.h b/goconv/inc/Exceptions.h
new file mode 100644
index 0000000..627bfd9
--- /dev/null
+++ b/goconv/inc/Exceptions.h
@@ -0,0 +1,181 @@
+/**
+ * Copyright 1993-2015 NVIDIA Corporation. All rights reserved.
+ *
+ * Please refer to the NVIDIA end user license agreement (EULA) associated
+ * with this source code for terms and conditions that govern your use of
+ * this software. Any use, reproduction, disclosure, or distribution of
+ * this software and related documentation outside the terms of the EULA
+ * is strictly prohibited.
+ *
+ */
+
+#ifndef NV_UTIL_NPP_EXCEPTIONS_H
+#define NV_UTIL_NPP_EXCEPTIONS_H
+
+
+#include <string>
+#include <sstream>
+#include <iostream>
+
+/// All npp related C++ classes are put into the npp namespace.
+namespace npp
+{
+
+ /// Exception base class.
+ /// This exception base class will be used for everything C++ throught
+ /// the NPP project.
+ /// The exception contains a string message, as well as data fields for a string
+ /// containing the name of the file as well as the line number where the exception was thrown.
+ /// The easiest way of throwing exceptions and providing filename and line number is
+ /// to use one of the ASSERT macros defined for that purpose.
+ class Exception
+ {
+ public:
+ /// Constructor.
+ /// \param rMessage A message with information as to why the exception was thrown.
+ /// \param rFileName The name of the file where the exception was thrown.
+ /// \param nLineNumber Line number in the file where the exception was thrown.
+ explicit
+ Exception(const std::string &rMessage = "", const std::string &rFileName = "", unsigned int nLineNumber = 0)
+ : sMessage_(rMessage), sFileName_(rFileName), nLineNumber_(nLineNumber)
+ { };
+
+ Exception(const Exception &rException)
+ : sMessage_(rException.sMessage_), sFileName_(rException.sFileName_), nLineNumber_(rException.nLineNumber_)
+ { };
+
+ virtual
+ ~Exception()
+ { };
+
+ /// Get the exception's message.
+ const
+ std::string &
+ message()
+ const
+ {
+ return sMessage_;
+ }
+
+ /// Get the exception's file info.
+ const
+ std::string &
+ fileName()
+ const
+ {
+ return sFileName_;
+ }
+
+ /// Get the exceptions's line info.
+ unsigned int
+ lineNumber()
+ const
+ {
+ return nLineNumber_;
+ }
+
+
+ /// Create a clone of this exception.
+ /// This creates a new Exception object on the heap. It is
+ /// the responsibility of the user of this function to free this memory
+ /// (delete x).
+ virtual
+ Exception *
+ clone()
+ const
+ {
+ return new Exception(*this);
+ }
+
+ /// Create a single string with all the exceptions information.
+ /// The virtual toString() method is used by the operator<<()
+ /// so that all exceptions derived from this base-class can print
+ /// their full information correctly even if a reference to their
+ /// exact type is not had at the time of printing (i.e. the basic
+ /// operator<<() is used).
+ virtual
+ std::string
+ toString()
+ const
+ {
+ std::ostringstream oOutputString;
+ oOutputString << fileName() << ":" << lineNumber() << ": " << message();
+ return oOutputString.str();
+ }
+
+ private:
+ std::string sMessage_; ///< Message regarding the cause of the exception.
+ std::string sFileName_; ///< Name of the file where the exception was thrown.
+ unsigned int nLineNumber_; ///< Line number in the file where the exception was thrown
+ };
+
+ /// Output stream inserter for Exception.
+ /// \param rOutputStream The stream the exception information is written to.
+ /// \param rException The exception that's being written.
+ /// \return Reference to the output stream being used.
+ static std::ostream &
+ operator << (std::ostream &rOutputStream, const Exception &rException)
+ {
+ rOutputStream << rException.toString();
+ return rOutputStream;
+ }
+
+ /// Basic assert macro.
+ /// This macro should be used to enforce any kind of pre or post conditions.
+ /// Unlike the C-runtime assert macro, this macro does not abort execution, but throws
+ /// a C++ exception. The exception is automatically filled with information about the failing
+ /// condition, the filename and line number where the exception was thrown.
+ /// \note The macro is written in such a way that omitting a semicolon after its usage
+ /// causes a compiler error. The correct way to invoke this macro is:
+ /// NPP_ASSERT(n < MAX);
+#define NPP_ASSERT(C) do {if (!(C)) throw npp::Exception(#C " assertion faild!", __FILE__, __LINE__);} while(false)
+
+ // ASSERT macro.
+ // Same functionality as the basic assert macro with the added ability to pass
+ // a message M. M should be a string literal.
+ // Note: Never use code inside ASSERT() that causes a side-effect ASSERT macros may get compiled
+ // out in release mode.
+#define NPP_ASSERT_MSG(C, M) do {if (!(C)) throw npp::Exception(#C " assertion faild! Message: " M, __FILE__, __LINE__);} while(false)
+
+#ifdef _DEBUG
+ /// Basic debug assert macro.
+ /// This macro is identical in every respect to NPP_ASSERT(C) but it does get compiled to a
+ /// no-op in release builds. It is therefor of utmost importance to not put statements into
+ /// this macro that cause side effects required for correct program execution.
+#define NPP_DEBUG_ASSERT(C) do {if (!(C)) throw npp::Exception(#C " debug assertion faild!", __FILE__, __LINE__);} while(false)
+#else
+#define NPP_DEBUG_ASSERT(C)
+#endif
+
+ /// ASSERT for null-pointer test.
+ /// It is safe to put code with side effects into this macro. Also: This macro never
+ /// gets compiled to a no-op because resource allocation may fail based on external causes not under
+ /// control of a software developer.
+#define NPP_ASSERT_NOT_NULL(P) do {if ((P) == 0) throw npp::Exception(#P " not null assertion faild!", __FILE__, __LINE__);} while(false)
+
+ /// Macro for flagging methods as not implemented.
+ /// The macro throws an exception with a message that an implementation was missing
+#define NPP_NOT_IMPLEMENTED() do {throw npp::Exception("Implementation missing!", __FILE__, __LINE__);} while(false)
+
+ /// Macro for checking error return code of CUDA (runtime) calls.
+ /// This macro never gets disabled.
+#define NPP_CHECK_CUDA(S) do {cudaError_t eCUDAResult; \
+ eCUDAResult = S; \
+ if (eCUDAResult != cudaSuccess) std::cout << "NPP_CHECK_CUDA - eCUDAResult = " << eCUDAResult << std::endl; \
+ NPP_ASSERT(eCUDAResult == cudaSuccess);} while (false)
+
+ /// Macro for checking error return code for NPP calls.
+#define NPP_CHECK_NPP(S) do {NppStatus eStatusNPP; \
+ eStatusNPP = S; \
+ if (eStatusNPP != NPP_SUCCESS) std::cout << "NPP_CHECK_NPP - eStatusNPP = " << _cudaGetErrorEnum(eStatusNPP) << "("<< eStatusNPP << ")" << std::endl; \
+ NPP_ASSERT(eStatusNPP == NPP_SUCCESS);} while (false)
+
+ /// Macro for checking error return codes from cuFFT calls.
+#define NPP_CHECK_CUFFT(S) do {cufftResult eCUFFTResult; \
+ eCUFFTResult = S; \
+ if (eCUFFTResult != NPP_SUCCESS) std::cout << "NPP_CHECK_CUFFT - eCUFFTResult = " << eCUFFTResult << std::endl; \
+ NPP_ASSERT(eCUFFTResult == CUFFT_SUCCESS);} while (false)
+
+} // npp namespace
+
+#endif // NV_UTIL_NPP_EXCEPTIONS_H
diff --git a/goconv/inc/helper_cuda.h b/goconv/inc/helper_cuda.h
new file mode 100644
index 0000000..b24684c
--- /dev/null
+++ b/goconv/inc/helper_cuda.h
@@ -0,0 +1,1261 @@
+/**
+ * Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
+ *
+ * Please refer to the NVIDIA end user license agreement (EULA) associated
+ * with this source code for terms and conditions that govern your use of
+ * this software. Any use, reproduction, disclosure, or distribution of
+ * this software and related documentation outside the terms of the EULA
+ * is strictly prohibited.
+ *
+ */
+
+////////////////////////////////////////////////////////////////////////////////
+// These are CUDA Helper functions for initialization and error checking
+
+#ifndef HELPER_CUDA_H
+#define HELPER_CUDA_H
+
+#pragma once
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <helper_string.h>
+
+#ifndef EXIT_WAIVED
+#define EXIT_WAIVED 2
+#endif
+
+// Note, it is required that your SDK sample to include the proper header files, please
+// refer the CUDA examples for examples of the needed CUDA headers, which may change depending
+// on which CUDA functions are used.
+
+// CUDA Runtime error messages
+#ifdef __DRIVER_TYPES_H__
+static const char *_cudaGetErrorEnum(cudaError_t error)
+{
+ switch (error)
+ {
+ case cudaSuccess:
+ return "cudaSuccess";
+
+ case cudaErrorMissingConfiguration:
+ return "cudaErrorMissingConfiguration";
+
+ case cudaErrorMemoryAllocation:
+ return "cudaErrorMemoryAllocation";
+
+ case cudaErrorInitializationError:
+ return "cudaErrorInitializationError";
+
+ case cudaErrorLaunchFailure:
+ return "cudaErrorLaunchFailure";
+
+ case cudaErrorPriorLaunchFailure:
+ return "cudaErrorPriorLaunchFailure";
+
+ case cudaErrorLaunchTimeout:
+ return "cudaErrorLaunchTimeout";
+
+ case cudaErrorLaunchOutOfResources:
+ return "cudaErrorLaunchOutOfResources";
+
+ case cudaErrorInvalidDeviceFunction:
+ return "cudaErrorInvalidDeviceFunction";
+
+ case cudaErrorInvalidConfiguration:
+ return "cudaErrorInvalidConfiguration";
+
+ case cudaErrorInvalidDevice:
+ return "cudaErrorInvalidDevice";
+
+ case cudaErrorInvalidValue:
+ return "cudaErrorInvalidValue";
+
+ case cudaErrorInvalidPitchValue:
+ return "cudaErrorInvalidPitchValue";
+
+ case cudaErrorInvalidSymbol:
+ return "cudaErrorInvalidSymbol";
+
+ case cudaErrorMapBufferObjectFailed:
+ return "cudaErrorMapBufferObjectFailed";
+
+ case cudaErrorUnmapBufferObjectFailed:
+ return "cudaErrorUnmapBufferObjectFailed";
+
+ case cudaErrorInvalidHostPointer:
+ return "cudaErrorInvalidHostPointer";
+
+ case cudaErrorInvalidDevicePointer:
+ return "cudaErrorInvalidDevicePointer";
+
+ case cudaErrorInvalidTexture:
+ return "cudaErrorInvalidTexture";
+
+ case cudaErrorInvalidTextureBinding:
+ return "cudaErrorInvalidTextureBinding";
+
+ case cudaErrorInvalidChannelDescriptor:
+ return "cudaErrorInvalidChannelDescriptor";
+
+ case cudaErrorInvalidMemcpyDirection:
+ return "cudaErrorInvalidMemcpyDirection";
+
+ case cudaErrorAddressOfConstant:
+ return "cudaErrorAddressOfConstant";
+
+ case cudaErrorTextureFetchFailed:
+ return "cudaErrorTextureFetchFailed";
+
+ case cudaErrorTextureNotBound:
+ return "cudaErrorTextureNotBound";
+
+ case cudaErrorSynchronizationError:
+ return "cudaErrorSynchronizationError";
+
+ case cudaErrorInvalidFilterSetting:
+ return "cudaErrorInvalidFilterSetting";
+
+ case cudaErrorInvalidNormSetting:
+ return "cudaErrorInvalidNormSetting";
+
+ case cudaErrorMixedDeviceExecution:
+ return "cudaErrorMixedDeviceExecution";
+
+ case cudaErrorCudartUnloading:
+ return "cudaErrorCudartUnloading";
+
+ case cudaErrorUnknown:
+ return "cudaErrorUnknown";
+
+ case cudaErrorNotYetImplemented:
+ return "cudaErrorNotYetImplemented";
+
+ case cudaErrorMemoryValueTooLarge:
+ return "cudaErrorMemoryValueTooLarge";
+
+ case cudaErrorInvalidResourceHandle:
+ return "cudaErrorInvalidResourceHandle";
+
+ case cudaErrorNotReady:
+ return "cudaErrorNotReady";
+
+ case cudaErrorInsufficientDriver:
+ return "cudaErrorInsufficientDriver";
+
+ case cudaErrorSetOnActiveProcess:
+ return "cudaErrorSetOnActiveProcess";
+
+ case cudaErrorInvalidSurface:
+ return "cudaErrorInvalidSurface";
+
+ case cudaErrorNoDevice:
+ return "cudaErrorNoDevice";
+
+ case cudaErrorECCUncorrectable:
+ return "cudaErrorECCUncorrectable";
+
+ case cudaErrorSharedObjectSymbolNotFound:
+ return "cudaErrorSharedObjectSymbolNotFound";
+
+ case cudaErrorSharedObjectInitFailed:
+ return "cudaErrorSharedObjectInitFailed";
+
+ case cudaErrorUnsupportedLimit:
+ return "cudaErrorUnsupportedLimit";
+
+ case cudaErrorDuplicateVariableName:
+ return "cudaErrorDuplicateVariableName";
+
+ case cudaErrorDuplicateTextureName:
+ return "cudaErrorDuplicateTextureName";
+
+ case cudaErrorDuplicateSurfaceName:
+ return "cudaErrorDuplicateSurfaceName";
+
+ case cudaErrorDevicesUnavailable:
+ return "cudaErrorDevicesUnavailable";
+
+ case cudaErrorInvalidKernelImage:
+ return "cudaErrorInvalidKernelImage";
+
+ case cudaErrorNoKernelImageForDevice:
+ return "cudaErrorNoKernelImageForDevice";
+
+ case cudaErrorIncompatibleDriverContext:
+ return "cudaErrorIncompatibleDriverContext";
+
+ case cudaErrorPeerAccessAlreadyEnabled:
+ return "cudaErrorPeerAccessAlreadyEnabled";
+
+ case cudaErrorPeerAccessNotEnabled:
+ return "cudaErrorPeerAccessNotEnabled";
+
+ case cudaErrorDeviceAlreadyInUse:
+ return "cudaErrorDeviceAlreadyInUse";
+
+ case cudaErrorProfilerDisabled:
+ return "cudaErrorProfilerDisabled";
+
+ case cudaErrorProfilerNotInitialized:
+ return "cudaErrorProfilerNotInitialized";
+
+ case cudaErrorProfilerAlreadyStarted:
+ return "cudaErrorProfilerAlreadyStarted";
+
+ case cudaErrorProfilerAlreadyStopped:
+ return "cudaErrorProfilerAlreadyStopped";
+
+ /* Since CUDA 4.0*/
+ case cudaErrorAssert:
+ return "cudaErrorAssert";
+
+ case cudaErrorTooManyPeers:
+ return "cudaErrorTooManyPeers";
+
+ case cudaErrorHostMemoryAlreadyRegistered:
+ return "cudaErrorHostMemoryAlreadyRegistered";
+
+ case cudaErrorHostMemoryNotRegistered:
+ return "cudaErrorHostMemoryNotRegistered";
+
+ /* Since CUDA 5.0 */
+ case cudaErrorOperatingSystem:
+ return "cudaErrorOperatingSystem";
+
+ case cudaErrorPeerAccessUnsupported:
+ return "cudaErrorPeerAccessUnsupported";
+
+ case cudaErrorLaunchMaxDepthExceeded:
+ return "cudaErrorLaunchMaxDepthExceeded";
+
+ case cudaErrorLaunchFileScopedTex:
+ return "cudaErrorLaunchFileScopedTex";
+
+ case cudaErrorLaunchFileScopedSurf:
+ return "cudaErrorLaunchFileScopedSurf";
+
+ case cudaErrorSyncDepthExceeded:
+ return "cudaErrorSyncDepthExceeded";
+
+ case cudaErrorLaunchPendingCountExceeded:
+ return "cudaErrorLaunchPendingCountExceeded";
+
+ case cudaErrorNotPermitted:
+ return "cudaErrorNotPermitted";
+
+ case cudaErrorNotSupported:
+ return "cudaErrorNotSupported";
+
+ /* Since CUDA 6.0 */
+ case cudaErrorHardwareStackError:
+ return "cudaErrorHardwareStackError";
+
+ case cudaErrorIllegalInstruction:
+ return "cudaErrorIllegalInstruction";
+
+ case cudaErrorMisalignedAddress:
+ return "cudaErrorMisalignedAddress";
+
+ case cudaErrorInvalidAddressSpace:
+ return "cudaErrorInvalidAddressSpace";
+
+ case cudaErrorInvalidPc:
+ return "cudaErrorInvalidPc";
+
+ case cudaErrorIllegalAddress:
+ return "cudaErrorIllegalAddress";
+
+ /* Since CUDA 6.5*/
+ case cudaErrorInvalidPtx:
+ return "cudaErrorInvalidPtx";
+
+ case cudaErrorInvalidGraphicsContext:
+ return "cudaErrorInvalidGraphicsContext";
+
+ case cudaErrorStartupFailure:
+ return "cudaErrorStartupFailure";
+
+ case cudaErrorApiFailureBase:
+ return "cudaErrorApiFailureBase";
+ }
+
+ return "<unknown>";
+}
+#endif
+
+#ifdef __cuda_cuda_h__
+// CUDA Driver API errors
+static const char *_cudaGetErrorEnum(CUresult error)
+{
+ switch (error)
+ {
+ case CUDA_SUCCESS:
+ return "CUDA_SUCCESS";
+
+ case CUDA_ERROR_INVALID_VALUE:
+ return "CUDA_ERROR_INVALID_VALUE";
+
+ case CUDA_ERROR_OUT_OF_MEMORY:
+ return "CUDA_ERROR_OUT_OF_MEMORY";
+
+ case CUDA_ERROR_NOT_INITIALIZED:
+ return "CUDA_ERROR_NOT_INITIALIZED";
+
+ case CUDA_ERROR_DEINITIALIZED:
+ return "CUDA_ERROR_DEINITIALIZED";
+
+ case CUDA_ERROR_PROFILER_DISABLED:
+ return "CUDA_ERROR_PROFILER_DISABLED";
+
+ case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
+ return "CUDA_ERROR_PROFILER_NOT_INITIALIZED";
+
+ case CUDA_ERROR_PROFILER_ALREADY_STARTED:
+ return "CUDA_ERROR_PROFILER_ALREADY_STARTED";
+
+ case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
+ return "CUDA_ERROR_PROFILER_ALREADY_STOPPED";
+
+ case CUDA_ERROR_NO_DEVICE:
+ return "CUDA_ERROR_NO_DEVICE";
+
+ case CUDA_ERROR_INVALID_DEVICE:
+ return "CUDA_ERROR_INVALID_DEVICE";
+
+ case CUDA_ERROR_INVALID_IMAGE:
+ return "CUDA_ERROR_INVALID_IMAGE";
+
+ case CUDA_ERROR_INVALID_CONTEXT:
+ return "CUDA_ERROR_INVALID_CONTEXT";
+
+ case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
+ return "CUDA_ERROR_CONTEXT_ALREADY_CURRENT";
+
+ case CUDA_ERROR_MAP_FAILED:
+ return "CUDA_ERROR_MAP_FAILED";
+
+ case CUDA_ERROR_UNMAP_FAILED:
+ return "CUDA_ERROR_UNMAP_FAILED";
+
+ case CUDA_ERROR_ARRAY_IS_MAPPED:
+ return "CUDA_ERROR_ARRAY_IS_MAPPED";
+
+ case CUDA_ERROR_ALREADY_MAPPED:
+ return "CUDA_ERROR_ALREADY_MAPPED";
+
+ case CUDA_ERROR_NO_BINARY_FOR_GPU:
+ return "CUDA_ERROR_NO_BINARY_FOR_GPU";
+
+ case CUDA_ERROR_ALREADY_ACQUIRED:
+ return "CUDA_ERROR_ALREADY_ACQUIRED";
+
+ case CUDA_ERROR_NOT_MAPPED:
+ return "CUDA_ERROR_NOT_MAPPED";
+
+ case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
+ return "CUDA_ERROR_NOT_MAPPED_AS_ARRAY";
+
+ case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
+ return "CUDA_ERROR_NOT_MAPPED_AS_POINTER";
+
+ case CUDA_ERROR_ECC_UNCORRECTABLE:
+ return "CUDA_ERROR_ECC_UNCORRECTABLE";
+
+ case CUDA_ERROR_UNSUPPORTED_LIMIT:
+ return "CUDA_ERROR_UNSUPPORTED_LIMIT";
+
+ case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
+ return "CUDA_ERROR_CONTEXT_ALREADY_IN_USE";
+
+ case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
+ return "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED";
+
+ case CUDA_ERROR_INVALID_PTX:
+ return "CUDA_ERROR_INVALID_PTX";
+
+ case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT:
+ return "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT";
+
+ case CUDA_ERROR_INVALID_SOURCE:
+ return "CUDA_ERROR_INVALID_SOURCE";
+
+ case CUDA_ERROR_FILE_NOT_FOUND:
+ return "CUDA_ERROR_FILE_NOT_FOUND";
+
+ case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
+ return "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND";
+
+ case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
+ return "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED";
+
+ case CUDA_ERROR_OPERATING_SYSTEM:
+ return "CUDA_ERROR_OPERATING_SYSTEM";
+
+ case CUDA_ERROR_INVALID_HANDLE:
+ return "CUDA_ERROR_INVALID_HANDLE";
+
+ case CUDA_ERROR_NOT_FOUND:
+ return "CUDA_ERROR_NOT_FOUND";
+
+ case CUDA_ERROR_NOT_READY:
+ return "CUDA_ERROR_NOT_READY";
+
+ case CUDA_ERROR_ILLEGAL_ADDRESS:
+ return "CUDA_ERROR_ILLEGAL_ADDRESS";
+
+ case CUDA_ERROR_LAUNCH_FAILED:
+ return "CUDA_ERROR_LAUNCH_FAILED";
+
+ case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
+ return "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES";
+
+ case CUDA_ERROR_LAUNCH_TIMEOUT:
+ return "CUDA_ERROR_LAUNCH_TIMEOUT";
+
+ case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING:
+ return "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING";
+
+ case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
+ return "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED";
+
+ case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
+ return "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED";
+
+ case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
+ return "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE";
+
+ case CUDA_ERROR_CONTEXT_IS_DESTROYED:
+ return "CUDA_ERROR_CONTEXT_IS_DESTROYED";
+
+ case CUDA_ERROR_ASSERT:
+ return "CUDA_ERROR_ASSERT";
+
+ case CUDA_ERROR_TOO_MANY_PEERS:
+ return "CUDA_ERROR_TOO_MANY_PEERS";
+
+ case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
+ return "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED";
+
+ case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
+ return "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED";
+
+ case CUDA_ERROR_HARDWARE_STACK_ERROR:
+ return "CUDA_ERROR_HARDWARE_STACK_ERROR";
+
+ case CUDA_ERROR_ILLEGAL_INSTRUCTION:
+ return "CUDA_ERROR_ILLEGAL_INSTRUCTION";
+
+ case CUDA_ERROR_MISALIGNED_ADDRESS:
+ return "CUDA_ERROR_MISALIGNED_ADDRESS";
+
+ case CUDA_ERROR_INVALID_ADDRESS_SPACE:
+ return "CUDA_ERROR_INVALID_ADDRESS_SPACE";
+
+ case CUDA_ERROR_INVALID_PC:
+ return "CUDA_ERROR_INVALID_PC";
+
+ case CUDA_ERROR_NOT_PERMITTED:
+ return "CUDA_ERROR_NOT_PERMITTED";
+
+ case CUDA_ERROR_NOT_SUPPORTED:
+ return "CUDA_ERROR_NOT_SUPPORTED";
+
+ case CUDA_ERROR_UNKNOWN:
+ return "CUDA_ERROR_UNKNOWN";
+ }
+
+ return "<unknown>";
+}
+#endif
+
+#ifdef CUBLAS_API_H_
+// cuBLAS API errors
+static const char *_cudaGetErrorEnum(cublasStatus_t error)
+{
+ switch (error)
+ {
+ case CUBLAS_STATUS_SUCCESS:
+ return "CUBLAS_STATUS_SUCCESS";
+
+ case CUBLAS_STATUS_NOT_INITIALIZED:
+ return "CUBLAS_STATUS_NOT_INITIALIZED";
+
+ case CUBLAS_STATUS_ALLOC_FAILED:
+ return "CUBLAS_STATUS_ALLOC_FAILED";
+
+ case CUBLAS_STATUS_INVALID_VALUE:
+ return "CUBLAS_STATUS_INVALID_VALUE";
+
+ case CUBLAS_STATUS_ARCH_MISMATCH:
+ return "CUBLAS_STATUS_ARCH_MISMATCH";
+
+ case CUBLAS_STATUS_MAPPING_ERROR:
+ return "CUBLAS_STATUS_MAPPING_ERROR";
+
+ case CUBLAS_STATUS_EXECUTION_FAILED:
+ return "CUBLAS_STATUS_EXECUTION_FAILED";
+
+ case CUBLAS_STATUS_INTERNAL_ERROR:
+ return "CUBLAS_STATUS_INTERNAL_ERROR";
+
+ case CUBLAS_STATUS_NOT_SUPPORTED:
+ return "CUBLAS_STATUS_NOT_SUPPORTED";
+
+ case CUBLAS_STATUS_LICENSE_ERROR:
+ return "CUBLAS_STATUS_LICENSE_ERROR";
+ }
+
+ return "<unknown>";
+}
+#endif
+
+#ifdef _CUFFT_H_
+// cuFFT API errors
+static const char *_cudaGetErrorEnum(cufftResult error)
+{
+ switch (error)
+ {
+ case CUFFT_SUCCESS:
+ return "CUFFT_SUCCESS";
+
+ case CUFFT_INVALID_PLAN:
+ return "CUFFT_INVALID_PLAN";
+
+ case CUFFT_ALLOC_FAILED:
+ return "CUFFT_ALLOC_FAILED";
+
+ case CUFFT_INVALID_TYPE:
+ return "CUFFT_INVALID_TYPE";
+
+ case CUFFT_INVALID_VALUE:
+ return "CUFFT_INVALID_VALUE";
+
+ case CUFFT_INTERNAL_ERROR:
+ return "CUFFT_INTERNAL_ERROR";
+
+ case CUFFT_EXEC_FAILED:
+ return "CUFFT_EXEC_FAILED";
+
+ case CUFFT_SETUP_FAILED:
+ return "CUFFT_SETUP_FAILED";
+
+ case CUFFT_INVALID_SIZE:
+ return "CUFFT_INVALID_SIZE";
+
+ case CUFFT_UNALIGNED_DATA:
+ return "CUFFT_UNALIGNED_DATA";
+
+ case CUFFT_INCOMPLETE_PARAMETER_LIST:
+ return "CUFFT_INCOMPLETE_PARAMETER_LIST";
+
+ case CUFFT_INVALID_DEVICE:
+ return "CUFFT_INVALID_DEVICE";
+
+ case CUFFT_PARSE_ERROR:
+ return "CUFFT_PARSE_ERROR";
+
+ case CUFFT_NO_WORKSPACE:
+ return "CUFFT_NO_WORKSPACE";
+
+ case CUFFT_NOT_IMPLEMENTED:
+ return "CUFFT_NOT_IMPLEMENTED";
+
+ case CUFFT_LICENSE_ERROR:
+ return "CUFFT_LICENSE_ERROR";
+ }
+
+ return "<unknown>";
+}
+#endif
+
+
+#ifdef CUSPARSEAPI
+// cuSPARSE API errors
+static const char *_cudaGetErrorEnum(cusparseStatus_t error)
+{
+ switch (error)
+ {
+ case CUSPARSE_STATUS_SUCCESS:
+ return "CUSPARSE_STATUS_SUCCESS";
+
+ case CUSPARSE_STATUS_NOT_INITIALIZED:
+ return "CUSPARSE_STATUS_NOT_INITIALIZED";
+
+ case CUSPARSE_STATUS_ALLOC_FAILED:
+ return "CUSPARSE_STATUS_ALLOC_FAILED";
+
+ case CUSPARSE_STATUS_INVALID_VALUE:
+ return "CUSPARSE_STATUS_INVALID_VALUE";
+
+ case CUSPARSE_STATUS_ARCH_MISMATCH:
+ return "CUSPARSE_STATUS_ARCH_MISMATCH";
+
+ case CUSPARSE_STATUS_MAPPING_ERROR:
+ return "CUSPARSE_STATUS_MAPPING_ERROR";
+
+ case CUSPARSE_STATUS_EXECUTION_FAILED:
+ return "CUSPARSE_STATUS_EXECUTION_FAILED";
+
+ case CUSPARSE_STATUS_INTERNAL_ERROR:
+ return "CUSPARSE_STATUS_INTERNAL_ERROR";
+
+ case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
+ return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
+ }
+
+ return "<unknown>";
+}
+#endif
+
+#ifdef CUSOLVER_COMMON_H_
+//cuSOLVER API errors
+static const char *_cudaGetErrorEnum(cusolverStatus_t error)
+{
+ switch(error)
+ {
+ case CUSOLVER_STATUS_SUCCESS:
+ return "CUSOLVER_STATUS_SUCCESS";
+ case CUSOLVER_STATUS_NOT_INITIALIZED:
+ return "CUSOLVER_STATUS_NOT_INITIALIZED";
+ case CUSOLVER_STATUS_ALLOC_FAILED:
+ return "CUSOLVER_STATUS_ALLOC_FAILED";
+ case CUSOLVER_STATUS_INVALID_VALUE:
+ return "CUSOLVER_STATUS_INVALID_VALUE";
+ case CUSOLVER_STATUS_ARCH_MISMATCH:
+ return "CUSOLVER_STATUS_ARCH_MISMATCH";
+ case CUSOLVER_STATUS_MAPPING_ERROR:
+ return "CUSOLVER_STATUS_MAPPING_ERROR";
+ case CUSOLVER_STATUS_EXECUTION_FAILED:
+ return "CUSOLVER_STATUS_EXECUTION_FAILED";
+ case CUSOLVER_STATUS_INTERNAL_ERROR:
+ return "CUSOLVER_STATUS_INTERNAL_ERROR";
+ case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
+ return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
+ case CUSOLVER_STATUS_NOT_SUPPORTED :
+ return "CUSOLVER_STATUS_NOT_SUPPORTED ";
+ case CUSOLVER_STATUS_ZERO_PIVOT:
+ return "CUSOLVER_STATUS_ZERO_PIVOT";
+ case CUSOLVER_STATUS_INVALID_LICENSE:
+ return "CUSOLVER_STATUS_INVALID_LICENSE";
+ }
+
+ return "<unknown>";
+
+}
+#endif
+
+#ifdef CURAND_H_
+// cuRAND API errors
+static const char *_cudaGetErrorEnum(curandStatus_t error)
+{
+ switch (error)
+ {
+ case CURAND_STATUS_SUCCESS:
+ return "CURAND_STATUS_SUCCESS";
+
+ case CURAND_STATUS_VERSION_MISMATCH:
+ return "CURAND_STATUS_VERSION_MISMATCH";
+
+ case CURAND_STATUS_NOT_INITIALIZED:
+ return "CURAND_STATUS_NOT_INITIALIZED";
+
+ case CURAND_STATUS_ALLOCATION_FAILED:
+ return "CURAND_STATUS_ALLOCATION_FAILED";
+
+ case CURAND_STATUS_TYPE_ERROR:
+ return "CURAND_STATUS_TYPE_ERROR";
+
+ case CURAND_STATUS_OUT_OF_RANGE:
+ return "CURAND_STATUS_OUT_OF_RANGE";
+
+ case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
+ return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
+
+ case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
+ return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
+
+ case CURAND_STATUS_LAUNCH_FAILURE:
+ return "CURAND_STATUS_LAUNCH_FAILURE";
+
+ case CURAND_STATUS_PREEXISTING_FAILURE:
+ return "CURAND_STATUS_PREEXISTING_FAILURE";
+
+ case CURAND_STATUS_INITIALIZATION_FAILED:
+ return "CURAND_STATUS_INITIALIZATION_FAILED";
+
+ case CURAND_STATUS_ARCH_MISMATCH:
+ return "CURAND_STATUS_ARCH_MISMATCH";
+
+ case CURAND_STATUS_INTERNAL_ERROR:
+ return "CURAND_STATUS_INTERNAL_ERROR";
+ }
+
+ return "<unknown>";
+}
+#endif
+
+#ifdef NV_NPPIDEFS_H
+// NPP API errors
+static const char *_cudaGetErrorEnum(NppStatus error)
+{
+ switch (error)
+ {
+ case NPP_NOT_SUPPORTED_MODE_ERROR:
+ return "NPP_NOT_SUPPORTED_MODE_ERROR";
+
+ case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR:
+ return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR";
+
+ case NPP_RESIZE_NO_OPERATION_ERROR:
+ return "NPP_RESIZE_NO_OPERATION_ERROR";
+
+ case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY:
+ return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY";
+
+#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
+
+ case NPP_BAD_ARG_ERROR:
+ return "NPP_BAD_ARGUMENT_ERROR";
+
+ case NPP_COEFF_ERROR:
+ return "NPP_COEFFICIENT_ERROR";
+
+ case NPP_RECT_ERROR:
+ return "NPP_RECTANGLE_ERROR";
+
+ case NPP_QUAD_ERROR:
+ return "NPP_QUADRANGLE_ERROR";
+
+ case NPP_MEM_ALLOC_ERR:
+ return "NPP_MEMORY_ALLOCATION_ERROR";
+
+ case NPP_HISTO_NUMBER_OF_LEVELS_ERROR:
+ return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
+
+ case NPP_INVALID_INPUT:
+ return "NPP_INVALID_INPUT";
+
+ case NPP_POINTER_ERROR:
+ return "NPP_POINTER_ERROR";
+
+ case NPP_WARNING:
+ return "NPP_WARNING";
+
+ case NPP_ODD_ROI_WARNING:
+ return "NPP_ODD_ROI_WARNING";
+#else
+
+ // These are for CUDA 5.5 or higher
+ case NPP_BAD_ARGUMENT_ERROR:
+ return "NPP_BAD_ARGUMENT_ERROR";
+
+ case NPP_COEFFICIENT_ERROR:
+ return "NPP_COEFFICIENT_ERROR";
+
+ case NPP_RECTANGLE_ERROR:
+ return "NPP_RECTANGLE_ERROR";
+
+ case NPP_QUADRANGLE_ERROR:
+ return "NPP_QUADRANGLE_ERROR";
+
+ case NPP_MEMORY_ALLOCATION_ERR:
+ return "NPP_MEMORY_ALLOCATION_ERROR";
+
+ case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR:
+ return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
+
+ case NPP_INVALID_HOST_POINTER_ERROR:
+ return "NPP_INVALID_HOST_POINTER_ERROR";
+
+ case NPP_INVALID_DEVICE_POINTER_ERROR:
+ return "NPP_INVALID_DEVICE_POINTER_ERROR";
+#endif
+
+ case NPP_LUT_NUMBER_OF_LEVELS_ERROR:
+ return "NPP_LUT_NUMBER_OF_LEVELS_ERROR";
+
+ case NPP_TEXTURE_BIND_ERROR:
+ return "NPP_TEXTURE_BIND_ERROR";
+
+ case NPP_WRONG_INTERSECTION_ROI_ERROR:
+ return "NPP_WRONG_INTERSECTION_ROI_ERROR";
+
+ case NPP_NOT_EVEN_STEP_ERROR:
+ return "NPP_NOT_EVEN_STEP_ERROR";
+
+ case NPP_INTERPOLATION_ERROR:
+ return "NPP_INTERPOLATION_ERROR";
+
+ case NPP_RESIZE_FACTOR_ERROR:
+ return "NPP_RESIZE_FACTOR_ERROR";
+
+ case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR:
+ return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR";
+
+
+#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
+
+ case NPP_MEMFREE_ERR:
+ return "NPP_MEMFREE_ERR";
+
+ case NPP_MEMSET_ERR:
+ return "NPP_MEMSET_ERR";
+
+ case NPP_MEMCPY_ERR:
+ return "NPP_MEMCPY_ERROR";
+
+ case NPP_MIRROR_FLIP_ERR:
+ return "NPP_MIRROR_FLIP_ERR";
+#else
+
+ case NPP_MEMFREE_ERROR:
+ return "NPP_MEMFREE_ERROR";
+
+ case NPP_MEMSET_ERROR:
+ return "NPP_MEMSET_ERROR";
+
+ case NPP_MEMCPY_ERROR:
+ return "NPP_MEMCPY_ERROR";
+
+ case NPP_MIRROR_FLIP_ERROR:
+ return "NPP_MIRROR_FLIP_ERROR";
+#endif
+
+ case NPP_ALIGNMENT_ERROR:
+ return "NPP_ALIGNMENT_ERROR";
+
+ case NPP_STEP_ERROR:
+ return "NPP_STEP_ERROR";
+
+ case NPP_SIZE_ERROR:
+ return "NPP_SIZE_ERROR";
+
+ case NPP_NULL_POINTER_ERROR:
+ return "NPP_NULL_POINTER_ERROR";
+
+ case NPP_CUDA_KERNEL_EXECUTION_ERROR:
+ return "NPP_CUDA_KERNEL_EXECUTION_ERROR";
+
+ case NPP_NOT_IMPLEMENTED_ERROR:
+ return "NPP_NOT_IMPLEMENTED_ERROR";
+
+ case NPP_ERROR:
+ return "NPP_ERROR";
+
+ case NPP_SUCCESS:
+ return "NPP_SUCCESS";
+
+ case NPP_WRONG_INTERSECTION_QUAD_WARNING:
+ return "NPP_WRONG_INTERSECTION_QUAD_WARNING";
+
+ case NPP_MISALIGNED_DST_ROI_WARNING:
+ return "NPP_MISALIGNED_DST_ROI_WARNING";
+
+ case NPP_AFFINE_QUAD_INCORRECT_WARNING:
+ return "NPP_AFFINE_QUAD_INCORRECT_WARNING";
+
+ case NPP_DOUBLE_SIZE_WARNING:
+ return "NPP_DOUBLE_SIZE_WARNING";
+
+ case NPP_WRONG_INTERSECTION_ROI_WARNING:
+ return "NPP_WRONG_INTERSECTION_ROI_WARNING";
+
+#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000
+ /* These are 6.0 or higher */
+ case NPP_LUT_PALETTE_BITSIZE_ERROR:
+ return "NPP_LUT_PALETTE_BITSIZE_ERROR";
+
+ case NPP_ZC_MODE_NOT_SUPPORTED_ERROR:
+ return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR";
+
+ case NPP_QUALITY_INDEX_ERROR:
+ return "NPP_QUALITY_INDEX_ERROR";
+
+ case NPP_CHANNEL_ORDER_ERROR:
+ return "NPP_CHANNEL_ORDER_ERROR";
+
+ case NPP_ZERO_MASK_VALUE_ERROR:
+ return "NPP_ZERO_MASK_VALUE_ERROR";
+
+ case NPP_NUMBER_OF_CHANNELS_ERROR:
+ return "NPP_NUMBER_OF_CHANNELS_ERROR";
+
+ case NPP_COI_ERROR:
+ return "NPP_COI_ERROR";
+
+ case NPP_DIVISOR_ERROR:
+ return "NPP_DIVISOR_ERROR";
+
+ case NPP_CHANNEL_ERROR:
+ return "NPP_CHANNEL_ERROR";
+
+ case NPP_STRIDE_ERROR:
+ return "NPP_STRIDE_ERROR";
+
+ case NPP_ANCHOR_ERROR:
+ return "NPP_ANCHOR_ERROR";
+
+ case NPP_MASK_SIZE_ERROR:
+ return "NPP_MASK_SIZE_ERROR";
+
+ case NPP_MOMENT_00_ZERO_ERROR:
+ return "NPP_MOMENT_00_ZERO_ERROR";
+
+ case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR:
+ return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR";
+
+ case NPP_THRESHOLD_ERROR:
+ return "NPP_THRESHOLD_ERROR";
+
+ case NPP_CONTEXT_MATCH_ERROR:
+ return "NPP_CONTEXT_MATCH_ERROR";
+
+ case NPP_FFT_FLAG_ERROR:
+ return "NPP_FFT_FLAG_ERROR";
+
+ case NPP_FFT_ORDER_ERROR:
+ return "NPP_FFT_ORDER_ERROR";
+
+ case NPP_SCALE_RANGE_ERROR:
+ return "NPP_SCALE_RANGE_ERROR";
+
+ case NPP_DATA_TYPE_ERROR:
+ return "NPP_DATA_TYPE_ERROR";
+
+ case NPP_OUT_OFF_RANGE_ERROR:
+ return "NPP_OUT_OFF_RANGE_ERROR";
+
+ case NPP_DIVIDE_BY_ZERO_ERROR:
+ return "NPP_DIVIDE_BY_ZERO_ERROR";
+
+ case NPP_RANGE_ERROR:
+ return "NPP_RANGE_ERROR";
+
+ case NPP_NO_MEMORY_ERROR:
+ return "NPP_NO_MEMORY_ERROR";
+
+ case NPP_ERROR_RESERVED:
+ return "NPP_ERROR_RESERVED";
+
+ case NPP_NO_OPERATION_WARNING:
+ return "NPP_NO_OPERATION_WARNING";
+
+ case NPP_DIVIDE_BY_ZERO_WARNING:
+ return "NPP_DIVIDE_BY_ZERO_WARNING";
+#endif
+
+ }
+
+ return "<unknown>";
+}
+#endif
+
+#ifdef __DRIVER_TYPES_H__
+#ifndef DEVICE_RESET
+#define DEVICE_RESET cudaDeviceReset();
+#endif
+#else
+#ifndef DEVICE_RESET
+#define DEVICE_RESET
+#endif
+#endif
+
+template< typename T >
+void check(T result, char const *const func, const char *const file, int const line)
+{
+ if (result)
+ {
+ fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n",
+ file, line, static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
+ DEVICE_RESET
+ // Make sure we call CUDA Device Reset before exiting
+ exit(EXIT_FAILURE);
+ }
+}
+
+#ifdef __DRIVER_TYPES_H__
+// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
+#define checkCudaErrors(val) check ( (val), #val, __FILE__, __LINE__ )
+
+// This will output the proper error string when calling cudaGetLastError
+#define getLastCudaError(msg) __getLastCudaError (msg, __FILE__, __LINE__)
+
+inline void __getLastCudaError(const char *errorMessage, const char *file, const int line)
+{
+ cudaError_t err = cudaGetLastError();
+
+ if (cudaSuccess != err)
+ {
+ fprintf(stderr, "%s(%i) : getLastCudaError() CUDA error : %s : (%d) %s.\n",
+ file, line, errorMessage, (int)err, cudaGetErrorString(err));
+ DEVICE_RESET
+ exit(EXIT_FAILURE);
+ }
+}
+#endif
+
+#ifndef MAX
+#define MAX(a,b) (a > b ? a : b)
+#endif
+
+// Float To Int conversion
+inline int ftoi(float value)
+{
+ return (value >= 0 ? (int)(value + 0.5) : (int)(value - 0.5));
+}
+
+// Beginning of GPU Architecture definitions
+inline int _ConvertSMVer2Cores(int major, int minor)
+{
+ // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
+ typedef struct
+ {
+ int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
+ int Cores;
+ } sSMtoCores;
+
+ sSMtoCores nGpuArchCoresPerSM[] =
+ {
+ { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
+ { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
+ { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
+ { 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
+ { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
+ { 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
+ { 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
+ { 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
+ { -1, -1 }
+ };
+
+ int index = 0;
+
+ while (nGpuArchCoresPerSM[index].SM != -1)
+ {
+ if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor))
+ {
+ return nGpuArchCoresPerSM[index].Cores;
+ }
+
+ index++;
+ }
+
+ // If we don't find the values, we default use the previous one to run properly
+ printf("MapSMtoCores for SM %d.%d is undefined. Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores);
+ return nGpuArchCoresPerSM[index-1].Cores;
+}
+// end of GPU Architecture definitions
+
+#ifdef __CUDA_RUNTIME_H__
+// General GPU Device CUDA Initialization
+inline int gpuDeviceInit(int devID)
+{
+ int device_count;
+ checkCudaErrors(cudaGetDeviceCount(&device_count));
+
+ if (device_count == 0)
+ {
+ fprintf(stderr, "gpuDeviceInit() CUDA error: no devices supporting CUDA.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ if (devID < 0)
+ {
+ devID = 0;
+ }
+
+ if (devID > device_count-1)
+ {
+ fprintf(stderr, "\n");
+ fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", device_count);
+ fprintf(stderr, ">> gpuDeviceInit (-device=%d) is not a valid GPU device. <<\n", devID);
+ fprintf(stderr, "\n");
+ return -devID;
+ }
+
+ cudaDeviceProp deviceProp;
+ checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
+
+ if (deviceProp.computeMode == cudaComputeModeProhibited)
+ {
+ fprintf(stderr, "Error: device is running in <Compute Mode Prohibited>, no threads can use ::cudaSetDevice().\n");
+ return -1;
+ }
+
+ if (deviceProp.major < 1)
+ {
+ fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ checkCudaErrors(cudaSetDevice(devID));
+ printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, deviceProp.name);
+
+ return devID;
+}
+
+// This function returns the best GPU (with maximum GFLOPS)
+inline int gpuGetMaxGflopsDeviceId()
+{
+ int current_device = 0, sm_per_multiproc = 0;
+ int max_perf_device = 0;
+ int device_count = 0, best_SM_arch = 0;
+ int devices_prohibited = 0;
+
+ unsigned long long max_compute_perf = 0;
+ cudaDeviceProp deviceProp;
+ cudaGetDeviceCount(&device_count);
+
+ checkCudaErrors(cudaGetDeviceCount(&device_count));
+
+ if (device_count == 0)
+ {
+ fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: no devices supporting CUDA.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ // Find the best major SM Architecture GPU device
+ while (current_device < device_count)
+ {
+ cudaGetDeviceProperties(&deviceProp, current_device);
+
+ // If this GPU is not running on Compute Mode prohibited, then we can add it to the list
+ if (deviceProp.computeMode != cudaComputeModeProhibited)
+ {
+ if (deviceProp.major > 0 && deviceProp.major < 9999)
+ {
+ best_SM_arch = MAX(best_SM_arch, deviceProp.major);
+ }
+ }
+ else
+ {
+ devices_prohibited++;
+ }
+
+ current_device++;
+ }
+
+ if (devices_prohibited == device_count)
+ {
+ fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: all devices have compute mode prohibited.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ // Find the best CUDA capable GPU device
+ current_device = 0;
+
+ while (current_device < device_count)
+ {
+ cudaGetDeviceProperties(&deviceProp, current_device);
+
+ // If this GPU is not running on Compute Mode prohibited, then we can add it to the list
+ if (deviceProp.computeMode != cudaComputeModeProhibited)
+ {
+ if (deviceProp.major == 9999 && deviceProp.minor == 9999)
+ {
+ sm_per_multiproc = 1;
+ }
+ else
+ {
+ sm_per_multiproc = _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor);
+ }
+
+ unsigned long long compute_perf = (unsigned long long) deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate;
+
+ if (compute_perf > max_compute_perf)
+ {
+ // If we find GPU with SM major > 2, search only these
+ if (best_SM_arch > 2)
+ {
+ // If our device==dest_SM_arch, choose this, or else pass
+ if (deviceProp.major == best_SM_arch)
+ {
+ max_compute_perf = compute_perf;
+ max_perf_device = current_device;
+ }
+ }
+ else
+ {
+ max_compute_perf = compute_perf;
+ max_perf_device = current_device;
+ }
+ }
+ }
+
+ ++current_device;
+ }
+
+ return max_perf_device;
+}
+
+
+// Initialization code to find the best CUDA Device
+inline int findCudaDevice(int argc, const char **argv)
+{
+ cudaDeviceProp deviceProp;
+ int devID = 0;
+
+ // If the command-line has a device number specified, use it
+ if (checkCmdLineFlag(argc, argv, "device"))
+ {
+ devID = getCmdLineArgumentInt(argc, argv, "device=");
+
+ if (devID < 0)
+ {
+ printf("Invalid command line parameter\n ");
+ exit(EXIT_FAILURE);
+ }
+ else
+ {
+ devID = gpuDeviceInit(devID);
+
+ if (devID < 0)
+ {
+ printf("exiting...\n");
+ exit(EXIT_FAILURE);
+ }
+ }
+ }
+ else
+ {
+ // Otherwise pick the device with highest Gflops/s
+ devID = gpuGetMaxGflopsDeviceId();
+ checkCudaErrors(cudaSetDevice(devID));
+ checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
+ printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID, deviceProp.name, deviceProp.major, deviceProp.minor);
+ }
+
+ return devID;
+}
+
+// General check for CUDA GPU SM Capabilities
+inline bool checkCudaCapabilities(int major_version, int minor_version)
+{
+ cudaDeviceProp deviceProp;
+ deviceProp.major = 0;
+ deviceProp.minor = 0;
+ int dev;
+
+ checkCudaErrors(cudaGetDevice(&dev));
+ checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));
+
+ if ((deviceProp.major > major_version) ||
+ (deviceProp.major == major_version && deviceProp.minor >= minor_version))
+ {
+ printf(" Device %d: <%16s >, Compute SM %d.%d detected\n", dev, deviceProp.name, deviceProp.major, deviceProp.minor);
+ return true;
+ }
+ else
+ {
+ printf(" No GPU device was found that can support CUDA compute capability %d.%d.\n", major_version, minor_version);
+ return false;
+ }
+}
+#endif
+
+// end of CUDA Helper Functions
+
+
+#endif
diff --git a/goconv/inc/helper_string.h b/goconv/inc/helper_string.h
new file mode 100644
index 0000000..9b68cc7
--- /dev/null
+++ b/goconv/inc/helper_string.h
@@ -0,0 +1,526 @@
+/**
+ * Copyright 1993-2013 NVIDIA Corporation. All rights reserved.
+ *
+ * Please refer to the NVIDIA end user license agreement (EULA) associated
+ * with this source code for terms and conditions that govern your use of
+ * this software. Any use, reproduction, disclosure, or distribution of
+ * this software and related documentation outside the terms of the EULA
+ * is strictly prohibited.
+ *
+ */
+
+// These are helper functions for the SDK samples (string parsing, timers, etc)
+#ifndef STRING_HELPER_H
+#define STRING_HELPER_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fstream>
+#include <string>
+
+#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+#ifndef _CRT_SECURE_NO_DEPRECATE
+#define _CRT_SECURE_NO_DEPRECATE
+#endif
+#ifndef STRCASECMP
+#define STRCASECMP _stricmp
+#endif
+#ifndef STRNCASECMP
+#define STRNCASECMP _strnicmp
+#endif
+#ifndef STRCPY
+#define STRCPY(sFilePath, nLength, sPath) strcpy_s(sFilePath, nLength, sPath)
+#endif
+
+#ifndef FOPEN
+#define FOPEN(fHandle,filename,mode) fopen_s(&fHandle, filename, mode)
+#endif
+#ifndef FOPEN_FAIL
+#define FOPEN_FAIL(result) (result != 0)
+#endif
+#ifndef SSCANF
+#define SSCANF sscanf_s
+#endif
+#ifndef SPRINTF
+#define SPRINTF sprintf_s
+#endif
+#else // Linux Includes
+#include <string.h>
+#include <strings.h>
+
+#ifndef STRCASECMP
+#define STRCASECMP strcasecmp
+#endif
+#ifndef STRNCASECMP
+#define STRNCASECMP strncasecmp
+#endif
+#ifndef STRCPY
+#define STRCPY(sFilePath, nLength, sPath) strcpy(sFilePath, sPath)
+#endif
+
+#ifndef FOPEN
+#define FOPEN(fHandle,filename,mode) (fHandle = fopen(filename, mode))
+#endif
+#ifndef FOPEN_FAIL
+#define FOPEN_FAIL(result) (result == NULL)
+#endif
+#ifndef SSCANF
+#define SSCANF sscanf
+#endif
+#ifndef SPRINTF
+#define SPRINTF sprintf
+#endif
+#endif
+
+#ifndef EXIT_WAIVED
+#define EXIT_WAIVED 2
+#endif
+
+// CUDA Utility Helper Functions
+inline int stringRemoveDelimiter(char delimiter, const char *string)
+{
+ int string_start = 0;
+
+ while (string[string_start] == delimiter)
+ {
+ string_start++;
+ }
+
+ if (string_start >= (int)strlen(string)-1)
+ {
+ return 0;
+ }
+
+ return string_start;
+}
+
+inline int getFileExtension(char *filename, char **extension)
+{
+ int string_length = (int)strlen(filename);
+
+ while (filename[string_length--] != '.')
+ {
+ if (string_length == 0)
+ break;
+ }
+
+ if (string_length > 0) string_length += 2;
+
+ if (string_length == 0)
+ *extension = NULL;
+ else
+ *extension = &filename[string_length];
+
+ return string_length;
+}
+
+
+inline bool checkCmdLineFlag(const int argc, const char **argv, const char *string_ref)
+{
+ bool bFound = false;
+
+ if (argc >= 1)
+ {
+ for (int i=1; i < argc; i++)
+ {
+ int string_start = stringRemoveDelimiter('-', argv[i]);
+ const char *string_argv = &argv[i][string_start];
+
+ const char *equal_pos = strchr(string_argv, '=');
+ int argv_length = (int)(equal_pos == 0 ? strlen(string_argv) : equal_pos - string_argv);
+
+ int length = (int)strlen(string_ref);
+
+ if (length == argv_length && !STRNCASECMP(string_argv, string_ref, length))
+ {
+ bFound = true;
+ continue;
+ }
+ }
+ }
+
+ return bFound;
+}
+
+// This function wraps the CUDA Driver API into a template function
+template <class T>
+inline bool getCmdLineArgumentValue(const int argc, const char **argv, const char *string_ref, T *value)
+{
+ bool bFound = false;
+
+ if (argc >= 1)
+ {
+ for (int i=1; i < argc; i++)
+ {
+ int string_start = stringRemoveDelimiter('-', argv[i]);
+ const char *string_argv = &argv[i][string_start];
+ int length = (int)strlen(string_ref);
+
+ if (!STRNCASECMP(string_argv, string_ref, length))
+ {
+ if (length+1 <= (int)strlen(string_argv))
+ {
+ int auto_inc = (string_argv[length] == '=') ? 1 : 0;
+ *value = (T)atoi(&string_argv[length + auto_inc]);
+ }
+
+ bFound = true;
+ i=argc;
+ }
+ }
+ }
+
+ return bFound;
+}
+
+inline int getCmdLineArgumentInt(const int argc, const char **argv, const char *string_ref)
+{
+ bool bFound = false;
+ int value = -1;
+
+ if (argc >= 1)
+ {
+ for (int i=1; i < argc; i++)
+ {
+ int string_start = stringRemoveDelimiter('-', argv[i]);
+ const char *string_argv = &argv[i][string_start];
+ int length = (int)strlen(string_ref);
+
+ if (!STRNCASECMP(string_argv, string_ref, length))
+ {
+ if (length+1 <= (int)strlen(string_argv))
+ {
+ int auto_inc = (string_argv[length] == '=') ? 1 : 0;
+ value = atoi(&string_argv[length + auto_inc]);
+ }
+ else
+ {
+ value = 0;
+ }
+
+ bFound = true;
+ continue;
+ }
+ }
+ }
+
+ if (bFound)
+ {
+ return value;
+ }
+ else
+ {
+ return 0;
+ }
+}
+
+inline float getCmdLineArgumentFloat(const int argc, const char **argv, const char *string_ref)
+{
+ bool bFound = false;
+ float value = -1;
+
+ if (argc >= 1)
+ {
+ for (int i=1; i < argc; i++)
+ {
+ int string_start = stringRemoveDelimiter('-', argv[i]);
+ const char *string_argv = &argv[i][string_start];
+ int length = (int)strlen(string_ref);
+
+ if (!STRNCASECMP(string_argv, string_ref, length))
+ {
+ if (length+1 <= (int)strlen(string_argv))
+ {
+ int auto_inc = (string_argv[length] == '=') ? 1 : 0;
+ value = (float)atof(&string_argv[length + auto_inc]);
+ }
+ else
+ {
+ value = 0.f;
+ }
+
+ bFound = true;
+ continue;
+ }
+ }
+ }
+
+ if (bFound)
+ {
+ return value;
+ }
+ else
+ {
+ return 0;
+ }
+}
+
+inline bool getCmdLineArgumentString(const int argc, const char **argv,
+ const char *string_ref, char **string_retval)
+{
+ bool bFound = false;
+
+ if (argc >= 1)
+ {
+ for (int i=1; i < argc; i++)
+ {
+ int string_start = stringRemoveDelimiter('-', argv[i]);
+ char *string_argv = (char *)&argv[i][string_start];
+ int length = (int)strlen(string_ref);
+
+ if (!STRNCASECMP(string_argv, string_ref, length))
+ {
+ *string_retval = &string_argv[length+1];
+ bFound = true;
+ continue;
+ }
+ }
+ }
+
+ if (!bFound)
+ {
+ *string_retval = NULL;
+ }
+
+ return bFound;
+}
+
+//////////////////////////////////////////////////////////////////////////////
+//! Find the path for a file assuming that
+//! files are found in the searchPath.
+//!
+//! @return the path if succeeded, otherwise 0
+//! @param filename name of the file
+//! @param executable_path optional absolute path of the executable
+//////////////////////////////////////////////////////////////////////////////
+inline char *sdkFindFilePath(const char *filename, const char *executable_path)
+{
+ // <executable_name> defines a variable that is replaced with the name of the executable
+
+ // Typical relative search paths to locate needed companion files (e.g. sample input data, or JIT source files)
+ // The origin for the relative search may be the .exe file, a .bat file launching an .exe, a browser .exe launching the .exe or .bat, etc
+ const char *searchPath[] =
+ {
+ "./", // same dir
+ "./common/", // "/common/" subdir
+ "./common/data/", // "/common/data/" subdir
+ "./data/", // "/data/" subdir
+ "./src/", // "/src/" subdir
+ "./src/<executable_name>/data/", // "/src/<executable_name>/data/" subdir
+ "./inc/", // "/inc/" subdir
+ "./0_Simple/", // "/0_Simple/" subdir
+ "./1_Utilities/", // "/1_Utilities/" subdir
+ "./2_Graphics/", // "/2_Graphics/" subdir
+ "./3_Imaging/", // "/3_Imaging/" subdir
+ "./4_Finance/", // "/4_Finance/" subdir
+ "./5_Simulations/", // "/5_Simulations/" subdir
+ "./6_Advanced/", // "/6_Advanced/" subdir
+ "./7_CUDALibraries/", // "/7_CUDALibraries/" subdir
+ "./8_Android/", // "/8_Android/" subdir
+ "./samples/", // "/samples/" subdir
+
+ "./0_Simple/<executable_name>/data/", // "/0_Simple/<executable_name>/data/" subdir
+ "./1_Utilities/<executable_name>/data/", // "/1_Utilities/<executable_name>/data/" subdir
+ "./2_Graphics/<executable_name>/data/", // "/2_Graphics/<executable_name>/data/" subdir
+ "./3_Imaging/<executable_name>/data/", // "/3_Imaging/<executable_name>/data/" subdir
+ "./4_Finance/<executable_name>/data/", // "/4_Finance/<executable_name>/data/" subdir
+ "./5_Simulations/<executable_name>/data/", // "/5_Simulations/<executable_name>/data/" subdir
+ "./6_Advanced/<executable_name>/data/", // "/6_Advanced/<executable_name>/data/" subdir
+ "./7_CUDALibraries/<executable_name>/", // "/7_CUDALibraries/<executable_name>/" subdir
+ "./7_CUDALibraries/<executable_name>/data/", // "/7_CUDALibraries/<executable_name>/data/" subdir
+
+ "../", // up 1 in tree
+ "../common/", // up 1 in tree, "/common/" subdir
+ "../common/data/", // up 1 in tree, "/common/data/" subdir
+ "../data/", // up 1 in tree, "/data/" subdir
+ "../src/", // up 1 in tree, "/src/" subdir
+ "../inc/", // up 1 in tree, "/inc/" subdir
+
+ "../0_Simple/<executable_name>/data/", // up 1 in tree, "/0_Simple/<executable_name>/" subdir
+ "../1_Utilities/<executable_name>/data/", // up 1 in tree, "/1_Utilities/<executable_name>/" subdir
+ "../2_Graphics/<executable_name>/data/", // up 1 in tree, "/2_Graphics/<executable_name>/" subdir
+ "../3_Imaging/<executable_name>/data/", // up 1 in tree, "/3_Imaging/<executable_name>/" subdir
+ "../4_Finance/<executable_name>/data/", // up 1 in tree, "/4_Finance/<executable_name>/" subdir
+ "../5_Simulations/<executable_name>/data/", // up 1 in tree, "/5_Simulations/<executable_name>/" subdir
+ "../6_Advanced/<executable_name>/data/", // up 1 in tree, "/6_Advanced/<executable_name>/" subdir
+ "../7_CUDALibraries/<executable_name>/data/",// up 1 in tree, "/7_CUDALibraries/<executable_name>/" subdir
+ "../8_Android/<executable_name>/data/", // up 1 in tree, "/8_Android/<executable_name>/" subdir
+ "../samples/<executable_name>/data/", // up 1 in tree, "/samples/<executable_name>/" subdir
+ "../../", // up 2 in tree
+ "../../common/", // up 2 in tree, "/common/" subdir
+ "../../common/data/", // up 2 in tree, "/common/data/" subdir
+ "../../data/", // up 2 in tree, "/data/" subdir
+ "../../src/", // up 2 in tree, "/src/" subdir
+ "../../inc/", // up 2 in tree, "/inc/" subdir
+ "../../sandbox/<executable_name>/data/", // up 2 in tree, "/sandbox/<executable_name>/" subdir
+ "../../0_Simple/<executable_name>/data/", // up 2 in tree, "/0_Simple/<executable_name>/" subdir
+ "../../1_Utilities/<executable_name>/data/", // up 2 in tree, "/1_Utilities/<executable_name>/" subdir
+ "../../2_Graphics/<executable_name>/data/", // up 2 in tree, "/2_Graphics/<executable_name>/" subdir
+ "../../3_Imaging/<executable_name>/data/", // up 2 in tree, "/3_Imaging/<executable_name>/" subdir
+ "../../4_Finance/<executable_name>/data/", // up 2 in tree, "/4_Finance/<executable_name>/" subdir
+ "../../5_Simulations/<executable_name>/data/", // up 2 in tree, "/5_Simulations/<executable_name>/" subdir
+ "../../6_Advanced/<executable_name>/data/", // up 2 in tree, "/6_Advanced/<executable_name>/" subdir
+ "../../7_CUDALibraries/<executable_name>/data/", // up 2 in tree, "/7_CUDALibraries/<executable_name>/" subdir
+ "../../8_Android/<executable_name>/data/", // up 2 in tree, "/8_Android/<executable_name>/" subdir
+ "../../samples/<executable_name>/data/", // up 2 in tree, "/samples/<executable_name>/" subdir
+ "../../../", // up 3 in tree
+ "../../../src/<executable_name>/", // up 3 in tree, "/src/<executable_name>/" subdir
+ "../../../src/<executable_name>/data/", // up 3 in tree, "/src/<executable_name>/data/" subdir
+ "../../../src/<executable_name>/src/", // up 3 in tree, "/src/<executable_name>/src/" subdir
+ "../../../src/<executable_name>/inc/", // up 3 in tree, "/src/<executable_name>/inc/" subdir
+ "../../../sandbox/<executable_name>/", // up 3 in tree, "/sandbox/<executable_name>/" subdir
+ "../../../sandbox/<executable_name>/data/", // up 3 in tree, "/sandbox/<executable_name>/data/" subdir
+ "../../../sandbox/<executable_name>/src/", // up 3 in tree, "/sandbox/<executable_name>/src/" subdir
+ "../../../sandbox/<executable_name>/inc/", // up 3 in tree, "/sandbox/<executable_name>/inc/" subdir
+ "../../../0_Simple/<executable_name>/data/", // up 3 in tree, "/0_Simple/<executable_name>/" subdir
+ "../../../1_Utilities/<executable_name>/data/", // up 3 in tree, "/1_Utilities/<executable_name>/" subdir
+ "../../../2_Graphics/<executable_name>/data/", // up 3 in tree, "/2_Graphics/<executable_name>/" subdir
+ "../../../3_Imaging/<executable_name>/data/", // up 3 in tree, "/3_Imaging/<executable_name>/" subdir
+ "../../../4_Finance/<executable_name>/data/", // up 3 in tree, "/4_Finance/<executable_name>/" subdir
+ "../../../5_Simulations/<executable_name>/data/", // up 3 in tree, "/5_Simulations/<executable_name>/" subdir
+ "../../../6_Advanced/<executable_name>/data/", // up 3 in tree, "/6_Advanced/<executable_name>/" subdir
+ "../../../7_CUDALibraries/<executable_name>/data/", // up 3 in tree, "/7_CUDALibraries/<executable_name>/" subdir
+ "../../../8_Android/<executable_name>/data/", // up 3 in tree, "/8_Android/<executable_name>/" subdir
+ "../../../0_Simple/<executable_name>/", // up 3 in tree, "/0_Simple/<executable_name>/" subdir
+ "../../../1_Utilities/<executable_name>/", // up 3 in tree, "/1_Utilities/<executable_name>/" subdir
+ "../../../2_Graphics/<executable_name>/", // up 3 in tree, "/2_Graphics/<executable_name>/" subdir
+ "../../../3_Imaging/<executable_name>/", // up 3 in tree, "/3_Imaging/<executable_name>/" subdir
+ "../../../4_Finance/<executable_name>/", // up 3 in tree, "/4_Finance/<executable_name>/" subdir
+ "../../../5_Simulations/<executable_name>/", // up 3 in tree, "/5_Simulations/<executable_name>/" subdir
+ "../../../6_Advanced/<executable_name>/", // up 3 in tree, "/6_Advanced/<executable_name>/" subdir
+ "../../../7_CUDALibraries/<executable_name>/", // up 3 in tree, "/7_CUDALibraries/<executable_name>/" subdir
+ "../../../8_Android/<executable_name>/", // up 3 in tree, "/8_Android/<executable_name>/" subdir
+ "../../../samples/<executable_name>/data/", // up 3 in tree, "/samples/<executable_name>/" subdir
+ "../../../common/", // up 3 in tree, "../../../common/" subdir
+ "../../../common/data/", // up 3 in tree, "../../../common/data/" subdir
+ "../../../data/", // up 3 in tree, "../../../data/" subdir
+ "../../../../", // up 4 in tree
+ "../../../../src/<executable_name>/", // up 4 in tree, "/src/<executable_name>/" subdir
+ "../../../../src/<executable_name>/data/", // up 4 in tree, "/src/<executable_name>/data/" subdir
+ "../../../../src/<executable_name>/src/", // up 4 in tree, "/src/<executable_name>/src/" subdir
+ "../../../../src/<executable_name>/inc/", // up 4 in tree, "/src/<executable_name>/inc/" subdir
+ "../../../../sandbox/<executable_name>/", // up 4 in tree, "/sandbox/<executable_name>/" subdir
+ "../../../../sandbox/<executable_name>/data/", // up 4 in tree, "/sandbox/<executable_name>/data/" subdir
+ "../../../../sandbox/<executable_name>/src/", // up 4 in tree, "/sandbox/<executable_name>/src/" subdir
+ "../../../../sandbox/<executable_name>/inc/", // up 4 in tree, "/sandbox/<executable_name>/inc/" subdir
+ "../../../../0_Simple/<executable_name>/data/", // up 4 in tree, "/0_Simple/<executable_name>/" subdir
+ "../../../../1_Utilities/<executable_name>/data/", // up 4 in tree, "/1_Utilities/<executable_name>/" subdir
+ "../../../../2_Graphics/<executable_name>/data/", // up 4 in tree, "/2_Graphics/<executable_name>/" subdir
+ "../../../../3_Imaging/<executable_name>/data/", // up 4 in tree, "/3_Imaging/<executable_name>/" subdir
+ "../../../../4_Finance/<executable_name>/data/", // up 4 in tree, "/4_Finance/<executable_name>/" subdir
+ "../../../../5_Simulations/<executable_name>/data/",// up 4 in tree, "/5_Simulations/<executable_name>/" subdir
+ "../../../../6_Advanced/<executable_name>/data/", // up 4 in tree, "/6_Advanced/<executable_name>/" subdir
+ "../../../../7_CUDALibraries/<executable_name>/data/", // up 4 in tree, "/7_CUDALibraries/<executable_name>/" subdir
+ "../../../../8_Android/<executable_name>/data/", // up 4 in tree, "/8_Android/<executable_name>/" subdir
+ "../../../../0_Simple/<executable_name>/", // up 4 in tree, "/0_Simple/<executable_name>/" subdir
+ "../../../../1_Utilities/<executable_name>/", // up 4 in tree, "/1_Utilities/<executable_name>/" subdir
+ "../../../../2_Graphics/<executable_name>/", // up 4 in tree, "/2_Graphics/<executable_name>/" subdir
+ "../../../../3_Imaging/<executable_name>/", // up 4 in tree, "/3_Imaging/<executable_name>/" subdir
+ "../../../../4_Finance/<executable_name>/", // up 4 in tree, "/4_Finance/<executable_name>/" subdir
+ "../../../../5_Simulations/<executable_name>/",// up 4 in tree, "/5_Simulations/<executable_name>/" subdir
+ "../../../../6_Advanced/<executable_name>/", // up 4 in tree, "/6_Advanced/<executable_name>/" subdir
+ "../../../../7_CUDALibraries/<executable_name>/", // up 4 in tree, "/7_CUDALibraries/<executable_name>/" subdir
+ "../../../../8_Android/<executable_name>/", // up 4 in tree, "/8_Android/<executable_name>/" subdir
+ "../../../../samples/<executable_name>/data/", // up 4 in tree, "/samples/<executable_name>/" subdir
+ "../../../../common/", // up 4 in tree, "../../../common/" subdir
+ "../../../../common/data/", // up 4 in tree, "../../../common/data/" subdir
+ "../../../../data/", // up 4 in tree, "../../../data/" subdir
+ "../../../../../", // up 5 in tree
+ "../../../../../src/<executable_name>/", // up 5 in tree, "/src/<executable_name>/" subdir
+ "../../../../../src/<executable_name>/data/", // up 5 in tree, "/src/<executable_name>/data/" subdir
+ "../../../../../src/<executable_name>/src/", // up 5 in tree, "/src/<executable_name>/src/" subdir
+ "../../../../../src/<executable_name>/inc/", // up 5 in tree, "/src/<executable_name>/inc/" subdir
+ "../../../../../sandbox/<executable_name>/", // up 5 in tree, "/sandbox/<executable_name>/" subdir
+ "../../../../../sandbox/<executable_name>/data/", // up 5 in tree, "/sandbox/<executable_name>/data/" subdir
+ "../../../../../sandbox/<executable_name>/src/", // up 5 in tree, "/sandbox/<executable_name>/src/" subdir
+ "../../../../../sandbox/<executable_name>/inc/", // up 5 in tree, "/sandbox/<executable_name>/inc/" subdir
+ "../../../../../0_Simple/<executable_name>/data/", // up 5 in tree, "/0_Simple/<executable_name>/" subdir
+ "../../../../../1_Utilities/<executable_name>/data/", // up 5 in tree, "/1_Utilities/<executable_name>/" subdir
+ "../../../../../2_Graphics/<executable_name>/data/", // up 5 in tree, "/2_Graphics/<executable_name>/" subdir
+ "../../../../../3_Imaging/<executable_name>/data/", // up 5 in tree, "/3_Imaging/<executable_name>/" subdir
+ "../../../../../4_Finance/<executable_name>/data/", // up 5 in tree, "/4_Finance/<executable_name>/" subdir
+ "../../../../../5_Simulations/<executable_name>/data/",// up 5 in tree, "/5_Simulations/<executable_name>/" subdir
+ "../../../../../6_Advanced/<executable_name>/data/", // up 5 in tree, "/6_Advanced/<executable_name>/" subdir
+ "../../../../../7_CUDALibraries/<executable_name>/data/", // up 5 in tree, "/7_CUDALibraries/<executable_name>/" subdir
+ "../../../../../8_Android/<executable_name>/data/", // up 5 in tree, "/8_Android/<executable_name>/" subdir
+ "../../../../../samples/<executable_name>/data/", // up 5 in tree, "/samples/<executable_name>/" subdir
+ "../../../../../common/", // up 5 in tree, "../../../common/" subdir
+ "../../../../../common/data/", // up 5 in tree, "../../../common/data/" subdir
+ };
+
+ // Extract the executable name
+ std::string executable_name;
+
+ if (executable_path != 0)
+ {
+ executable_name = std::string(executable_path);
+
+#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+ // Windows path delimiter
+ size_t delimiter_pos = executable_name.find_last_of('\\');
+ executable_name.erase(0, delimiter_pos + 1);
+
+ if (executable_name.rfind(".exe") != std::string::npos)
+ {
+ // we strip .exe, only if the .exe is found
+ executable_name.resize(executable_name.size() - 4);
+ }
+
+#else
+ // Linux & OSX path delimiter
+ size_t delimiter_pos = executable_name.find_last_of('/');
+ executable_name.erase(0,delimiter_pos+1);
+#endif
+ }
+
+ // Loop over all search paths and return the first hit
+ for (unsigned int i = 0; i < sizeof(searchPath)/sizeof(char *); ++i)
+ {
+ std::string path(searchPath[i]);
+ size_t executable_name_pos = path.find("<executable_name>");
+
+ // If there is executable_name variable in the searchPath
+ // replace it with the value
+ if (executable_name_pos != std::string::npos)
+ {
+ if (executable_path != 0)
+ {
+ path.replace(executable_name_pos, strlen("<executable_name>"), executable_name);
+ }
+ else
+ {
+ // Skip this path entry if no executable argument is given
+ continue;
+ }
+ }
+
+#ifdef _DEBUG
+ printf("sdkFindFilePath <%s> in %s\n", filename, path.c_str());
+#endif
+
+ // Test if the file exists
+ path.append(filename);
+ FILE *fp;
+ FOPEN(fp, path.c_str(), "rb");
+
+ if (fp != NULL)
+ {
+ fclose(fp);
+ // File found
+ // returning an allocated array here for backwards compatibility reasons
+ char *file_path = (char *) malloc(path.length() + 1);
+ STRCPY(file_path, path.length() + 1, path.c_str());
+ return file_path;
+ }
+
+ if (fp)
+ {
+ fclose(fp);
+ }
+ }
+
+ // File not found
+ return 0;
+}
+
+#endif
diff --git a/gohumantrack/gohumantrack.go b/gohumantrack/gohumantrack.go
index b0a8198..8c65fc8 100644
--- a/gohumantrack/gohumantrack.go
+++ b/gohumantrack/gohumantrack.go
@@ -17,12 +17,15 @@
void *create_batch_image(const int size){
c_img *imgs = (c_img*)malloc(size * sizeof(c_img));
+ for(int i = 0; i < size; i++){
+ imgs[i].data_ = NULL;
+ }
return imgs;
}
int fill_images(void *imgs, const int size, const int index, void *data, const int w, const int h, const int c){
if(!imgs || !data || size <= index) return -1;
c_img *images = (c_img*)imgs;
- images[index].data_ = data;
+ images[index].data_ = (unsigned char*)data;
images[index].w_ = w;
images[index].h_ = h;
images[index].c_ = c;
@@ -36,9 +39,9 @@
}
return ret;
}
-void *process(void *handle, void *imgs, const int size){
+void *process(void *handle, void *imgs, const int size, void *result){
c_img *images = (c_img*)imgs;
- c_fgRet *res = init_fgres(size);
+ c_fgRet *res = (c_fgRet *)result;
int ret = c_human_tracker_process(handle, images, size, res);
if (ret != 0)
return NULL;
@@ -95,6 +98,7 @@
// HumanTracker struct
type HumanTracker struct {
handle unsafe.Pointer
+ result unsafe.Pointer
batchSize int
}
@@ -105,7 +109,8 @@
}
p := C.c_human_tracker_create(C.int(gpu), C.int(batchSize), C.int(flag))
if p != nil {
- return &HumanTracker{p, batchSize}
+ res := C.init_fgres(C.int(batchSize))
+ return &HumanTracker{p, res, batchSize}
}
return nil
}
@@ -114,6 +119,9 @@
func (h *HumanTracker) Free() {
if h.handle != nil {
C.c_human_tracker_release(&h.handle)
+ }
+ if h.result != nil {
+ C.free(h.result)
}
}
@@ -146,11 +154,10 @@
}
}
- cRet := C.process(h.handle, cImgs, C.int(h.batchSize))
+ cRet := C.process(h.handle, cImgs, C.int(h.batchSize), h.result)
if cRet == nil {
return nil, errors.New("create C results error")
}
- defer C.free(unsafe.Pointer(cRet))
var result []FgResult
p := uintptr(cRet)
diff --git a/main.go b/main.go
index 411172c..44c902a 100644
--- a/main.go
+++ b/main.go
@@ -6,7 +6,7 @@
"time"
"track/gohumantrack"
-
+ "track/goconv"
"basic.com/valib/goffmpeg.git"
)
@@ -19,6 +19,54 @@
flag.StringVar(&url1, "cam1", "", "url")
flag.StringVar(&url2, "cam2", "", "url")
}
+
+func run(cam1, cam2 *goffmpeg.GoFFMPEG, tracker *gohumantrack.HumanTracker) bool {
+ data1, ow1, oh1, _ := cam1.GetYUV()
+ data2, ow2, oh2, _ := cam2.GetYUV()
+ if ow1 > 0 && oh1 > 0 && ow2 > 0 && oh2 > 0 {
+ bgr1 := goconv.YUV2BGR(data1, ow1, oh1)
+ bgr2 := goconv.YUV2BGR(data2, ow2, oh2)
+ if bgr1 == nil || bgr2 == nil{
+ return false
+ }
+ var images []gohumantrack.ImageHumanTracker
+ img := gohumantrack.ImageHumanTracker{
+ Data: bgr1,
+ Width: ow1,
+ Height: oh1,
+ Channel: 3,
+ }
+ images = append(images, img)
+ img = gohumantrack.ImageHumanTracker{
+ Data: bgr2,
+ Width: ow2,
+ Height: oh2,
+ Channel: 3,
+ }
+ images = append(images, img)
+ res, err := tracker.Process(images)
+ if err == nil {
+ for _, v := range res {
+ fmt.Printf("result size: %d\n", v.FgNum)
+ for i := 0; i < int(v.FgNum); i++ {
+ r := v.Fginfo[i]
+ if r.Confidence > 0 {
+ fmt.Printf(" Index %d Rect: %dx%dx%dx%d", i, r.Left, r.Top, r.Right, r.Bottom)
+ fmt.Printf(" Confidence: %f", r.Confidence*100)
+ fmt.Printf(" Center: %dx%d", r.X, r.Y)
+ fmt.Printf(" ID: %d\n", r.ID)
+ }
+ }
+
+ }
+ } else {
+ fmt.Println("process error: ", err)
+ }
+ return true
+ }
+ return false
+}
+
func main() {
flag.Parse()
fmt.Println("url1: ", url1, " url2: ", url2)
@@ -42,45 +90,7 @@
cam2.CloseStream()
for {
- data1, ow1, oh1, _ := cam1.GetYUV()
- data2, ow2, oh2, _ := cam2.GetYUV()
- if ow1 > 0 && oh1 > 0 && ow2 > 0 && oh2 > 0 {
-
- var images []gohumantrack.ImageHumanTracker
- img := gohumantrack.ImageHumanTracker{
- Data: data1,
- Width: ow1,
- Height: oh1,
- Channel: 3,
- }
- images = append(images, img)
- img = gohumantrack.ImageHumanTracker{
- Data: data2,
- Width: ow2,
- Height: oh2,
- Channel: 3,
- }
- images = append(images, img)
- res, err := tracker.Process(images)
- if err == nil {
- for _, v := range res {
- fmt.Printf("result size: %d\n", v.FgNum)
- for i := 0; i < int(v.FgNum); i++ {
- r := v.Fginfo[i]
- if r.Confidence > 0 {
- fmt.Printf(" Index %d Rect: %dx%dx%dx%d", i, r.Left, r.Top, r.Right, r.Bottom)
- fmt.Printf(" Confidence: %f", r.Confidence*100)
- fmt.Printf(" Center: %dx%d", r.X, r.Y)
- fmt.Printf(" ID: %d\n", r.ID)
- }
- }
-
- }
- } else {
- fmt.Println("process error: ", err)
- }
- } else {
- // fmt.Println("cam1 size: ", ow1, "x", oh1, " cam2 size: ", ow2, "x", oh2)
+ if !run(cam1, cam2, tracker){
time.Sleep(38 * time.Millisecond)
}
diff --git a/runtime/libcffmpeg.so b/runtime/libcffmpeg.so
index 707acbb..3989dea 100755
--- a/runtime/libcffmpeg.so
+++ b/runtime/libcffmpeg.so
Binary files differ
--
Gitblit v1.8.0