From 7e6f863a5928b8481975e9fdf6460dde2c0b14bf Mon Sep 17 00:00:00 2001 From: zhangmeng <775834166@qq.com> Date: 星期六, 19 十月 2019 14:29:39 +0800 Subject: [PATCH] remove files --- /dev/null | 54 --------------------------- goconv.go | 7 --- 2 files changed, 0 insertions(+), 61 deletions(-) diff --git a/csrc/gpu-conv/CUDALERP.cu b/csrc/gpu-conv/CUDALERP.cu deleted file mode 100644 index ee44fee..0000000 --- a/csrc/gpu-conv/CUDALERP.cu +++ /dev/null @@ -1,95 +0,0 @@ -/******************************************************************* -* CUDALERP.cu -* CUDALERP -* -* Author: Kareem Omar -* kareem.omar@uah.edu -* https://github.com/komrad36 -* -* Last updated Jan 7, 2016 -*******************************************************************/ -// -// The file CUDALERP.h exposes two extremely high performance GPU -// resize operations, -// CUDALERP (bilinear interpolation), and -// CUDANERP (nearest neighbor interpolation), for 8-bit unsigned -// integer (i.e. grayscale) data. -// -// For 32-bit float data, see the CUDAFLERP project instead. -// -// CUDALERP offers superior accuracy to CUDA's built-in texture -// interpolator at comparable performance. The accuracy if compiled -// with -use-fast-math off is nearly equivalent to my CPU interpolator, -// KLERP, while still being as fast as the built-in interpolation. -// -// Particularly for large images, CUDALERP dramatically outperforms -// even the highly tuned CPU AVX2 versions. -// -// All functionality is contained in the header 'CUDALERP.h' and -// the source file 'CUDALERP.cu' and has no external dependencies at all. -// -// Note that these are intended for computer vision use(hence the speed) -// and are designed for grayscale images. -// -// The file 'main.cpp' is an example and speed test driver. -// - -#include "CUDALERP.h" - -__global__ void -#ifndef __INTELLISENSE__ -__launch_bounds__(256, 0) -#endif -CUDANERP_kernel(const cudaTextureObject_t d_img_tex, const float gxs, const float gys, uint8_t* __restrict const d_out, const int neww) { - uint32_t x = (blockIdx.x << 9) + (threadIdx.x << 1); - const uint32_t y = blockIdx.y; - const float fy = y*gys; -#pragma unroll - for (int i = 0; i < 2; ++i, ++x) { - const float fx = x*gxs; - float res = 255.0f*tex2D<float>(d_img_tex, fx, fy); - if (x < neww) d_out[y*neww + x] = res; - } -} - -__global__ void -#ifndef __INTELLISENSE__ -__launch_bounds__(256, 0) -#endif -CUDALERP_kernel(const cudaTextureObject_t d_img_tex, const float gxs, const float gys, uint8_t* __restrict const d_out, const int neww) { - uint32_t x = (blockIdx.x << 9) + (threadIdx.x << 1); - const uint32_t y = blockIdx.y; - const float fy = (y + 0.5f)*gys - 0.5f; - const float wt_y = fy - floor(fy); - const float invwt_y = 1.0f - wt_y; -#pragma unroll - for (int i = 0; i < 2; ++i, ++x) { - const float fx = (x + 0.5f)*gxs - 0.5f; - // less accurate and not really much (or any) faster - // ----------------- - // const float res = tex2D<float>(d_img_tex, fx, fy); - // ----------------- - const float4 f = tex2Dgather<float4>(d_img_tex, fx + 0.5f, fy + 0.5f); - const float wt_x = fx - floor(fx); - const float invwt_x = 1.0f - wt_x; - const float xa = invwt_x*f.w + wt_x*f.z; - const float xb = invwt_x*f.x + wt_x*f.y; - const float res = 255.0f*(invwt_y*xa + wt_y*xb) + 0.5f; - // ----------------- - if (x < neww) d_out[y*neww + x] = res; - } -} - -void CUDANERP(const cudaTextureObject_t d_img_tex, const int oldw, const int oldh, uint8_t* __restrict const d_out, const uint32_t neww, const uint32_t newh) { - const float gxs = static_cast<float>(oldw) / static_cast<float>(neww); - const float gys = static_cast<float>(oldh) / static_cast<float>(newh); - CUDANERP_kernel<<<{((neww - 1) >> 9) + 1, newh}, 256>>>(d_img_tex, gxs, gys, d_out, neww); - cudaDeviceSynchronize(); -} - -void CUDALERP(const cudaTextureObject_t d_img_tex, const int oldw, const int oldh, uint8_t* __restrict const d_out, const uint32_t neww, const uint32_t newh) { - const float gxs = static_cast<float>(oldw) / static_cast<float>(neww); - const float gys = static_cast<float>(oldh) / static_cast<float>(newh); - CUDALERP_kernel<<<{((neww - 1) >> 9) + 1, newh}, 256>>>(d_img_tex, gxs, gys, d_out, neww); - cudaDeviceSynchronize(); -} diff --git a/csrc/gpu-conv/CUDALERP.h b/csrc/gpu-conv/CUDALERP.h deleted file mode 100644 index 1645cb9..0000000 --- a/csrc/gpu-conv/CUDALERP.h +++ /dev/null @@ -1,54 +0,0 @@ -/******************************************************************* -* CUDALERP.h -* CUDALERP -* -* Author: Kareem Omar -* kareem.omar@uah.edu -* https://github.com/komrad36 -* -* Last updated Jan 7, 2016 -*******************************************************************/ -// -// The file CUDALERP.h exposes two extremely high performance GPU -// resize operations, -// CUDALERP (bilinear interpolation), and -// CUDANERP (nearest neighbor interpolation), for 8-bit unsigned -// integer (i.e. grayscale) data. -// -// For 32-bit float data, see the CUDAFLERP project instead. -// -// CUDALERP offers superior accuracy to CUDA's built-in texture -// interpolator at comparable performance. The accuracy if compiled -// with -use-fast-math off is nearly equivalent to my CPU interpolator, -// KLERP, while still being as fast as the built-in interpolation. -// -// Particularly for large images, CUDALERP dramatically outperforms -// even the highly tuned CPU AVX2 versions. -// -// All functionality is contained in the header 'CUDALERP.h' and -// the source file 'CUDALERP.cu' and has no external dependencies at all. -// -// Note that these are intended for computer vision use(hence the speed) -// and are designed for grayscale images. -// -// The file 'main.cpp' is an example and speed test driver. -// - -#pragma once - -#include "cuda_runtime.h" - -#include <cstdint> - -#ifdef __INTELLISENSE__ -#include <algorithm> -#define asm(x) -#include "device_launch_parameters.h" -#define __CUDACC__ -#include "device_functions.h" -#undef __CUDACC__ -#endif - -void CUDALERP(const cudaTextureObject_t d_img_tex, const int oldw, const int oldh, uint8_t* __restrict const d_out, const uint32_t neww, const uint32_t newh); - -void CUDANERP(const cudaTextureObject_t d_img_tex, const int oldw, const int oldh, uint8_t* __restrict const d_out, const uint32_t neww, const uint32_t newh); diff --git a/goconv.go b/goconv.go index b81ec36..55869ba 100644 --- a/goconv.go +++ b/goconv.go @@ -121,10 +121,3 @@ } /////////////// for conv - -// ConvGPU conv gpu resize -func ConvGPU(in []byte, w, h, dstW, dstH int) []byte { - - return nil - -} -- Gitblit v1.8.0