/******************************************************************* * CUDALERP.h * CUDALERP * * Author: Kareem Omar * kareem.omar@uah.edu * https://github.com/komrad36 * * Last updated Jan 7, 2016 *******************************************************************/ // // The file CUDALERP.h exposes two extremely high performance GPU // resize operations, // CUDALERP (bilinear interpolation), and // CUDANERP (nearest neighbor interpolation), for 8-bit unsigned // integer (i.e. grayscale) data. // // For 32-bit float data, see the CUDAFLERP project instead. // // CUDALERP offers superior accuracy to CUDA's built-in texture // interpolator at comparable performance. The accuracy if compiled // with -use-fast-math off is nearly equivalent to my CPU interpolator, // KLERP, while still being as fast as the built-in interpolation. // // Particularly for large images, CUDALERP dramatically outperforms // even the highly tuned CPU AVX2 versions. // // All functionality is contained in the header 'CUDALERP.h' and // the source file 'CUDALERP.cu' and has no external dependencies at all. // // Note that these are intended for computer vision use(hence the speed) // and are designed for grayscale images. // // The file 'main.cpp' is an example and speed test driver. // #pragma once #include "cuda_runtime.h" #include #ifdef __INTELLISENSE__ #include #define asm(x) #include "device_launch_parameters.h" #define __CUDACC__ #include "device_functions.h" #undef __CUDACC__ #endif void CUDALERP(const cudaTextureObject_t d_img_tex, const int oldw, const int oldh, uint8_t* __restrict const d_out, const uint32_t neww, const uint32_t newh); void CUDANERP(const cudaTextureObject_t d_img_tex, const int oldw, const int oldh, uint8_t* __restrict const d_out, const uint32_t neww, const uint32_t newh);