/*******************************************************************
|
* CUDALERP.h
|
* CUDALERP
|
*
|
* Author: Kareem Omar
|
* kareem.omar@uah.edu
|
* https://github.com/komrad36
|
*
|
* Last updated Jan 7, 2016
|
*******************************************************************/
|
//
|
// The file CUDALERP.h exposes two extremely high performance GPU
|
// resize operations,
|
// CUDALERP (bilinear interpolation), and
|
// CUDANERP (nearest neighbor interpolation), for 8-bit unsigned
|
// integer (i.e. grayscale) data.
|
//
|
// For 32-bit float data, see the CUDAFLERP project instead.
|
//
|
// CUDALERP offers superior accuracy to CUDA's built-in texture
|
// interpolator at comparable performance. The accuracy if compiled
|
// with -use-fast-math off is nearly equivalent to my CPU interpolator,
|
// KLERP, while still being as fast as the built-in interpolation.
|
//
|
// Particularly for large images, CUDALERP dramatically outperforms
|
// even the highly tuned CPU AVX2 versions.
|
//
|
// All functionality is contained in the header 'CUDALERP.h' and
|
// the source file 'CUDALERP.cu' and has no external dependencies at all.
|
//
|
// Note that these are intended for computer vision use(hence the speed)
|
// and are designed for grayscale images.
|
//
|
// The file 'main.cpp' is an example and speed test driver.
|
//
|
|
#pragma once
|
|
#include "cuda_runtime.h"
|
|
#include <cstdint>
|
|
#ifdef __INTELLISENSE__
|
#include <algorithm>
|
#define asm(x)
|
#include "device_launch_parameters.h"
|
#define __CUDACC__
|
#include "device_functions.h"
|
#undef __CUDACC__
|
#endif
|
|
void CUDALERP(const cudaTextureObject_t d_img_tex, const int oldw, const int oldh, uint8_t* __restrict const d_out, const uint32_t neww, const uint32_t newh);
|
|
void CUDANERP(const cudaTextureObject_t d_img_tex, const int oldw, const int oldh, uint8_t* __restrict const d_out, const uint32_t neww, const uint32_t newh);
|