/*
 * GPU kernels for the crop layer: per-pixel colour "levels" adjustment
 * followed by a crop / flip / rotate resampling pass.
 *
 * Provenance: this text is the "+" side of a git patch
 *   commit  168af40fe9a3cc81c6ee16b3e81f154780c36bdb
 *   author  Scheaven <xuepengqiang>
 *   date    Thursday, 03 June 2021 15:03:27 +0800
 *   subject [PATCH] up new v4
 * touching lib/detecter_tools/darknet/crop_layer_kernels.cu
 * (222 insertions, 222 deletions; exported by Gitblit v1.8.0). The removed
 * and added sides of that patch appear to carry the same code text.
 */
#include <cuda_runtime.h>
#include <curand.h>
#include <cublas_v2.h>

#include "crop_layer.h"
#include "utils.h"
#include "dark_cuda.h"
#include "image.h"

/*
 * Fetch one sample from a planar image (channel c, row y, column x).
 * Coordinates outside the w x h plane read as 0.
 */
__device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c)
{
    if(x < 0 || x >= w || y < 0 || y >= h) return 0;
    /* Widen before multiplying so large images cannot overflow 32-bit int. */
    return image[(size_t)w*(y + c*h) + x];
}

/*
 * Convert an (r, g, b) triple to (h, s, v).
 * The hue is expressed in sextants, i.e. in [0, 6); h is -1 when the
 * largest component is exactly 0. v is the largest component and
 * s = (max - min) / max.
 */
__device__ float3 rgb_to_hsv_kernel(float3 rgb)
{
    float r = rgb.x;
    float g = rgb.y;
    float b = rgb.z;

    float h, s, v;
    float hi = (r > g) ? ( (r > b) ? r : b) : ( (g > b) ? g : b);
    float lo = (r < g) ? ( (r < b) ? r : b) : ( (g < b) ? g : b);
    float delta = hi - lo;
    v = hi;
    if(hi == 0){
        s = 0;
        h = -1;
    } else if(delta == 0){
        /* Gray pixel: the hue is undefined. The divisions below would
         * produce 0/0 = NaN, so report hue 0 instead. */
        s = 0;
        h = 0;
    } else {
        s = delta/hi;
        if(r == hi){
            h = (g - b) / delta;
        } else if (g == hi) {
            h = 2 + (b - r) / delta;
        } else {
            h = 4 + (r - g) / delta;
        }
        if (h < 0) h += 6;
        /* A tiny negative hue can round up to exactly 6 after the wrap;
         * hue 6 is the same colour as hue 0, and hsv_to_rgb_kernel decodes
         * a sextant index of 6 incorrectly. */
        if (h >= 6) h = 0;
    }
    return make_float3(h, s, v);
}

/*
 * Convert an (h, s, v) triple (hue in sextants, as produced by
 * rgb_to_hsv_kernel) back to (r, g, b). Each output component is clamped
 * to [0, 1].
 */
__device__ float3 hsv_to_rgb_kernel(float3 hsv)
{
    float h = hsv.x;
    float s = hsv.y;
    float v = hsv.z;

    float r, g, b;

    if (s == 0) {
        /* No saturation: the hue is irrelevant, output is gray. */
        r = g = b = v;
    } else {
        int index = (int) floorf(h);
        float f = h - index;
        float p = v*(1-s);
        float q = v*(1-s*f);
        float t = v*(1-s*(1-f));
        switch(index){
            case 0:  r = v; g = t; b = p; break;
            case 1:  r = q; g = v; b = p; break;
            case 2:  r = p; g = v; b = t; break;
            case 3:  r = p; g = q; b = v; break;
            case 4:  r = t; g = p; b = v; break;
            default: r = v; g = p; b = q; break;
        }
    }
    r = (r < 0) ? 0 : ((r > 1) ? 1 : r);
    g = (g < 0) ? 0 : ((g > 1) ? 1 : g);
    b = (b < 0) ? 0 : ((b > 1) ? 1 : b);
    return make_float3(r, g, b);
}

/*
 * Bilinearly interpolate channel c of a planar image at the fractional
 * position (x, y). Neighbours outside the image contribute 0
 * (see get_pixel_kernel).
 */
__device__ float bilinear_interpolate_kernel(float *image, int w, int h, float x, float y, int c)
{
    int ix = (int) floorf(x);
    int iy = (int) floorf(y);

    float dx = x - ix;
    float dy = y - iy;

    float val = (1-dy) * (1-dx) * get_pixel_kernel(image, w, h, ix, iy, c) +
        dy     * (1-dx) * get_pixel_kernel(image, w, h, ix, iy+1, c) +
        (1-dy) *   dx   * get_pixel_kernel(image, w, h, ix+1, iy, c) +
        dy     *   dx   * get_pixel_kernel(image, w, h, ix+1, iy+1, c);
    return val;
}

/*
 * In-place colour adjustment, one thread per (batch, y, x) pixel.
 *
 * When train is non-zero the pixel is converted to HSV, its saturation and
 * value are scaled by per-image factors derived from rand[8*b + 0..3], and
 * it is converted back. In every case each channel is then mapped to
 * value*scale + translate, plus a per-channel shift taken from rand[0..2]
 * (the shift is forced to 0 when train is zero).
 *
 * Launch layout: the thread id is
 * (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x, and threads
 * with id >= batch*w*h exit immediately.
 *
 * NOTE(review): the kernel hard-codes 3 planes per image; confirm callers
 * never use it with a different channel count.
 * NOTE(review): the "> .5" tests suggest rand holds values in [0, 1] --
 * confirm against cuda_random.
 */
__global__ void levels_image_kernel(float *image, float *rand, int batch, int w, int h, int train, float saturation, float exposure, float translate, float scale, float shift)
{
    int size = batch * w * h;
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(id >= size) return;
    int x = id % w;
    id /= w;
    int y = id % h;
    id /= h;                        /* id is now the image index in the batch */
    float rshift = rand[0];
    float gshift = rand[1];
    float bshift = rand[2];
    float r0 = rand[8*id + 0];
    float r1 = rand[8*id + 1];
    float r2 = rand[8*id + 2];
    float r3 = rand[8*id + 3];

    /* Pick a factor between 1 and the configured maximum, then invert it
     * when the paired random value exceeds one half. */
    saturation = r0*(saturation - 1) + 1;
    saturation = (r1 > .5f) ? 1.f/saturation : saturation;
    exposure = r2*(exposure - 1) + 1;
    exposure = (r3 > .5f) ? 1.f/exposure : exposure;

    /* Do the offset arithmetic in size_t: the original multiplied in int
     * and only widened the (possibly already overflowed) result. */
    size_t plane = (size_t)w * h;
    size_t pixel = (size_t)w * y + x;
    image += (size_t)id * plane * 3;
    float r = image[pixel];
    float g = image[pixel + plane];
    float b = image[pixel + 2*plane];
    float3 rgb = make_float3(r,g,b);
    if(train){
        float3 hsv = rgb_to_hsv_kernel(rgb);
        hsv.y *= saturation;
        hsv.z *= exposure;
        rgb = hsv_to_rgb_kernel(hsv);
    } else {
        shift = 0;
    }
    image[pixel]           = rgb.x*scale + translate + (rshift - .5f)*shift;
    image[pixel + plane]   = rgb.y*scale + translate + (gshift - .5f)*shift;
    image[pixel + 2*plane] = rgb.z*scale + translate + (bshift - .5f)*shift;
}

/*
 * Crop / flip / rotate resampling, one thread per output element.
 *
 * The flat output index is decomposed as (b, k, i, j) = (image, channel,
 * output row, output column). In training mode the crop offset, the
 * horizontal flip and the rotation angle are derived from rand[8*b + 4..7];
 * otherwise the crop is centred with no flip and no rotation. The source
 * position is rotated about the input centre and sampled bilinearly.
 *
 * Launch layout: same thread-id formula as levels_image_kernel; threads with
 * id >= size exit immediately.
 */
__global__ void forward_crop_layer_kernel(float *input, float *rand, int size, int c, int h, int w, int crop_height, int crop_width, int train, int flip, float angle, float *output)
{
    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(id >= size) return;

    float cx = w/2.f;
    float cy = h/2.f;

    int count = id;
    int j = id % crop_width;
    id /= crop_width;
    int i = id % crop_height;
    id /= crop_height;
    int k = id % c;
    id /= c;
    int b = id;

    float r4 = rand[8*b + 4];
    float r5 = rand[8*b + 5];
    float r6 = rand[8*b + 6];
    float r7 = rand[8*b + 7];

    float dw = (w - crop_width)*r4;
    float dh = (h - crop_height)*r5;
    flip = (flip && (r6 > .5f));
    angle = 2*angle*r7 - angle;     /* maps r7 onto [-angle, +angle] for r7 in [0, 1] */
    if(!train){
        dw = (w - crop_width)/2.f;
        dh = (h - crop_height)/2.f;
        flip = 0;
        angle = 0;
    }

    /* Widen before multiplying so the per-image offset cannot overflow int. */
    input += (size_t)w*h*c*b;

    float x = (flip) ? w - dw - j - 1 : j + dw;
    float y = i + dh;

    /* Evaluate the rotation terms once, in single precision. */
    float ca = cosf(angle);
    float sa = sinf(angle);
    float rx = ca*(x-cx) - sa*(y-cy) + cx;
    float ry = sa*(x-cx) + ca*(y-cy) + cy;

    output[count] = bilinear_interpolate_kernel(input, w, h, rx, ry, k);
}

/*
 * Host entry point for the crop layer's forward pass on the GPU.
 *
 * Refreshes layer.rand_gpu (batch*8 values), runs levels_image_kernel in
 * place on state.input, then runs forward_crop_layer_kernel to write
 * layer.output_gpu. Both launches use get_cuda_stream() and are followed by
 * a CHECK_CUDA(cudaPeekAtLastError()) launch check.
 *
 * Unless layer.noadjust is set, pixel values are mapped with scale 2 and
 * translate -1; with noadjust they are left as-is (scale 1, translate 0).
 *
 * NOTE(review): levels_image_kernel assumes 3 channels per image while the
 * crop kernel uses layer.c -- confirm layer.c == 3 for every caller.
 * NOTE(review): state.input is modified in place by the levels pass.
 */
extern "C" void forward_crop_layer_gpu(crop_layer layer, network_state state)
{
    cuda_random(layer.rand_gpu, layer.batch*8);

    /* Degrees to radians. */
    float radians = layer.angle*3.14159265/180.;

    float scale = 2;
    float translate = -1;
    if(layer.noadjust){
        scale = 1;
        translate = 0;
    }

    /* One thread per input pixel (all three channels handled by that thread). */
    int size = layer.batch * layer.w * layer.h;

    levels_image_kernel<<<cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >>>(state.input, layer.rand_gpu, layer.batch, layer.w, layer.h, state.train, layer.saturation, layer.exposure, translate, scale, layer.shift);
    CHECK_CUDA(cudaPeekAtLastError());

    /* One thread per output element. */
    size = layer.batch*layer.c*layer.out_w*layer.out_h;

    forward_crop_layer_kernel<<<cuda_gridsize(size), BLOCK, 0, get_cuda_stream() >>>(state.input, layer.rand_gpu, size, layer.c, layer.h, layer.w, layer.out_h, layer.out_w, state.train, layer.flip, radians, layer.output_gpu);
    CHECK_CUDA(cudaPeekAtLastError());

/*
       cuda_pull_array(layer.output_gpu, layer.output, size);
       image im = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 0*(size/layer.batch));
       image im2 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 1*(size/layer.batch));
       image im3 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 2*(size/layer.batch));

       translate_image(im, -translate);
       scale_image(im, 1/scale);
       translate_image(im2, -translate);
       scale_image(im2, 1/scale);
       translate_image(im3, -translate);
       scale_image(im3, 1/scale);

       show_image(im, "cropped");
       show_image(im2, "cropped2");
       show_image(im3, "cropped3");
       cvWaitKey(0);
       */
}