Algorithm/deepHeadPose.git

			@@ -1,18 +1,84 @@
			import numpy as np
			import torch
			import cv2
			from torch.utils.data.dataset import Dataset
			import os
			import numpy as np
			import cv2
			import pandas as pd

			import torch
			from torch.utils.data.dataset import Dataset
			from torchvision import transforms

			from PIL import Image, ImageFilter

			import utils
			from torchvision import transforms

			def stack_grayscale_tensor(tensor):
				"""Return three copies of ``tensor`` concatenated along dimension 0.

				The input is not modified; a new tensor is returned.
				"""
				copies = (tensor,) * 3
				return torch.cat(copies, 0)
			def get_list_from_filenames(file_path):
				"""Read a text file of names and return them as a list.

				file_path: relative path to a .txt file with one file name per line.
				Returns the file's lines, without line terminators, in file order.
				"""
				with open(file_path) as handle:
					contents = handle.read()
				return contents.splitlines()

			class Synhead(Dataset):
				"""Head-pose dataset driven by a header-less CSV file.

				Each CSV row is read as ``path, bbox_x_min, bbox_y_min, bbox_x_max,
				bbox_y_max, yaw, pitch, roll``. Images are opened as ``.png`` files
				located under ``data_dir``; whatever extension the CSV lists for the
				path is replaced by ``.png``.
				"""

				def __init__(self, data_dir, csv_path, transform, test=False):
					"""Load the annotation table.

					data_dir: directory that the CSV image paths are relative to.
					csv_path: path to the annotation CSV (no header row).
					transform: callable applied to the cropped PIL image, or None.
					test: stored on the instance as ``self.test``; not consulted by
						the sample-loading code in this class.
					"""
					column_names = ['path', 'bbox_x_min', 'bbox_y_min', 'bbox_x_max', 'bbox_y_max', 'yaw', 'pitch', 'roll']
					# 'utf-8-sig' swallows a leading byte-order mark if the file has one.
					tmp_df = pd.read_csv(csv_path, sep=',', names=column_names, index_col=False, encoding="utf-8-sig")
					self.data_dir = data_dir
					self.transform = transform
					self.X_train = tmp_df['path']
					self.y_train = tmp_df[['bbox_x_min', 'bbox_y_min', 'bbox_x_max', 'bbox_y_max', 'yaw', 'pitch', 'roll']]
					self.length = len(tmp_df)
					self.test = test

				def _image_path(self, index):
					"""Return the on-disk .png path for the sample at ``index``."""
					# str.strip('.jpg') removes any of the characters '.', 'j', 'p', 'g'
					# from BOTH ends of the string, which corrupts paths such as
					# 'jpg/set/pig.jpg'. Only the extension must be replaced.
					stem, _ = os.path.splitext(self.X_train.iloc[index])
					return os.path.join(self.data_dir, stem + '.png')

				def __getitem__(self, index):
					"""Return ``(img, labels, cont_labels, path)`` for one sample.

					img: the cropped (and randomly flipped / blurred) RGB image, passed
						through ``self.transform`` when one was given.
					labels: LongTensor with the bin index of yaw, pitch and roll.
					cont_labels: FloatTensor ``[yaw, pitch, roll]`` as continuous values.
					path: the image path exactly as listed in the CSV.
					"""
					img = Image.open(self._image_path(index))
					img = img.convert('RGB')

					x_min, y_min, x_max, y_max, yaw, pitch, roll = (
						float(value) for value in self.y_train.iloc[index])
					# The yaw sign is inverted relative to the value stored in the CSV.
					yaw = -yaw

					# k = 0.2 to 0.40
					k = np.random.random_sample() * 0.2 + 0.2
					# NOTE: the max edges are expanded using the already-widened box
					# (x_min / y_min were moved first). This order is kept on purpose so
					# the crop geometry stays identical to the other datasets in this file.
					x_min -= 0.6 * k * abs(x_max - x_min)
					y_min -= 2 * k * abs(y_max - y_min)
					x_max += 0.6 * k * abs(x_max - x_min)
					y_max += 0.6 * k * abs(y_max - y_min)

					# Crop the face
					img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

					# Flip? Mirroring the image also mirrors yaw and roll.
					rnd = np.random.random_sample()
					if rnd < 0.5:
						yaw = -yaw
						roll = -roll
						img = img.transpose(Image.FLIP_LEFT_RIGHT)

					# Blur?
					rnd = np.random.random_sample()
					if rnd < 0.05:
						img = img.filter(ImageFilter.BLUR)

					# Bin values: bin edges every 3 units from -99 to 99.
					bins = np.arange(-99, 102, 3)
					binned_pose = np.digitize([yaw, pitch, roll], bins) - 1

					labels = torch.LongTensor(binned_pose)
					cont_labels = torch.FloatTensor([yaw, pitch, roll])

					if self.transform is not None:
						img = self.transform(img)

					# Positional lookup, matching the .iloc access used for the labels.
					return img, labels, cont_labels, self.X_train.iloc[index]

				def __len__(self):
					"""Return the number of rows read from the CSV."""
					return self.length

			class Pose_300W_LP(Dataset):
			# Head pose from 300W-LP dataset
			def __init__(self, data_dir, filename_path, transform, img_ext='.jpg', annot_ext='.mat', image_mode='RGB'):
			self.data_dir = data_dir
			self.transform = transform
			@@ -32,14 +98,13 @@
			mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)
			shape_path = os.path.join(self.data_dir, self.y_train[index] + '_shape.npy')

			# Crop the face
			# Crop the face loosely
			pt2d = utils.get_pt2d_from_mat(mat_path)
			x_min = min(pt2d[0,:])
			y_min = min(pt2d[1,:])
			x_max = max(pt2d[0,:])
			y_max = max(pt2d[1,:])

			# k = 0.35 was being used beforehand
			# k = 0.2 to 0.40
			k = np.random.random_sample() * 0.2 + 0.2
			x_min -= 0.6 * k * abs(x_max - x_min)
			@@ -74,6 +139,7 @@
			# Get shape
			shape = np.load(shape_path)

			# Get target tensors
			labels = torch.LongTensor(np.concatenate((binned_pose, shape), axis = 0))
			cont_labels = torch.FloatTensor([yaw, pitch, roll])

			@@ -87,6 +153,7 @@
			return self.length

			class Pose_300W_LP_random_ds(Dataset):
			# 300W-LP dataset with random downsampling
			def __init__(self, data_dir, filename_path, transform, img_ext='.jpg', annot_ext='.mat', image_mode='RGB'):
			self.data_dir = data_dir
			self.transform = transform
			@@ -106,7 +173,7 @@
			mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)
			shape_path = os.path.join(self.data_dir, self.y_train[index] + '_shape.npy')

			# Crop the face
			# Crop the face loosely
			pt2d = utils.get_pt2d_from_mat(mat_path)
			x_min = min(pt2d[0,:])
			y_min = min(pt2d[1,:])
			@@ -122,13 +189,11 @@
			img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

			# We get the pose in radians
			pose = utils.get_ypr_from_mat(mat_path)
			# And convert to degrees.
			pitch = pose[0] * 180 / np.pi
			pose = utils.get_ypr_fro # Head pose from AFLW2000 datasetp.pi
			yaw = pose[1] * 180 / np.pi
			roll = pose[2] * 180 / np.pi

			ds = np.random.randint(1,11)
			ds = 1 + np.random.randint(0,4) * 5
			original_size = img.size
			img = img.resize((img.size[0] / ds, img.size[1] / ds), resample=Image.NEAREST)
			img = img.resize((original_size[0], original_size[1]), resample=Image.NEAREST)
			@@ -152,6 +217,7 @@
			# Get shape
			shape = np.load(shape_path)

			# Get target tensors
			labels = torch.LongTensor(np.concatenate((binned_pose, shape), axis = 0))
			cont_labels = torch.FloatTensor([yaw, pitch, roll])

			@@ -159,88 +225,6 @@
			img = self.transform(img)

			return img, labels, cont_labels, self.X_train[index]

			def __len__(self):
			# Dataset size: returns the self.length value stored on the instance.
			# 122,450 (presumably the sample count of the full file list — TODO confirm)
			return self.length

			class Pose_300W_LP_SR(Dataset):
			def __init__(self, data_dir, filename_path, transform, img_ext='.jpg', annot_ext='.mat', image_mode='RGB'):
				"""Store the configuration and read the list of sample names.

				data_dir: directory that the listed names are relative to.
				filename_path: text file with one sample name per line.
				transform: stored on the instance as ``self.transform``.
				img_ext / annot_ext: suffixes appended to a sample name to locate
					its image file and its annotation file.
				image_mode: mode string handed to ``Image.convert`` when loading.
				"""
				names = get_list_from_filenames(filename_path)

				# Images and annotations are looked up by the same base name, so both
				# attributes refer to the same list.
				self.X_train = names
				self.y_train = names
				self.length = len(names)

				self.data_dir = data_dir
				self.transform = transform
				self.img_ext = img_ext
				self.annot_ext = annot_ext
				self.image_mode = image_mode

			def __getitem__(self, index):
				"""Return ``(img, img_ycc, labels, cont_labels, name)`` for one sample.

				img: tensor of the augmented face crop after resize and random crop.
				img_ycc: tensor of the same crop converted to YCbCr.
				labels: LongTensor holding the binned yaw/pitch/roll followed by the
					values loaded from ``<name>_shape.npy``.
				cont_labels: FloatTensor ``[yaw, pitch, roll]`` in degrees.
				name: the sample name as listed in the file list.

				NOTE(review): ``self.transform`` is not applied here; this method
				uses its own fixed resize / random-crop / to-tensor pipeline.
				"""
				img = Image.open(os.path.join(self.data_dir, self.X_train[index] + self.img_ext))
				img = img.convert(self.image_mode)
				mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)
				# Same '<name>_shape.npy' convention the other 300W-LP datasets in this
				# file use; the original referenced `shape` without ever loading it.
				shape_path = os.path.join(self.data_dir, self.y_train[index] + '_shape.npy')

				# Crop the face: bounding box of the 2D landmarks, loosened by k.
				pt2d = utils.get_pt2d_from_mat(mat_path)
				x_min = min(pt2d[0,:])
				y_min = min(pt2d[1,:])
				x_max = max(pt2d[0,:])
				y_max = max(pt2d[1,:])

				# k = 0.2 to 0.40
				k = np.random.random_sample() * 0.2 + 0.2
				# The max edges are expanded using the already-widened box; this order
				# is kept so the crop matches the other datasets in this file.
				x_min -= 0.6 * k * abs(x_max - x_min)
				y_min -= 2 * k * abs(y_max - y_min)
				x_max += 0.6 * k * abs(x_max - x_min)
				y_max += 0.6 * k * abs(y_max - y_min)
				img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

				# We get the pose in radians
				pose = utils.get_ypr_from_mat(mat_path)
				# And convert to degrees.
				pitch = pose[0] * 180 / np.pi
				yaw = pose[1] * 180 / np.pi
				roll = pose[2] * 180 / np.pi

				# Randomly degrade half of the samples: shrink by `ds`, then scale back.
				rnd = np.random.random_sample()
				if rnd < 0.5:
					ds = 10
					original_size = img.size
					# Image.resize needs integer sizes; '/' yields floats on Python 3.
					# Clamp to 1 so crops smaller than `ds` pixels do not become 0-sized.
					small_size = (max(1, original_size[0] // ds), max(1, original_size[1] // ds))
					img = img.resize(small_size, resample=Image.NEAREST)
					img = img.resize((original_size[0], original_size[1]), resample=Image.NEAREST)

				# Flip? Mirroring the image also mirrors yaw and roll.
				rnd = np.random.random_sample()
				if rnd < 0.5:
					yaw = -yaw
					roll = -roll
					img = img.transpose(Image.FLIP_LEFT_RIGHT)

				# Blur?
				rnd = np.random.random_sample()
				if rnd < 0.05:
					img = img.filter(ImageFilter.BLUR)

				# Bin values
				bins = np.array(range(-99, 102, 3))
				binned_pose = np.digitize([yaw, pitch, roll], bins) - 1

				# Get shape
				shape = np.load(shape_path)

				# Get target tensors
				labels = torch.LongTensor(np.concatenate((binned_pose, shape), axis = 0))
				cont_labels = torch.FloatTensor([yaw, pitch, roll])

				# Transforms
				# `Scale` was renamed to `Resize` in torchvision; prefer the new name
				# and fall back to the old one on very old installations.
				resize_cls = getattr(transforms, 'Resize', None) or transforms.Scale
				img = resize_cls(240)(img)
				img = transforms.RandomCrop(224)(img)
				# Convert to YCbCr from the final crop so both outputs show the same
				# pixels, then turn both PIL images into tensors. The original
				# assigned the ToTensor transform object itself instead of applying it.
				img_ycc = img.convert('YCbCr')
				to_tensor = transforms.ToTensor()
				img = to_tensor(img)
				img_ycc = to_tensor(img_ycc)

				return img, img_ycc, labels, cont_labels, self.X_train[index]

			def __len__(self):
			# 122,450
			@@ -265,7 +249,7 @@
			img = img.convert(self.image_mode)
			mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)

			# Crop the face
			# Crop the face loosely
			pt2d = utils.get_pt2d_from_mat(mat_path)

			x_min = min(pt2d[0,:])
			@@ -301,6 +285,7 @@
			return self.length

			class AFLW2000_ds(Dataset):
			# AFLW2000 dataset with fixed downsampling
			def __init__(self, data_dir, filename_path, transform, img_ext='.jpg', annot_ext='.mat', image_mode='RGB'):
			self.data_dir = data_dir
			self.transform = transform
			@@ -319,7 +304,7 @@
			img = img.convert(self.image_mode)
			mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)

			# Crop the face
			# Crop the face loosely
			pt2d = utils.get_pt2d_from_mat(mat_path)
			x_min = min(pt2d[0,:])
			y_min = min(pt2d[1,:])
			@@ -333,7 +318,7 @@
			y_max += 0.6 * k * abs(y_max - y_min)
			img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

			ds = 3
			ds = 3 # downsampling factor
			original_size = img.size
			img = img.resize((img.size[0] / ds, img.size[1] / ds), resample=Image.NEAREST)
			img = img.resize((original_size[0], original_size[1]), resample=Image.NEAREST)
			@@ -359,6 +344,7 @@
			return self.length

			class AFLW_aug(Dataset):
			# AFLW dataset with flipping
			def __init__(self, data_dir, filename_path, transform, img_ext='.jpg', annot_ext='.txt', image_mode='RGB'):
			self.data_dir = data_dir
			self.transform = transform
			@@ -385,7 +371,7 @@
			yaw = pose[0] * 180 / np.pi
			pitch = pose[1] * 180 / np.pi
			roll = pose[2] * 180 / np.pi
			# Something weird with the roll in AFLW
			# Fix the roll in AFLW
			roll *= -1

			# Augment
			@@ -395,21 +381,6 @@
			yaw = -yaw
			roll = -roll
			img = img.transpose(Image.FLIP_LEFT_RIGHT)

			# Blur?
			# rnd = np.random.random_sample()
			# if rnd < 0.05:
			# img = img.filter(ImageFilter.BLUR)
			# if rnd < 0.025:
			# img = img.filter(ImageFilter.BLUR)
			#
			# rnd = np.random.random_sample()
			# if rnd < 0.05:
			# nb = np.random.randint(1,5)
			# img = img.rotate(-nb)
			# elif rnd > 0.95:
			# nb = np.random.randint(1,5)
			# img = img.rotate(nb)

			# Bin values
			bins = np.array(range(-99, 102, 3))
			@@ -453,7 +424,7 @@
			yaw = pose[0] * 180 / np.pi
			pitch = pose[1] * 180 / np.pi
			roll = pose[2] * 180 / np.pi
			# Something weird with the roll in AFLW
			# Fix the roll in AFLW
			roll *= -1
			# Bin values
			bins = np.array(range(-99, 102, 3))
			@@ -497,7 +468,7 @@
			line = annot.readline().split(' ')
			yaw, pitch, roll = [float(line[1]), float(line[2]), float(line[3])]

			# Crop the face
			# Crop the face loosely
			k = 0.32
			x1 = float(line[4])
			y1 = float(line[5])
			@@ -602,11 +573,3 @@
			def __len__(self):
			# Dataset size: returns the self.length value stored on the instance.
			# 15,667 (presumably the sample count of the full file list — TODO confirm)
			return self.length


			def get_list_from_filenames(file_path):
				"""Read a text file of names and return them as a list.

				file_path: relative path to a .txt file with one file name per line.
				Returns the file's lines, without line terminators, in file order.
				"""
				with open(file_path) as handle:
					contents = handle.read()
				return contents.splitlines()