Algorithm/deepHeadPose.git

			@@ -1,17 +1,84 @@
			import numpy as np
			import torch
			import cv2
			from torch.utils.data.dataset import Dataset
			import os
			from PIL import Image
			import numpy as np
			import cv2
			import pandas as pd

			import torch
			from torch.utils.data.dataset import Dataset
			from torchvision import transforms

			from PIL import Image, ImageFilter

			import utils

			def stack_grayscale_tensor(tensor):
			    """Return `tensor` concatenated with itself three times along dim 0."""
			    copies = (tensor,) * 3
			    return torch.cat(copies, 0)
			def get_list_from_filenames(file_path):
			    """Return the lines of a text file as a list of strings.

			    Args:
			        file_path: path to a text file holding one entry per line.

			    Returns:
			        The file's lines, in order, without their line terminators.
			    """
			    with open(file_path) as handle:
			        content = handle.read()
			    return content.splitlines()

			class Synhead(Dataset):
			    """Dataset of head crops with binned and continuous pose labels read from a CSV.

			    Each CSV row is read as, in order: relative image path, bounding box
			    (x_min, y_min, x_max, y_max) and yaw, pitch, roll.
			    """

			    def __init__(self, data_dir, csv_path, transform, test=False):
			        """Load the annotation table.

			        Args:
			            data_dir: directory the CSV image paths are relative to.
			            csv_path: header-less, comma-separated annotation file.
			            transform: callable applied to the cropped PIL image, or None.
			            test: stored on the instance; not used by this class itself.
			        """
			        column_names = ['path', 'bbox_x_min', 'bbox_y_min', 'bbox_x_max', 'bbox_y_max', 'yaw', 'pitch', 'roll']
			        tmp_df = pd.read_csv(csv_path, sep=',', names=column_names, index_col=False, encoding="utf-8-sig")
			        self.data_dir = data_dir
			        self.transform = transform
			        self.X_train = tmp_df['path']
			        self.y_train = tmp_df[column_names[1:]]
			        self.length = len(tmp_df)
			        self.test = test

			    def _image_path(self, index):
			        # Build the '.png' path that is opened for sample `index`.
			        rel_path = self.X_train.iloc[index]
			        # Drop a trailing '.jpg' only. str.strip('.jpg') removes any of the
			        # characters '.', 'j', 'p', 'g' from BOTH ends, which corrupts paths
			        # such as './data/a/img.jpg' (leading '.' and the 'g' of 'img' vanish).
			        if rel_path.endswith('.jpg'):
			            rel_path = rel_path[:-len('.jpg')]
			        return os.path.join(self.data_dir, rel_path) + '.png'

			    def __getitem__(self, index):
			        """Return one randomly augmented sample.

			        Returns:
			            (img, labels, cont_labels, path): the cropped (and transformed)
			            image, a LongTensor with the bin indices of yaw/pitch/roll, a
			            FloatTensor with the continuous yaw/pitch/roll, and the path
			            string stored in the CSV for this sample.
			        """
			        img = Image.open(self._image_path(index))
			        img = img.convert('RGB')

			        x_min, y_min, x_max, y_max, yaw, pitch, roll = [float(v) for v in self.y_train.iloc[index]]
			        # The yaw sign is inverted with respect to the value stored in the CSV.
			        yaw = -yaw

			        # Loosen the box by a random factor k in [0.2, 0.4).
			        # NOTE(review): x_max / y_max are grown using the already-extended
			        # x_min / y_min, so the margins are not symmetric. Kept as in the
			        # original so the crop statistics do not change.
			        k = np.random.random_sample() * 0.2 + 0.2
			        x_min -= 0.6 * k * abs(x_max - x_min)
			        y_min -= 2 * k * abs(y_max - y_min)
			        x_max += 0.6 * k * abs(x_max - x_min)
			        y_max += 0.6 * k * abs(y_max - y_min)

			        # Crop the face
			        img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

			        # Horizontal flip with probability 0.5; yaw and roll change sign with it.
			        if np.random.random_sample() < 0.5:
			            yaw = -yaw
			            roll = -roll
			            img = img.transpose(Image.FLIP_LEFT_RIGHT)

			        # Blur with probability 0.05.
			        if np.random.random_sample() < 0.05:
			            img = img.filter(ImageFilter.BLUR)

			        # Bin values: edges every 3 units from -99 to 99.
			        bins = np.arange(-99, 102, 3)
			        binned_pose = np.digitize([yaw, pitch, roll], bins) - 1

			        labels = torch.LongTensor(binned_pose)
			        cont_labels = torch.FloatTensor([yaw, pitch, roll])

			        if self.transform is not None:
			            img = self.transform(img)

			        # Positional lookup, consistent with the .iloc access on y_train.
			        return img, labels, cont_labels, self.X_train.iloc[index]

			    def __len__(self):
			        """Return the number of rows read from the CSV."""
			        return self.length

			class Pose_300W_LP(Dataset):
			# Head pose from 300W-LP dataset
			def __init__(self, data_dir, filename_path, transform, img_ext='.jpg', annot_ext='.mat', image_mode='RGB'):
			self.data_dir = data_dir
			self.transform = transform
			@@ -29,20 +96,20 @@
			img = Image.open(os.path.join(self.data_dir, self.X_train[index] + self.img_ext))
			img = img.convert(self.image_mode)
			mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)
			shape_path = os.path.join(self.data_dir, self.y_train[index] + '_shape.npy')

			# Crop the face
			# Crop the face loosely
			pt2d = utils.get_pt2d_from_mat(mat_path)
			x_min = min(pt2d[0,:])
			y_min = min(pt2d[1,:])
			x_max = max(pt2d[0,:])
			y_max = max(pt2d[1,:])

			k = 0.15
			x_min -= k * abs(x_max - x_min)
			y_min -= 4 * k * abs(y_max - y_min)
			x_max += k * abs(x_max - x_min)
			y_max += 0.4 * k * abs(y_max - y_min)
			# k = 0.2 to 0.40
			k = np.random.random_sample() * 0.2 + 0.2
			x_min -= 0.6 * k * abs(x_max - x_min)
			y_min -= 2 * k * abs(y_max - y_min)
			x_max += 0.6 * k * abs(x_max - x_min)
			y_max += 0.6 * k * abs(y_max - y_min)
			img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

			# We get the pose in radians
			@@ -51,19 +118,106 @@
			pitch = pose[0] * 180 / np.pi
			yaw = pose[1] * 180 / np.pi
			roll = pose[2] * 180 / np.pi

			# Flip?
			rnd = np.random.random_sample()
			if rnd < 0.5:
			yaw = -yaw
			roll = -roll
			img = img.transpose(Image.FLIP_LEFT_RIGHT)

			# Blur?
			rnd = np.random.random_sample()
			if rnd < 0.05:
			img = img.filter(ImageFilter.BLUR)

			# Bin values
			bins = np.array(range(-99, 102, 3))
			binned_pose = np.digitize([yaw, pitch, roll], bins) - 1

			# Get shape
			shape = np.load(shape_path)

			labels = torch.LongTensor(np.concatenate((binned_pose, shape), axis = 0))
			# Get target tensors
			labels = binned_pose
			cont_labels = torch.FloatTensor([yaw, pitch, roll])

			if self.transform is not None:
			img = self.transform(img)

			return img, labels, self.X_train[index]
			return img, labels, cont_labels, self.X_train[index]

			def __len__(self):
			# 122,450
			return self.length

			class Pose_300W_LP_random_ds(Dataset):
			# 300W-LP dataset with random downsampling
			def __init__(self, data_dir, filename_path, transform, img_ext='.jpg', annot_ext='.mat', image_mode='RGB'):
			    """Store the configuration and read the list of sample names.

			    Args:
			        data_dir: directory that sample names are joined onto.
			        filename_path: text file with one sample name per line.
			        transform: callable applied to each cropped image, or None.
			        img_ext: suffix appended to a sample name to get its image file.
			        annot_ext: suffix appended to a sample name to get its annotation file.
			        image_mode: mode passed to PIL's `convert` for every image.
			    """
			    names = get_list_from_filenames(filename_path)

			    # Image and annotation lookups share the very same list of names.
			    self.X_train = self.y_train = names
			    self.length = len(names)

			    self.data_dir = data_dir
			    self.transform = transform
			    self.img_ext = img_ext
			    self.annot_ext = annot_ext
			    self.image_mode = image_mode

			def __getitem__(self, index):
			    """Return one randomly cropped, downsampled and augmented sample.

			    Returns:
			        (img, labels, cont_labels, name): the processed image, the bin
			        indices of yaw/pitch/roll as returned by np.digitize (a numpy
			        array), a FloatTensor with yaw/pitch/roll in degrees, and the
			        sample name from the file list.
			    """
			    img = Image.open(os.path.join(self.data_dir, self.X_train[index] + self.img_ext))
			    img = img.convert(self.image_mode)
			    mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)

			    # Crop the face loosely around the extent of the 2D points.
			    pt2d = utils.get_pt2d_from_mat(mat_path)
			    x_min = min(pt2d[0,:])
			    y_min = min(pt2d[1,:])
			    x_max = max(pt2d[0,:])
			    y_max = max(pt2d[1,:])

			    # k = 0.2 to 0.40
			    # NOTE(review): x_max / y_max are grown using the already-extended
			    # x_min / y_min; kept unchanged so the crop statistics stay the same.
			    k = np.random.random_sample() * 0.2 + 0.2
			    x_min -= 0.6 * k * abs(x_max - x_min)
			    y_min -= 2 * k * abs(y_max - y_min)
			    x_max += 0.6 * k * abs(x_max - x_min)
			    y_max += 0.6 * k * abs(y_max - y_min)
			    img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

			    # We get the pose in radians and convert it to degrees.
			    pose = utils.get_ypr_from_mat(mat_path)
			    pitch = pose[0] * 180 / np.pi
			    yaw = pose[1] * 180 / np.pi
			    roll = pose[2] * 180 / np.pi

			    # Random downsampling factor: 1, 6, 11 or 16.
			    ds = 1 + np.random.randint(0,4) * 5
			    original_size = img.size
			    # Image.resize needs integer pixel sizes: '/' yields floats on
			    # Python 3, and a small crop must not collapse to a 0-pixel side.
			    small_size = (max(1, int(original_size[0] // ds)), max(1, int(original_size[1] // ds)))
			    img = img.resize(small_size, resample=Image.NEAREST)
			    img = img.resize((original_size[0], original_size[1]), resample=Image.NEAREST)

			    # Horizontal flip with probability 0.5; yaw and roll change sign with it.
			    if np.random.random_sample() < 0.5:
			        yaw = -yaw
			        roll = -roll
			        img = img.transpose(Image.FLIP_LEFT_RIGHT)

			    # Blur with probability 0.05.
			    if np.random.random_sample() < 0.05:
			        img = img.filter(ImageFilter.BLUR)

			    # Bin values: edges every 3 degrees from -99 to 99.
			    bins = np.arange(-99, 102, 3)
			    binned_pose = np.digitize([yaw, pitch, roll], bins) - 1

			    # Get target tensors
			    labels = binned_pose
			    cont_labels = torch.FloatTensor([yaw, pitch, roll])

			    if self.transform is not None:
			        img = self.transform(img)

			    return img, labels, cont_labels, self.X_train[index]

			def __len__(self):
			# 122,450
			@@ -88,18 +242,19 @@
			img = img.convert(self.image_mode)
			mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)

			# Crop the face
			# Crop the face loosely
			pt2d = utils.get_pt2d_from_mat(mat_path)

			x_min = min(pt2d[0,:])
			y_min = min(pt2d[1,:])
			x_max = max(pt2d[0,:])
			y_max = max(pt2d[1,:])

			k = 0.15
			x_min -= k * abs(x_max - x_min)
			y_min -= 4 * k * abs(y_max - y_min)
			x_max += k * abs(x_max - x_min)
			y_max += 0.4 * k * abs(y_max - y_min)
			k = 0.20
			x_min -= 2 * k * abs(x_max - x_min)
			y_min -= 2 * k * abs(y_max - y_min)
			x_max += 2 * k * abs(x_max - x_min)
			y_max += 0.6 * k * abs(y_max - y_min)
			img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

			# We get the pose in radians
			@@ -111,14 +266,128 @@
			# Bin values
			bins = np.array(range(-99, 102, 3))
			labels = torch.LongTensor(np.digitize([yaw, pitch, roll], bins) - 1)
			cont_labels = torch.FloatTensor([yaw, pitch, roll])

			if self.transform is not None:
			img = self.transform(img)

			return img, labels, self.X_train[index]
			return img, labels, cont_labels, self.X_train[index]

			def __len__(self):
			    """Return the number of samples (2,000 according to the original note)."""
			    return self.length

			class AFLW2000_ds(Dataset):
			    """AFLW2000 dataset with fixed downsampling.

			    Every image is cropped loosely around its 2D points, shrunk by a fixed
			    factor and scaled back to the crop size with nearest-neighbour
			    resampling.
			    """

			    def __init__(self, data_dir, filename_path, transform, img_ext='.jpg', annot_ext='.mat', image_mode='RGB'):
			        """Store the configuration and read the list of sample names.

			        Args:
			            data_dir: directory that sample names are joined onto.
			            filename_path: text file with one sample name per line.
			            transform: callable applied to each processed image, or None.
			            img_ext: suffix appended to a sample name to get its image file.
			            annot_ext: suffix appended to a sample name to get its annotation file.
			            image_mode: mode passed to PIL's `convert` for every image.
			        """
			        self.data_dir = data_dir
			        self.transform = transform
			        self.img_ext = img_ext
			        self.annot_ext = annot_ext

			        filename_list = get_list_from_filenames(filename_path)

			        self.X_train = filename_list
			        self.y_train = filename_list
			        self.image_mode = image_mode
			        self.length = len(filename_list)

			    @staticmethod
			    def _downsampled_size(size, factor):
			        # Integer (width, height) of `size` divided by `factor`, at least 1 px per side.
			        return (max(1, int(size[0]) // factor), max(1, int(size[1]) // factor))

			    def __getitem__(self, index):
			        """Return one cropped and downsampled sample.

			        Returns:
			            (img, labels, cont_labels, name): the processed image, a
			            LongTensor with the bin indices of yaw/pitch/roll, a FloatTensor
			            with yaw/pitch/roll in degrees, and the sample name.
			        """
			        img = Image.open(os.path.join(self.data_dir, self.X_train[index] + self.img_ext))
			        img = img.convert(self.image_mode)
			        mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)

			        # Crop the face loosely around the extent of the 2D points.
			        pt2d = utils.get_pt2d_from_mat(mat_path)
			        x_min = min(pt2d[0,:])
			        y_min = min(pt2d[1,:])
			        x_max = max(pt2d[0,:])
			        y_max = max(pt2d[1,:])

			        # NOTE(review): x_max / y_max are grown using the already-extended
			        # x_min / y_min; kept unchanged so the evaluation crops stay the same.
			        k = 0.20
			        x_min -= 2 * k * abs(x_max - x_min)
			        y_min -= 2 * k * abs(y_max - y_min)
			        x_max += 2 * k * abs(x_max - x_min)
			        y_max += 0.6 * k * abs(y_max - y_min)
			        img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

			        ds = 3 # downsampling factor
			        original_size = img.size
			        # Image.resize needs integer pixel sizes; 'size / ds' yields floats
			        # on Python 3, so the reduced size is computed with floor division.
			        img = img.resize(self._downsampled_size(original_size, ds), resample=Image.NEAREST)
			        img = img.resize((original_size[0], original_size[1]), resample=Image.NEAREST)

			        # We get the pose in radians
			        pose = utils.get_ypr_from_mat(mat_path)
			        # And convert to degrees.
			        pitch = pose[0] * 180 / np.pi
			        yaw = pose[1] * 180 / np.pi
			        roll = pose[2] * 180 / np.pi
			        # Bin values: edges every 3 degrees from -99 to 99.
			        bins = np.arange(-99, 102, 3)
			        labels = torch.LongTensor(np.digitize([yaw, pitch, roll], bins) - 1)
			        cont_labels = torch.FloatTensor([yaw, pitch, roll])

			        if self.transform is not None:
			            img = self.transform(img)

			        return img, labels, cont_labels, self.X_train[index]

			    def __len__(self):
			        """Return the number of samples (2,000 according to the original note)."""
			        return self.length

			class AFLW_aug(Dataset):
			    """AFLW dataset with random horizontal flipping."""

			    def __init__(self, data_dir, filename_path, transform, img_ext='.jpg', annot_ext='.txt', image_mode='RGB'):
			        """Store the configuration and read the list of sample names.

			        Args:
			            data_dir: directory that sample names are joined onto.
			            filename_path: text file with one sample name per line.
			            transform: callable applied to each image, or None.
			            img_ext: suffix appended to a sample name to get its image file.
			            annot_ext: suffix appended to a sample name to get its annotation file.
			            image_mode: mode passed to PIL's `convert` for every image.
			        """
			        self.data_dir = data_dir
			        self.transform = transform
			        self.img_ext = img_ext
			        self.annot_ext = annot_ext

			        filename_list = get_list_from_filenames(filename_path)

			        self.X_train = filename_list
			        self.y_train = filename_list
			        self.image_mode = image_mode
			        self.length = len(filename_list)

			    @staticmethod
			    def _read_pose(txt_path):
			        # Read (yaw, pitch, roll) in degrees from the first line of an annotation file.
			        # The context manager closes the file; the original left the handle open.
			        with open(txt_path, 'r') as annot:
			            line = annot.readline().split(' ')
			        # Fields 1..3 hold the pose in radians; field 0 is not used here.
			        pose = [float(line[1]), float(line[2]), float(line[3])]
			        # And convert to degrees.
			        yaw = pose[0] * 180 / np.pi
			        pitch = pose[1] * 180 / np.pi
			        roll = pose[2] * 180 / np.pi
			        # Fix the roll in AFLW
			        roll *= -1
			        return yaw, pitch, roll

			    def __getitem__(self, index):
			        """Return one sample, horizontally flipped with probability 0.5.

			        Returns:
			            (img, labels, cont_labels, name): the (transformed) image, a
			            LongTensor with the bin indices of yaw/pitch/roll, a FloatTensor
			            with yaw/pitch/roll in degrees, and the sample name.
			        """
			        img = Image.open(os.path.join(self.data_dir, self.X_train[index] + self.img_ext))
			        img = img.convert(self.image_mode)
			        txt_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)

			        yaw, pitch, roll = self._read_pose(txt_path)

			        # Augment: a horizontal flip changes the sign of yaw and roll.
			        if np.random.random_sample() < 0.5:
			            yaw = -yaw
			            roll = -roll
			            img = img.transpose(Image.FLIP_LEFT_RIGHT)

			        # Bin values: edges every 3 degrees from -99 to 99.
			        bins = np.arange(-99, 102, 3)
			        labels = torch.LongTensor(np.digitize([yaw, pitch, roll], bins) - 1)
			        cont_labels = torch.FloatTensor([yaw, pitch, roll])

			        if self.transform is not None:
			            img = self.transform(img)

			        return img, labels, cont_labels, self.X_train[index]

			    def __len__(self):
			        """Return the number of samples.

			        The original note gives 18,863 for the train list and 1,966 for the
			        test list.
			        """
			        return self.length

			class AFLW(Dataset):
			@@ -148,17 +417,17 @@
			yaw = pose[0] * 180 / np.pi
			pitch = pose[1] * 180 / np.pi
			roll = pose[2] * 180 / np.pi
			# Something weird with the roll in AFLW
			if yaw < 0:
			roll *= -1
			# Fix the roll in AFLW
			roll *= -1
			# Bin values
			bins = np.array(range(-99, 102, 3))
			labels = torch.LongTensor(np.digitize([yaw, pitch, roll], bins) - 1)
			cont_labels = torch.FloatTensor([yaw, pitch, roll])

			if self.transform is not None:
			img = self.transform(img)

			return img, labels, self.X_train[index]
			return img, labels, cont_labels, self.X_train[index]

			def __len__(self):
			# train: 18,863
			@@ -192,32 +461,108 @@
			line = annot.readline().split(' ')
			yaw, pitch, roll = [float(line[1]), float(line[2]), float(line[3])]

			# Crop the face
			margin = 40
			x_min = float(line[4]) - margin
			y_min = float(line[5]) - margin
			x_max = float(line[6]) + margin
			y_max = float(line[7]) + margin
			# Crop the face loosely
			k = 0.32
			x1 = float(line[4])
			y1 = float(line[5])
			x2 = float(line[6])
			y2 = float(line[7])
			x1 -= 0.8 * k * abs(x2 - x1)
			y1 -= 2 * k * abs(y2 - y1)
			x2 += 0.8 * k * abs(x2 - x1)
			y2 += 1 * k * abs(y2 - y1)

			img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))
			img = img.crop((int(x1), int(y1), int(x2), int(y2)))

			# Bin values
			bins = np.array(range(-99, 102, 3))
			labels = torch.LongTensor(np.digitize([yaw, pitch, roll], bins) - 1)
			cont_labels = torch.FloatTensor([yaw, pitch, roll])

			if self.transform is not None:
			img = self.transform(img)

			return img, labels, self.X_train[index]
			return img, labels, cont_labels, self.X_train[index]

			def __len__(self):
			    """Return the number of samples (around 200 according to the original note)."""
			    return self.length

			class BIWI(Dataset):
			    """BIWI dataset: RGB frames with a rotation-matrix pose file and a face box file.

			    For a sample name `<dir>/.../<frame>` the files read are
			    `<name>_rgb<img_ext>`, `<name>_pose<annot_ext>` and
			    `<dir>/dockerface-<frame>_rgb<annot_ext>`, all relative to `data_dir`.
			    """

			    def __init__(self, data_dir, filename_path, transform, img_ext='.png', annot_ext='.txt', image_mode='RGB'):
			        """Store the configuration and read the list of sample names.

			        Args:
			            data_dir: directory that sample names are joined onto.
			            filename_path: text file with one sample name per line.
			            transform: callable applied to each cropped image, or None.
			            img_ext: image file suffix.
			            annot_ext: suffix of the pose and bounding-box text files.
			            image_mode: mode passed to PIL's `convert` for every image.
			        """
			        self.data_dir = data_dir
			        self.transform = transform
			        self.img_ext = img_ext
			        self.annot_ext = annot_ext

			        # One relative sample name per line.
			        with open(filename_path) as f:
			            filename_list = f.read().splitlines()

			        self.X_train = filename_list
			        self.y_train = filename_list
			        self.image_mode = image_mode
			        self.length = len(filename_list)

			    @staticmethod
			    def _read_bbox(bbox_path, img_size):
			        # Return (x_min, y_min, x_max, y_max) from the first line of the box file.
			        with open(bbox_path, 'r') as bbox:
			            line = bbox.readline().split(' ')
			        # Fields 1..4 hold the box, so at least five fields are required.
			        # Anything shorter falls back to the whole image.
			        if len(line) < 5:
			            return 0, 0, img_size[0], img_size[1]
			        return float(line[1]), float(line[2]), float(line[3]), float(line[4])

			    @staticmethod
			    def _read_rotation(pose_path):
			        # Parse the pose file and return its first three numeric rows as an array.
			        rows = []
			        with open(pose_path, 'r') as pose_annot:
			            for raw_line in pose_annot:
			                tokens = raw_line.strip('\n').split(' ')
			                # Lines whose first token is empty (e.g. blank lines) are skipped.
			                if tokens[0] != '':
			                    rows.append([float(nb) for nb in tokens if nb != ''])
			        # Rows after the third (the translation row in the files) are not used.
			        return np.array(rows)[:3, :]

			    @staticmethod
			    def _pose_from_rotation(rotation):
			        # Convert a 3x3 rotation matrix to (yaw, pitch, roll) in degrees.
			        R = np.transpose(rotation)
			        roll = -np.arctan2(R[1][0], R[0][0]) * 180 / np.pi
			        yaw = -np.arctan2(-R[2][0], np.sqrt(R[2][1] ** 2 + R[2][2] ** 2)) * 180 / np.pi
			        pitch = np.arctan2(R[2][1], R[2][2]) * 180 / np.pi
			        return yaw, pitch, roll

			    def __getitem__(self, index):
			        """Return one loosely cropped sample.

			        Returns:
			            (img, labels, cont_labels, name): the cropped (and transformed)
			            image, a LongTensor with the bin indices of yaw/pitch/roll, a
			            FloatTensor with yaw/pitch/roll in degrees, and the sample name.
			        """
			        img = Image.open(os.path.join(self.data_dir, self.X_train[index] + '_rgb' + self.img_ext))
			        img = img.convert(self.image_mode)
			        pose_path = os.path.join(self.data_dir, self.y_train[index] + '_pose' + self.annot_ext)

			        y_train_list = self.y_train[index].split('/')
			        bbox_path = os.path.join(self.data_dir, y_train_list[0] + '/dockerface-' + y_train_list[-1] + '_rgb' + self.annot_ext)

			        # Load bounding box
			        x_min, y_min, x_max, y_max = self._read_bbox(bbox_path, img.size)

			        # Load pose in degrees
			        yaw, pitch, roll = self._pose_from_rotation(self._read_rotation(pose_path))

			        # Loosely crop face
			        # NOTE(review): x_max / y_max are grown using the already-extended
			        # x_min / y_min; kept unchanged so the crops stay the same.
			        k = 0.35
			        x_min -= 0.6 * k * abs(x_max - x_min)
			        y_min -= k * abs(y_max - y_min)
			        x_max += 0.6 * k * abs(x_max - x_min)
			        y_max += 0.6 * k * abs(y_max - y_min)
			        img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))

			        # Bin values: edges every 3 degrees from -99 to 99.
			        bins = np.arange(-99, 102, 3)
			        binned_pose = np.digitize([yaw, pitch, roll], bins) - 1

			        labels = torch.LongTensor(binned_pose)
			        cont_labels = torch.FloatTensor([yaw, pitch, roll])

			        if self.transform is not None:
			            img = self.transform(img)

			        return img, labels, cont_labels, self.X_train[index]

			    def __len__(self):
			        """Return the number of samples (15,667 according to the original note)."""
			        return self.length