From 2f6778c2db9ce1a887f04fdc85ad0d5db4ba84b8 Mon Sep 17 00:00:00 2001
From: natanielruiz <nruiz9@gatech.edu>
Date: Mon, 30 Oct 2017 06:15:30 +0800
Subject: [PATCH] Cleaned up a bit
---
code/vdsr.py | 40 -----
code/datasets.py | 56 ++++----
code/hopenet.py | 24 +--
code/train_alexnet.py | 70 +++------
code/train_resnet50_regression.py | 55 ++-----
code/train_preangles.py | 94 +++---------
code/test_preangles.py | 43 ++---
7 files changed, 116 insertions(+), 266 deletions(-)
diff --git a/code/datasets.py b/code/datasets.py
index b2b9ca3..a28c584 100644
--- a/code/datasets.py
+++ b/code/datasets.py
@@ -1,18 +1,24 @@
-import numpy as np
-import torch
-import cv2
-from torch.utils.data.dataset import Dataset
import os
+import numpy as np
+import cv2
+
+import torch
+from torch.utils.data.dataset import Dataset
+from torchvision import transforms
+
from PIL import Image, ImageFilter
import utils
-from torchvision import transforms
-def stack_grayscale_tensor(tensor):
- tensor = torch.cat([tensor, tensor, tensor], 0)
- return tensor
+def get_list_from_filenames(file_path):
+ # input: relative path to .txt file with file names
+ # output: list of relative path names
+ with open(file_path) as f:
+ lines = f.read().splitlines()
+ return lines
class Pose_300W_LP(Dataset):
+ # Head pose from 300W-LP dataset
def __init__(self, data_dir, filename_path, transform, img_ext='.jpg', annot_ext='.mat', image_mode='RGB'):
self.data_dir = data_dir
self.transform = transform
@@ -32,14 +38,13 @@
mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)
shape_path = os.path.join(self.data_dir, self.y_train[index] + '_shape.npy')
- # Crop the face
+ # Crop the face loosely
pt2d = utils.get_pt2d_from_mat(mat_path)
x_min = min(pt2d[0,:])
y_min = min(pt2d[1,:])
x_max = max(pt2d[0,:])
y_max = max(pt2d[1,:])
- # k = 0.35 was being used beforehand
# k = 0.2 to 0.40
k = np.random.random_sample() * 0.2 + 0.2
x_min -= 0.6 * k * abs(x_max - x_min)
@@ -74,6 +79,7 @@
# Get shape
shape = np.load(shape_path)
+ # Get target tensors
labels = torch.LongTensor(np.concatenate((binned_pose, shape), axis = 0))
cont_labels = torch.FloatTensor([yaw, pitch, roll])
@@ -87,6 +93,7 @@
return self.length
class Pose_300W_LP_random_ds(Dataset):
+ # 300W-LP dataset with random downsampling
def __init__(self, data_dir, filename_path, transform, img_ext='.jpg', annot_ext='.mat', image_mode='RGB'):
self.data_dir = data_dir
self.transform = transform
@@ -106,7 +113,7 @@
mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)
shape_path = os.path.join(self.data_dir, self.y_train[index] + '_shape.npy')
- # Crop the face
+ # Crop the face loosely
pt2d = utils.get_pt2d_from_mat(mat_path)
x_min = min(pt2d[0,:])
y_min = min(pt2d[1,:])
@@ -122,9 +129,7 @@
img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))
# We get the pose in radians
- pose = utils.get_ypr_from_mat(mat_path)
- # And convert to degrees.
- pitch = pose[0] * 180 / np.pi
+ pose = utils.get_ypr_from_mat(mat_path)
+ pitch = pose[0] * 180 / np.pi
yaw = pose[1] * 180 / np.pi
roll = pose[2] * 180 / np.pi
@@ -152,6 +157,7 @@
# Get shape
shape = np.load(shape_path)
+ # Get target tensors
labels = torch.LongTensor(np.concatenate((binned_pose, shape), axis = 0))
cont_labels = torch.FloatTensor([yaw, pitch, roll])
@@ -183,7 +189,7 @@
img = img.convert(self.image_mode)
mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)
- # Crop the face
+ # Crop the face loosely
pt2d = utils.get_pt2d_from_mat(mat_path)
x_min = min(pt2d[0,:])
@@ -219,6 +225,7 @@
return self.length
class AFLW2000_ds(Dataset):
+ # AFLW2000 dataset with fixed downsampling
def __init__(self, data_dir, filename_path, transform, img_ext='.jpg', annot_ext='.mat', image_mode='RGB'):
self.data_dir = data_dir
self.transform = transform
@@ -237,7 +244,7 @@
img = img.convert(self.image_mode)
mat_path = os.path.join(self.data_dir, self.y_train[index] + self.annot_ext)
- # Crop the face
+ # Crop the face loosely
pt2d = utils.get_pt2d_from_mat(mat_path)
x_min = min(pt2d[0,:])
y_min = min(pt2d[1,:])
@@ -251,7 +258,7 @@
y_max += 0.6 * k * abs(y_max - y_min)
img = img.crop((int(x_min), int(y_min), int(x_max), int(y_max)))
- ds = 3
+ ds = 3 # downsampling factor
original_size = img.size
img = img.resize((img.size[0] / ds, img.size[1] / ds), resample=Image.NEAREST)
img = img.resize((original_size[0], original_size[1]), resample=Image.NEAREST)
@@ -277,6 +284,7 @@
return self.length
class AFLW_aug(Dataset):
+ # AFLW dataset with flipping
def __init__(self, data_dir, filename_path, transform, img_ext='.jpg', annot_ext='.txt', image_mode='RGB'):
self.data_dir = data_dir
self.transform = transform
@@ -303,7 +311,7 @@
yaw = pose[0] * 180 / np.pi
pitch = pose[1] * 180 / np.pi
roll = pose[2] * 180 / np.pi
- # Something weird with the roll in AFLW
+ # Fix the roll in AFLW
roll *= -1
# Augment
@@ -356,7 +364,7 @@
yaw = pose[0] * 180 / np.pi
pitch = pose[1] * 180 / np.pi
roll = pose[2] * 180 / np.pi
- # Something weird with the roll in AFLW
+ # Fix the roll in AFLW
roll *= -1
# Bin values
bins = np.array(range(-99, 102, 3))
@@ -400,7 +408,7 @@
line = annot.readline().split(' ')
yaw, pitch, roll = [float(line[1]), float(line[2]), float(line[3])]
- # Crop the face
+ # Crop the face loosely
k = 0.32
x1 = float(line[4])
y1 = float(line[5])
@@ -505,11 +513,3 @@
def __len__(self):
# 15,667
return self.length
-
-
-def get_list_from_filenames(file_path):
- # input: relative path to .txt file with file names
- # output: list of relative path names
- with open(file_path) as f:
- lines = f.read().splitlines()
- return lines
diff --git a/code/hopenet.py b/code/hopenet.py
index 129ff63..0a98a66 100644
--- a/code/hopenet.py
+++ b/code/hopenet.py
@@ -5,8 +5,9 @@
import torch.nn.functional as F
class Hopenet(nn.Module):
- # This is just Hopenet with 3 output layers for yaw, pitch and roll.
- def __init__(self, block, layers, num_bins, iter_ref):
+ # Hopenet with 3 output layers for yaw, pitch and roll
+ # Predicts Euler angles by binning and regression with the expected value
+ def __init__(self, block, layers, num_bins):
self.inplanes = 64
super(Hopenet, self).__init__()
self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
@@ -23,12 +24,11 @@
self.fc_pitch = nn.Linear(512 * block.expansion, num_bins)
self.fc_roll = nn.Linear(512 * block.expansion, num_bins)
- self.softmax = nn.Softmax()
self.fc_finetune = nn.Linear(512 * block.expansion + 3, 3)
+ # Used to get the expected value of angle from bins
+ self.softmax = nn.Softmax()
self.idx_tensor = Variable(torch.FloatTensor(range(66))).cuda()
-
- self.iter_ref = iter_ref
for m in self.modules():
if isinstance(m, nn.Conv2d):
@@ -81,18 +81,12 @@
yaw = yaw.view(yaw.size(0), 1)
pitch = pitch.view(pitch.size(0), 1)
roll = roll.view(roll.size(0), 1)
- angles = []
preangles = torch.cat([yaw, pitch, roll], 1)
- angles.append(preangles)
- # angles predicts the residual
- for idx in xrange(self.iter_ref):
- angles.append(self.fc_finetune(torch.cat((angles[idx], x), 1)))
-
- return pre_yaw, pre_pitch, pre_roll, angles
+ return pre_yaw, pre_pitch, pre_roll, preangles
class ResNet(nn.Module):
-
+ # ResNet for regression of 3 Euler angles.
def __init__(self, block, layers, num_classes=1000):
self.inplanes = 64
super(ResNet, self).__init__()
@@ -147,11 +141,11 @@
x = self.avgpool(x)
x = x.view(x.size(0), -1)
x = self.fc_angles(x)
-
return x
class AlexNet(nn.Module):
-
+ # AlexNet laid out as a Hopenet - classify Euler angles in bins and
+ # regress the expected value.
def __init__(self, num_bins):
super(AlexNet, self).__init__()
self.features = nn.Sequential(
diff --git a/code/test_AFW.py b/code/test_AFW.py
deleted file mode 100644
index ab0571f..0000000
--- a/code/test_AFW.py
+++ /dev/null
@@ -1,157 +0,0 @@
-import numpy as np
-import torch
-import torch.nn as nn
-from torch.autograd import Variable
-from torch.utils.data import DataLoader
-from torchvision import transforms
-import torch.backends.cudnn as cudnn
-import torchvision
-import torch.nn.functional as F
-
-import cv2
-import matplotlib.pyplot as plt
-import sys
-import os
-import argparse
-
-import datasets
-import hopenet
-import utils
-
-def parse_args():
- """Parse input arguments."""
- parser = argparse.ArgumentParser(description='Head pose estimation using the Hopenet network.')
- parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
- default=0, type=int)
- parser.add_argument('--data_dir', dest='data_dir', help='Directory path for data.',
- default='', type=str)
- parser.add_argument('--filename_list', dest='filename_list', help='Path to text file containing relative paths for every example.',
- default='', type=str)
- parser.add_argument('--snapshot', dest='snapshot', help='Name of model snapshot.',
- default='', type=str)
- parser.add_argument('--batch_size', dest='batch_size', help='Batch size.',
- default=1, type=int)
- parser.add_argument('--save_viz', dest='save_viz', help='Save images with pose cube.',
- default=False, type=bool)
- parser.add_argument('--iter_ref', dest='iter_ref', default=1, type=int)
- parser.add_argument('--margin', dest='margin', help='Accuracy margin.', default=22.5,
- type=float)
-
- args = parser.parse_args()
-
- return args
-
-if __name__ == '__main__':
- args = parse_args()
-
- cudnn.enabled = True
- gpu = args.gpu_id
- snapshot_path = args.snapshot
-
- # ResNet101 with 3 outputs.
- # model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 23, 3], 66)
- # ResNet50
- model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66, args.iter_ref)
- # ResNet18
- # model = hopenet.Hopenet(torchvision.models.resnet.BasicBlock, [2, 2, 2, 2], 66)
-
- print 'Loading snapshot.'
- # Load snapshot
- saved_state_dict = torch.load(snapshot_path)
- model.load_state_dict(saved_state_dict)
-
- print 'Loading data.'
-
- transformations = transforms.Compose([transforms.Scale(224),
- transforms.CenterCrop(224), transforms.ToTensor()])
-
- pose_dataset = datasets.AFW(args.data_dir, args.filename_list,
- transformations)
- test_loader = torch.utils.data.DataLoader(dataset=pose_dataset,
- batch_size=args.batch_size,
- num_workers=2)
-
- model.cuda(gpu)
-
- print 'Ready to test network.'
-
- # Test the Model
- model.eval() # Change model to 'eval' mode (BN uses moving mean/var).
- total = 0
- n_margins = 20
- yaw_correct = np.zeros(n_margins)
- pitch_correct = np.zeros(n_margins)
- roll_correct = np.zeros(n_margins)
-
- idx_tensor = [idx for idx in xrange(66)]
- idx_tensor = torch.FloatTensor(idx_tensor).cuda(gpu)
-
- yaw_error = .0
- pitch_error = .0
- roll_error = .0
-
- l1loss = torch.nn.L1Loss(size_average=False)
-
- yaw_correct = .0
- yaw_margin = args.margin
-
- for i, (images, labels, name) in enumerate(test_loader):
- images = Variable(images).cuda(gpu)
- total += labels.size(0)
- label_yaw = labels[:,0].float() * 3 - 99
- label_pitch = labels[:,1].float() * 3 - 99
- label_roll = labels[:,2].float() * 3 - 99
-
- pre_yaw, pre_pitch, pre_roll, angles = model(images)
- yaw = angles[0][:,0].cpu().data
- pitch = angles[0][:,1].cpu().data
- roll = angles[0][:,2].cpu().data
-
- for idx in xrange(1,args.iter_ref+1):
- yaw += angles[idx][:,0].cpu().data
- pitch += angles[idx][:,1].cpu().data
- roll += angles[idx][:,2].cpu().data
-
- yaw = yaw * 3 - 99
- pitch = pitch * 3 - 99
- roll = roll * 3 - 99
- # Mean absolute error
- yaw_error += torch.sum(torch.abs(yaw - label_yaw))
- pitch_error += torch.sum(torch.abs(pitch - label_pitch))
- roll_error += torch.sum(torch.abs(roll - label_roll))
-
- # Yaw accuracy
- yaw_tensor_error = torch.abs(yaw - label_yaw).numpy()
-
- yaw_correct += np.where(yaw_tensor_error <= yaw_margin)[0].shape[0]
-
- if yaw_tensor_error[0] > yaw_margin:
- print name[0] + ' ' + str(yaw[0]) + ' ' + str(label_yaw[0]) + ' ' + str(yaw_tensor_error[0])
-
- # Binned Accuracy
- # for er in xrange(n_margins):
- # yaw_bpred[er] += (label_yaw[0] in range(yaw_bpred[0,0] - er, yaw_bpred[0,0] + er + 1))
- # pitch_bpred[er] += (label_pitch[0] in range(pitch_bpred[0,0] - er, pitch_bpred[0,0] + er + 1))
- # roll_bpred[er] += (label_roll[0] in range(roll_bpred[0,0] - er, roll_bpred[0,0] + er + 1))
-
- # print label_yaw[0], yaw_bpred[0,0]
-
- # Save images with pose cube.
- # TODO: fix for larger batch size
- if args.save_viz:
- name = name[0]
- cv2_img = cv2.imread(os.path.join(args.data_dir, name + '.jpg'))
- #print os.path.join('output/images', name + '.jpg')
- #print label_yaw[0] * 3 - 99, label_pitch[0] * 3 - 99, label_roll[0] * 3 - 99
- #print yaw_predicted * 3 - 99, pitch_predicted * 3 - 99, roll_predicted * 3 - 99
- utils.plot_pose_cube(cv2_img, yaw[0], pitch[0], roll[0])
- cv2.imwrite(os.path.join('output/images', name + '.jpg'), cv2_img)
-
- print('Test error in degrees of the model on the ' + str(total) +
- ' test images. Yaw: %.4f, Pitch: %.4f, Roll: %.4f' % (yaw_error / total,
- pitch_error / total, roll_error / total))
- print ('Yaw accuracy (<= ' + str(yaw_margin) + ' degrees) is %.4f' % (yaw_correct / total))
-
- # Binned accuracy
- # for idx in xrange(len(yaw_correct)):
- # print yaw_correct[idx] / total, pitch_correct[idx] / total, roll_correct[idx] / total
diff --git a/code/test_preangles.py b/code/test_preangles.py
index cfee8d1..3d70bb0 100644
--- a/code/test_preangles.py
+++ b/code/test_preangles.py
@@ -1,4 +1,9 @@
+import sys, os, argparse
+
import numpy as np
+import cv2
+import matplotlib.pyplot as plt
+
import torch
import torch.nn as nn
from torch.autograd import Variable
@@ -8,15 +13,7 @@
import torchvision
import torch.nn.functional as F
-import cv2
-import matplotlib.pyplot as plt
-import sys
-import os
-import argparse
-
-import datasets
-import hopenet
-import utils
+import datasets, hopenet, utils
def parse_args():
"""Parse input arguments."""
@@ -46,12 +43,8 @@
gpu = args.gpu_id
snapshot_path = args.snapshot
- # ResNet101 with 3 outputs.
- # model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 23, 3], 66)
- # ResNet50
- model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66, 0)
- # ResNet18
- # model = hopenet.Hopenet(torchvision.models.resnet.BasicBlock, [2, 2, 2, 2], 66)
+ # ResNet50 structure
+ model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66)
print 'Loading snapshot.'
# Load snapshot
@@ -64,18 +57,18 @@
transforms.CenterCrop(224), transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
- if args.dataset == 'AFLW2000':
- pose_dataset = datasets.AFLW2000(args.data_dir, args.filename_list,
- transformations)
- elif args.dataset == 'AFLW2000_ds':
- pose_dataset = datasets.AFLW2000_ds(args.data_dir, args.filename_list,
- transformations)
+ if args.dataset == 'Pose_300W_LP':
+ pose_dataset = datasets.Pose_300W_LP(args.data_dir, args.filename_list, transformations)
+ elif args.dataset == 'Pose_300W_LP_random_ds':
+ pose_dataset = datasets.Pose_300W_LP_random_ds(args.data_dir, args.filename_list, transformations)
+ elif args.dataset == 'AFLW2000':
+ pose_dataset = datasets.AFLW2000(args.data_dir, args.filename_list, transformations)
elif args.dataset == 'BIWI':
pose_dataset = datasets.BIWI(args.data_dir, args.filename_list, transformations)
elif args.dataset == 'AFLW':
pose_dataset = datasets.AFLW(args.data_dir, args.filename_list, transformations)
- elif args.dataset == 'Pose_300W_LP':
- pose_dataset = datasets.Pose_300W_LP(args.data_dir, args.filename_list, transformations)
+ elif args.dataset == 'AFLW_aug':
+ pose_dataset = datasets.AFLW_aug(args.data_dir, args.filename_list, transformations)
elif args.dataset == 'AFW':
pose_dataset = datasets.AFW(args.data_dir, args.filename_list, transformations)
else:
@@ -93,9 +86,6 @@
model.eval() # Change model to 'eval' mode (BN uses moving mean/var).
total = 0
- idx_tensor = [idx for idx in xrange(66)]
- idx_tensor = torch.FloatTensor(idx_tensor).cuda(gpu)
-
yaw_error = .0
pitch_error = .0
roll_error = .0
@@ -105,6 +95,7 @@
for i, (images, labels, cont_labels, name) in enumerate(test_loader):
images = Variable(images).cuda(gpu)
total += cont_labels.size(0)
+
label_yaw = cont_labels[:,0].float()
label_pitch = cont_labels[:,1].float()
label_roll = cont_labels[:,2].float()
diff --git a/code/train_alexnet.py b/code/train_alexnet.py
index 5f60211..9254ee7 100644
--- a/code/train_alexnet.py
+++ b/code/train_alexnet.py
@@ -1,4 +1,9 @@
+import sys, os, argparse, time
+
import numpy as np
+import cv2
+import matplotlib.pyplot as plt
+
import torch
import torch.nn as nn
from torch.autograd import Variable
@@ -8,17 +13,8 @@
import torch.backends.cudnn as cudnn
import torch.nn.functional as F
-import cv2
-import matplotlib.pyplot as plt
-import sys
-import os
-import argparse
-
-import datasets
-import hopenet
+import datasets, hopenet
import torch.utils.model_zoo as model_zoo
-
-import time
model_urls = {
'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
@@ -43,16 +39,12 @@
parser.add_argument('--alpha', dest='alpha', help='Regression loss coefficient.',
default=0.001, type=float)
parser.add_argument('--dataset', dest='dataset', help='Dataset type.', default='Pose_300W_LP', type=str)
-
args = parser.parse_args()
return args
def get_ignored_params(model):
# Generator function that yields ignored params.
- b = []
- b.append(model.features[0])
- b.append(model.features[1])
- b.append(model.features[2])
+ b = [model.features[0], model.features[1], model.features[2]]
for i in range(len(b)):
for module_name, module in b[i].named_modules():
if 'bn' in module_name:
@@ -75,10 +67,7 @@
yield param
def get_fc_params(model):
- b = []
- b.append(model.fc_yaw)
- b.append(model.fc_pitch)
- b.append(model.fc_roll)
+ b = [model.fc_yaw, model.fc_pitch, model.fc_roll]
for i in range(len(b)):
for module_name, module in b[i].named_modules():
for name, param in module.named_parameters():
@@ -87,11 +76,8 @@
def load_filtered_state_dict(model, snapshot):
# By user apaszke from discuss.pytorch.org
model_dict = model.state_dict()
- # 1. filter out unnecessary keys
snapshot = {k: v for k, v in snapshot.items() if k in model_dict}
- # 2. overwrite entries in the existing state dict
model_dict.update(snapshot)
- # 3. load the new state dict
model.load_state_dict(model_dict)
if __name__ == '__main__':
@@ -116,6 +102,8 @@
if args.dataset == 'Pose_300W_LP':
pose_dataset = datasets.Pose_300W_LP(args.data_dir, args.filename_list, transformations)
+ elif args.dataset == 'Pose_300W_LP_random_ds':
+ pose_dataset = datasets.Pose_300W_LP_random_ds(args.data_dir, args.filename_list, transformations)
elif args.dataset == 'AFLW2000':
pose_dataset = datasets.AFLW2000(args.data_dir, args.filename_list, transformations)
elif args.dataset == 'BIWI':
@@ -141,48 +129,38 @@
# Regression loss coefficient
alpha = args.alpha
- idx_tensor = [idx for idx in xrange(66)]
- idx_tensor = Variable(torch.FloatTensor(idx_tensor)).cuda(gpu)
-
optimizer = torch.optim.Adam([{'params': get_ignored_params(model), 'lr': 0},
{'params': get_non_ignored_params(model), 'lr': args.lr},
{'params': get_fc_params(model), 'lr': args.lr * 5}],
lr = args.lr)
print 'Ready to train network.'
- print 'First phase of training.'
for epoch in range(num_epochs):
- # start = time.time()
for i, (images, labels, cont_labels, name) in enumerate(train_loader):
- # print i
- # print 'start: ', time.time() - start
images = Variable(images).cuda(gpu)
+
+ # Binned labels
label_yaw = Variable(labels[:,0]).cuda(gpu)
label_pitch = Variable(labels[:,1]).cuda(gpu)
label_roll = Variable(labels[:,2]).cuda(gpu)
- label_angles = Variable(cont_labels[:,:3]).cuda(gpu)
+ # Continuous labels
label_yaw_cont = Variable(cont_labels[:,0]).cuda(gpu)
label_pitch_cont = Variable(cont_labels[:,1]).cuda(gpu)
label_roll_cont = Variable(cont_labels[:,2]).cuda(gpu)
- optimizer.zero_grad()
- model.zero_grad()
+ # Forward pass
+ yaw, pitch, roll, angles = model(images)
- pre_yaw, pre_pitch, pre_roll = model(images)
# Cross entropy loss
- loss_yaw = criterion(pre_yaw, label_yaw)
- loss_pitch = criterion(pre_pitch, label_pitch)
- loss_roll = criterion(pre_roll, label_roll)
+ loss_yaw = criterion(yaw, label_yaw)
+ loss_pitch = criterion(pitch, label_pitch)
+ loss_roll = criterion(roll, label_roll)
# MSE loss
- yaw_predicted = softmax(pre_yaw)
- pitch_predicted = softmax(pre_pitch)
- roll_predicted = softmax(pre_roll)
-
- yaw_predicted = torch.sum(yaw_predicted * idx_tensor, 1) * 3 - 99
- pitch_predicted = torch.sum(pitch_predicted * idx_tensor, 1) * 3 - 99
- roll_predicted = torch.sum(roll_predicted * idx_tensor, 1) * 3 - 99
+ yaw_predicted = angles[:,0]
+ pitch_predicted = angles[:,1]
+ roll_predicted = angles[:,2]
loss_reg_yaw = reg_criterion(yaw_predicted, label_yaw_cont)
loss_reg_pitch = reg_criterion(pitch_predicted, label_pitch_cont)
@@ -195,17 +173,13 @@
loss_seq = [loss_yaw, loss_pitch, loss_roll]
grad_seq = [torch.Tensor(1).cuda(gpu) for _ in range(len(loss_seq))]
+ optimizer.zero_grad()
torch.autograd.backward(loss_seq, grad_seq)
optimizer.step()
-
- # print 'end: ', time.time() - start
if (i+1) % 100 == 0:
print ('Epoch [%d/%d], Iter [%d/%d] Losses: Yaw %.4f, Pitch %.4f, Roll %.4f'
%(epoch+1, num_epochs, i+1, len(pose_dataset)//batch_size, loss_yaw.data[0], loss_pitch.data[0], loss_roll.data[0]))
- # if epoch == 0:
- # torch.save(model.state_dict(),
- # 'output/snapshots/' + args.output_string + '_iter_'+ str(i+1) + '.pkl')
# Save models at numbered epochs.
if epoch % 1 == 0 and epoch < num_epochs:
diff --git a/code/train_preangles.py b/code/train_preangles.py
index ffceee2..1fe626c 100644
--- a/code/train_preangles.py
+++ b/code/train_preangles.py
@@ -1,4 +1,9 @@
+import sys, os, argparse, time
+
import numpy as np
+import cv2
+import matplotlib.pyplot as plt
+
import torch
import torch.nn as nn
from torch.autograd import Variable
@@ -8,25 +13,8 @@
import torch.backends.cudnn as cudnn
import torch.nn.functional as F
-import cv2
-import matplotlib.pyplot as plt
-import sys
-import os
-import argparse
-
-import datasets
-import hopenet
+import datasets, hopenet
import torch.utils.model_zoo as model_zoo
-
-import time
-
-model_urls = {
- 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
- 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
- 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
- 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
- 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
-}
def parse_args():
"""Parse input arguments."""
@@ -53,10 +41,7 @@
def get_ignored_params(model):
# Generator function that yields ignored params.
- b = []
- b.append(model.conv1)
- b.append(model.bn1)
- b.append(model.fc_finetune)
+ b = [model.conv1, model.bn1, model.fc_finetune]
for i in range(len(b)):
for module_name, module in b[i].named_modules():
if 'bn' in module_name:
@@ -66,11 +51,7 @@
def get_non_ignored_params(model):
# Generator function that yields params that will be optimized.
- b = []
- b.append(model.layer1)
- b.append(model.layer2)
- b.append(model.layer3)
- b.append(model.layer4)
+ b = [model.layer1, model.layer2, model.layer3, model.layer4]
for i in range(len(b)):
for module_name, module in b[i].named_modules():
if 'bn' in module_name:
@@ -79,10 +60,8 @@
yield param
def get_fc_params(model):
- b = []
- b.append(model.fc_yaw)
- b.append(model.fc_pitch)
- b.append(model.fc_roll)
+ # Generator function that yields fc layer params.
+ b = [model.fc_yaw, model.fc_pitch, model.fc_roll]
for i in range(len(b)):
for module_name, module in b[i].named_modules():
for name, param in module.named_parameters():
@@ -91,11 +70,8 @@
def load_filtered_state_dict(model, snapshot):
# By user apaszke from discuss.pytorch.org
model_dict = model.state_dict()
- # 1. filter out unnecessary keys
snapshot = {k: v for k, v in snapshot.items() if k in model_dict}
- # 2. overwrite entries in the existing state dict
model_dict.update(snapshot)
- # 3. load the new state dict
model.load_state_dict(model_dict)
if __name__ == '__main__':
@@ -109,13 +85,9 @@
if not os.path.exists('output/snapshots'):
os.makedirs('output/snapshots')
- # ResNet101 with 3 outputs
- # model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 23, 3], 66)
- # ResNet50
- model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66, 0)
- # ResNet18
- # model = hopenet.Hopenet(torchvision.models.resnet.BasicBlock, [2, 2, 2, 2], 66)
- load_filtered_state_dict(model, model_zoo.load_url(model_urls['resnet50']))
+ # ResNet50 structure
+ model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66)
+ load_filtered_state_dict(model, model_zoo.load_url('https://download.pytorch.org/models/resnet50-19c8e357.pth'))
print 'Loading data.'
@@ -140,20 +112,17 @@
else:
print 'Error: not a valid dataset name'
sys.exit()
+
train_loader = torch.utils.data.DataLoader(dataset=pose_dataset,
batch_size=batch_size,
shuffle=True,
num_workers=2)
model.cuda(gpu)
- softmax = nn.Softmax().cuda(gpu)
criterion = nn.CrossEntropyLoss().cuda(gpu)
reg_criterion = nn.MSELoss().cuda(gpu)
# Regression loss coefficient
alpha = args.alpha
-
- idx_tensor = [idx for idx in xrange(66)]
- idx_tensor = Variable(torch.FloatTensor(idx_tensor)).cuda(gpu)
optimizer = torch.optim.Adam([{'params': get_ignored_params(model), 'lr': 0},
{'params': get_non_ignored_params(model), 'lr': args.lr},
@@ -161,39 +130,32 @@
lr = args.lr)
print 'Ready to train network.'
- print 'First phase of training.'
for epoch in range(num_epochs):
- # start = time.time()
for i, (images, labels, cont_labels, name) in enumerate(train_loader):
- # print i
- # print 'start: ', time.time() - start
images = Variable(images).cuda(gpu)
+
+ # Binned labels
label_yaw = Variable(labels[:,0]).cuda(gpu)
label_pitch = Variable(labels[:,1]).cuda(gpu)
label_roll = Variable(labels[:,2]).cuda(gpu)
- label_angles = Variable(cont_labels[:,:3]).cuda(gpu)
+ # Continuous labels
label_yaw_cont = Variable(cont_labels[:,0]).cuda(gpu)
label_pitch_cont = Variable(cont_labels[:,1]).cuda(gpu)
label_roll_cont = Variable(cont_labels[:,2]).cuda(gpu)
- optimizer.zero_grad()
- model.zero_grad()
+ # Forward pass
+ yaw, pitch, roll, angles = model(images)
- pre_yaw, pre_pitch, pre_roll, angles = model(images)
# Cross entropy loss
- loss_yaw = criterion(pre_yaw, label_yaw)
- loss_pitch = criterion(pre_pitch, label_pitch)
- loss_roll = criterion(pre_roll, label_roll)
+ loss_yaw = criterion(yaw, label_yaw)
+ loss_pitch = criterion(pitch, label_pitch)
+ loss_roll = criterion(roll, label_roll)
# MSE loss
- yaw_predicted = softmax(pre_yaw)
- pitch_predicted = softmax(pre_pitch)
- roll_predicted = softmax(pre_roll)
-
- yaw_predicted = torch.sum(yaw_predicted * idx_tensor, 1) * 3 - 99
- pitch_predicted = torch.sum(pitch_predicted * idx_tensor, 1) * 3 - 99
- roll_predicted = torch.sum(roll_predicted * idx_tensor, 1) * 3 - 99
+ yaw_predicted = angles[:,0]
+ pitch_predicted = angles[:,1]
+ roll_predicted = angles[:,2]
loss_reg_yaw = reg_criterion(yaw_predicted, label_yaw_cont)
loss_reg_pitch = reg_criterion(pitch_predicted, label_pitch_cont)
@@ -206,17 +168,13 @@
loss_seq = [loss_yaw, loss_pitch, loss_roll]
grad_seq = [torch.Tensor(1).cuda(gpu) for _ in range(len(loss_seq))]
+ optimizer.zero_grad()
torch.autograd.backward(loss_seq, grad_seq)
optimizer.step()
-
- # print 'end: ', time.time() - start
if (i+1) % 100 == 0:
print ('Epoch [%d/%d], Iter [%d/%d] Losses: Yaw %.4f, Pitch %.4f, Roll %.4f'
%(epoch+1, num_epochs, i+1, len(pose_dataset)//batch_size, loss_yaw.data[0], loss_pitch.data[0], loss_roll.data[0]))
- # if epoch == 0:
- # torch.save(model.state_dict(),
- # 'output/snapshots/' + args.output_string + '_iter_'+ str(i+1) + '.pkl')
# Save models at numbered epochs.
if epoch % 1 == 0 and epoch < num_epochs:
diff --git a/code/train_resnet50_regression.py b/code/train_resnet50_regression.py
index a67a6f2..04d27c7 100644
--- a/code/train_resnet50_regression.py
+++ b/code/train_resnet50_regression.py
@@ -1,4 +1,9 @@
+import sys, os, argparse, time
+
import numpy as np
+import cv2
+import matplotlib.pyplot as plt
+
import torch
import torch.nn as nn
from torch.autograd import Variable
@@ -8,25 +13,8 @@
import torch.backends.cudnn as cudnn
import torch.nn.functional as F
-import cv2
-import matplotlib.pyplot as plt
-import sys
-import os
-import argparse
-
-import datasets
-import hopenet
+import datasets, hopenet
import torch.utils.model_zoo as model_zoo
-
-import time
-
-model_urls = {
- 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
- 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
- 'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
- 'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
- 'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
-}
def parse_args():
"""Parse input arguments."""
@@ -51,9 +39,7 @@
def get_ignored_params(model):
# Generator function that yields ignored params.
- b = []
- b.append(model.conv1)
- b.append(model.bn1)
+ b = [model.conv1, model.bn1]
for i in range(len(b)):
for module_name, module in b[i].named_modules():
if 'bn' in module_name:
@@ -63,11 +49,7 @@
def get_non_ignored_params(model):
# Generator function that yields params that will be optimized.
- b = []
- b.append(model.layer1)
- b.append(model.layer2)
- b.append(model.layer3)
- b.append(model.layer4)
+ b = [model.layer1, model.layer2, model.layer3, model.layer4]
for i in range(len(b)):
for module_name, module in b[i].named_modules():
if 'bn' in module_name:
@@ -76,8 +58,8 @@
yield param
def get_fc_params(model):
- b = []
- b.append(model.fc_angles)
+ # Generator function that yields fc layer params.
+ b = [model.fc_angles]
for i in range(len(b)):
for module_name, module in b[i].named_modules():
for name, param in module.named_parameters():
@@ -86,11 +68,8 @@
def load_filtered_state_dict(model, snapshot):
# By user apaszke from discuss.pytorch.org
model_dict = model.state_dict()
- # 1. filter out unnecessary keys
snapshot = {k: v for k, v in snapshot.items() if k in model_dict}
- # 2. overwrite entries in the existing state dict
model_dict.update(snapshot)
- # 3. load the new state dict
model.load_state_dict(model_dict)
if __name__ == '__main__':
@@ -106,8 +85,7 @@
# ResNet50
model = hopenet.ResNet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 3)
-
- load_filtered_state_dict(model, model_zoo.load_url(model_urls['resnet50']))
+ load_filtered_state_dict(model, model_zoo.load_url('https://download.pytorch.org/models/resnet50-19c8e357.pth'))
print 'Loading data.'
@@ -117,6 +95,8 @@
if args.dataset == 'Pose_300W_LP':
pose_dataset = datasets.Pose_300W_LP(args.data_dir, args.filename_list, transformations)
+ elif args.dataset == 'Pose_300W_LP_random_ds':
+ pose_dataset = datasets.Pose_300W_LP_random_ds(args.data_dir, args.filename_list, transformations)
elif args.dataset == 'AFLW2000':
pose_dataset = datasets.AFLW2000(args.data_dir, args.filename_list, transformations)
elif args.dataset == 'BIWI':
@@ -150,23 +130,16 @@
images = Variable(images).cuda(gpu)
label_angles = Variable(cont_labels[:,:3]).cuda(gpu)
-
- optimizer.zero_grad()
- model.zero_grad()
-
angles = model(images)
loss = criterion(angles, label_angles)
-
+ optimizer.zero_grad()
loss.backward()
optimizer.step()
if (i+1) % 100 == 0:
print ('Epoch [%d/%d], Iter [%d/%d] Loss: %.4f'
%(epoch+1, num_epochs, i+1, len(pose_dataset)//batch_size, loss.data[0]))
- # if epoch == 0:
- # torch.save(model.state_dict(),
- # 'output/snapshots/' + args.output_string + '_iter_'+ str(i+1) + '.pkl')
# Save models at numbered epochs.
if epoch % 1 == 0 and epoch < num_epochs:
diff --git a/code/vdsr.py b/code/vdsr.py
deleted file mode 100755
index 1c4f163..0000000
--- a/code/vdsr.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import torch
-import torch.nn as nn
-from math import sqrt
-
-class Conv_ReLU_Block(nn.Module):
- def __init__(self):
- super(Conv_ReLU_Block, self).__init__()
- self.conv = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False)
- self.relu = nn.ReLU(inplace=True)
-
- def forward(self, x):
- return self.relu(self.conv(x))
-
-class Net(nn.Module):
- def __init__(self):
- super(Net, self).__init__()
- self.residual_layer = self.make_layer(Conv_ReLU_Block, 18)
- self.input = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1, bias=False)
- self.output = nn.Conv2d(in_channels=64, out_channels=1, kernel_size=3, stride=1, padding=1, bias=False)
- self.relu = nn.ReLU(inplace=True)
-
- for m in self.modules():
- if isinstance(m, nn.Conv2d):
- n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
- m.weight.data.normal_(0, sqrt(2. / n))
-
- def make_layer(self, block, num_of_layer):
- layers = []
- for _ in range(num_of_layer):
- layers.append(block())
- return nn.Sequential(*layers)
-
- def forward(self, x):
- residual = x
- out = self.relu(self.input(x))
- out = self.residual_layer(out)
- out = self.output(out)
- out = torch.add(out,residual)
- return out
-
\ No newline at end of file
--
Gitblit v1.8.0