From 9a02f63f4d5692399a95cb889e8f7629a165c28e Mon Sep 17 00:00:00 2001
From: natanielruiz <nataniel777@hotmail.com>
Date: Thu, 21 Sep 2017 05:56:20 +0800
Subject: [PATCH] next
---
code/datasets.py | 19 ++---
code/hopenet.py | 85 ----------------------------
code/train_preangles.py | 36 ++++++------
code/test_preangles.py | 2
4 files changed, 28 insertions(+), 114 deletions(-)
diff --git a/code/datasets.py b/code/datasets.py
index 4007369..5825105 100644
--- a/code/datasets.py
+++ b/code/datasets.py
@@ -38,7 +38,9 @@
x_max = max(pt2d[0,:])
y_max = max(pt2d[1,:])
- k = 0.35
+ # k = 0.35 was being used beforehand
+ # k = 0.2 to 0.40
+ k = np.random.random_sample() * 0.2 + 0.2
x_min -= 0.6 * k * abs(x_max - x_min)
y_min -= 2 * k * abs(y_max - y_min)
x_max += 0.6 * k * abs(x_max - x_min)
@@ -59,15 +61,10 @@
roll = -roll
img = img.transpose(Image.FLIP_LEFT_RIGHT)
- # Rotate?
- # rnd = np.random.random_sample()
- # if rnd < 0.5:
- # if roll >= 0:
- # img = img.rotate(30)
- # roll -= 30
- # else:
- # img = img.rotate(-30)
- # roll += 30
+ # Blur?
+ rnd = np.random.random_sample()
+ if rnd < 0.05:
+ img = img.filter(ImageFilter.BLUR)
# Bin values
bins = np.array(range(-99, 102, 3))
@@ -82,7 +79,7 @@
if self.transform is not None:
img = self.transform(img)
- return img, labels, cont_labels, elf.X_train[index]
+ return img, labels, cont_labels, self.X_train[index]
def __len__(self):
# 122,450
diff --git a/code/hopenet.py b/code/hopenet.py
index 4aa0dfb..b02beec 100644
--- a/code/hopenet.py
+++ b/code/hopenet.py
@@ -125,88 +125,3 @@
angles.append(self.fc_finetune(torch.cat((preangles, x), 1)))
return pre_yaw, pre_pitch, pre_roll, angles
-
-class Hopenet_shape(nn.Module):
- # This is just Hopenet with 3 output layers for yaw, pitch and roll.
- def __init__(self, block, layers, num_bins, shape_bins):
- self.inplanes = 64
- super(Hopenet_shape, self).__init__()
- self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
- bias=False)
- self.bn1 = nn.BatchNorm2d(64)
- self.relu = nn.ReLU(inplace=True)
- self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
- self.layer1 = self._make_layer(block, 64, layers[0])
- self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
- self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
- self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
- self.avgpool = nn.AvgPool2d(7)
- self.fc_yaw = nn.Linear(512 * block.expansion, num_bins)
- self.fc_pitch = nn.Linear(512 * block.expansion, num_bins)
- self.fc_roll = nn.Linear(512 * block.expansion, num_bins)
- self.fc_shape_0 = nn.Linear(512 * block.expansion, shape_bins)
- self.fc_shape_1 = nn.Linear(512 * block.expansion, shape_bins)
- self.fc_shape_2 = nn.Linear(512 * block.expansion, shape_bins)
- self.fc_shape_3 = nn.Linear(512 * block.expansion, shape_bins)
- self.fc_shape_4 = nn.Linear(512 * block.expansion, shape_bins)
- self.fc_shape_5 = nn.Linear(512 * block.expansion, shape_bins)
- self.fc_shape_6 = nn.Linear(512 * block.expansion, shape_bins)
- self.fc_shape_7 = nn.Linear(512 * block.expansion, shape_bins)
- self.fc_shape_8 = nn.Linear(512 * block.expansion, shape_bins)
- self.fc_shape_9 = nn.Linear(512 * block.expansion, shape_bins)
-
- for m in self.modules():
- if isinstance(m, nn.Conv2d):
- n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
- m.weight.data.normal_(0, math.sqrt(2. / n))
- elif isinstance(m, nn.BatchNorm2d):
- m.weight.data.fill_(1)
- m.bias.data.zero_()
-
- def _make_layer(self, block, planes, blocks, stride=1):
- downsample = None
- if stride != 1 or self.inplanes != planes * block.expansion:
- downsample = nn.Sequential(
- nn.Conv2d(self.inplanes, planes * block.expansion,
- kernel_size=1, stride=stride, bias=False),
- nn.BatchNorm2d(planes * block.expansion),
- )
-
- layers = []
- layers.append(block(self.inplanes, planes, stride, downsample))
- self.inplanes = planes * block.expansion
- for i in range(1, blocks):
- layers.append(block(self.inplanes, planes))
-
- return nn.Sequential(*layers)
-
- def forward(self, x):
- x = self.conv1(x)
- x = self.bn1(x)
- x = self.relu(x)
- x = self.maxpool(x)
-
- x = self.layer1(x)
- x = self.layer2(x)
- x = self.layer3(x)
- x = self.layer4(x)
-
- x = self.avgpool(x)
- x = x.view(x.size(0), -1)
- yaw = self.fc_yaw(x)
- pitch = self.fc_pitch(x)
- roll = self.fc_roll(x)
-
- shape = []
- shape.append(self.fc_shape_0(x))
- shape.append(self.fc_shape_1(x))
- shape.append(self.fc_shape_2(x))
- shape.append(self.fc_shape_3(x))
- shape.append(self.fc_shape_4(x))
- shape.append(self.fc_shape_5(x))
- shape.append(self.fc_shape_6(x))
- shape.append(self.fc_shape_7(x))
- shape.append(self.fc_shape_8(x))
- shape.append(self.fc_shape_9(x))
-
- return yaw, pitch, roll, shape
diff --git a/code/test_preangles.py b/code/test_preangles.py
index 2cf251e..08561fb 100644
--- a/code/test_preangles.py
+++ b/code/test_preangles.py
@@ -71,6 +71,8 @@
pose_dataset = datasets.BIWI(args.data_dir, args.filename_list, transformations)
elif args.dataset == 'AFLW':
pose_dataset = datasets.AFLW(args.data_dir, args.filename_list, transformations)
+ elif args.dataset == 'Pose_300W_LP':
+ pose_dataset = datasets.Pose_300W_LP(args.data_dir, args.filename_list, transformations)
elif args.dataset == 'AFW':
pose_dataset = datasets.AFW(args.data_dir, args.filename_list, transformations)
else:
diff --git a/code/train_preangles.py b/code/train_preangles.py
index afb98c8..6622d3f 100644
--- a/code/train_preangles.py
+++ b/code/train_preangles.py
@@ -18,6 +18,8 @@
import hopenet
import torch.utils.model_zoo as model_zoo
+import time
+
model_urls = {
'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
@@ -32,8 +34,6 @@
parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
default=0, type=int)
parser.add_argument('--num_epochs', dest='num_epochs', help='Maximum number of training epochs.',
- default=5, type=int)
- parser.add_argument('--num_epochs_ft', dest='num_epochs_ft', help='Maximum number of finetuning epochs.',
default=5, type=int)
parser.add_argument('--batch_size', dest='batch_size', help='Batch size.',
default=16, type=int)
@@ -103,7 +103,6 @@
cudnn.enabled = True
num_epochs = args.num_epochs
- num_epochs_ft = args.num_epochs_ft
batch_size = args.batch_size
gpu = args.gpu_id
@@ -123,7 +122,6 @@
transformations = transforms.Compose([transforms.Scale(240),
transforms.RandomCrop(224), transforms.ToTensor(),
transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
-
if args.dataset == 'Pose_300W_LP':
pose_dataset = datasets.Pose_300W_LP(args.data_dir, args.filename_list, transformations)
@@ -146,9 +144,9 @@
num_workers=2)
model.cuda(gpu)
- softmax = nn.Softmax()
- criterion = nn.CrossEntropyLoss().cuda()
- reg_criterion = nn.MSELoss().cuda()
+ softmax = nn.Softmax().cuda(gpu)
+ criterion = nn.CrossEntropyLoss().cuda(gpu)
+ reg_criterion = nn.MSELoss().cuda(gpu)
# Regression loss coefficient
alpha = args.alpha
@@ -161,25 +159,26 @@
lr = args.lr)
print 'Ready to train network.'
-
print 'First phase of training.'
for epoch in range(num_epochs):
+ start = time.time()
for i, (images, labels, cont_labels, name) in enumerate(train_loader):
- images = Variable(images.cuda(gpu))
- label_yaw = Variable(labels[:,0].cuda(gpu))
- label_pitch = Variable(labels[:,1].cuda(gpu))
- label_roll = Variable(labels[:,2].cuda(gpu))
+ print i
+ print 'start: ', time.time() - start
+ images = Variable(images).cuda(gpu)
+ label_yaw = Variable(labels[:,0]).cuda(gpu)
+ label_pitch = Variable(labels[:,1]).cuda(gpu)
+ label_roll = Variable(labels[:,2]).cuda(gpu)
- label_angles = Variable(cont_labels[:,:3].cuda(gpu))
- label_yaw_cont = Variable(cont_labels[:,0].cuda(gpu))
- label_pitch_cont = Variable(cont_labels[:,1].cuda(gpu))
- label_roll_cont = Variable(cont_labels[:,2].cuda(gpu))
+ label_angles = Variable(cont_labels[:,:3]).cuda(gpu)
+ label_yaw_cont = Variable(cont_labels[:,0]).cuda(gpu)
+ label_pitch_cont = Variable(cont_labels[:,1]).cuda(gpu)
+ label_roll_cont = Variable(cont_labels[:,2]).cuda(gpu)
optimizer.zero_grad()
model.zero_grad()
pre_yaw, pre_pitch, pre_roll, angles = model(images)
-
# Cross entropy loss
loss_yaw = criterion(pre_yaw, label_yaw)
loss_pitch = criterion(pre_pitch, label_pitch)
@@ -198,7 +197,6 @@
loss_reg_pitch = reg_criterion(pitch_predicted, label_pitch_cont)
loss_reg_roll = reg_criterion(roll_predicted, label_roll_cont)
- # print yaw_predicted, label_yaw.float(), loss_reg_yaw
# Total loss
loss_yaw += alpha * loss_reg_yaw
loss_pitch += alpha * loss_reg_pitch
@@ -209,6 +207,8 @@
torch.autograd.backward(loss_seq, grad_seq)
optimizer.step()
+ print 'end: ', time.time() - start
+
if (i+1) % 100 == 0:
print ('Epoch [%d/%d], Iter [%d/%d] Losses: Yaw %.4f, Pitch %.4f, Roll %.4f'
%(epoch+1, num_epochs, i+1, len(pose_dataset)//batch_size, loss_yaw.data[0], loss_pitch.data[0], loss_roll.data[0]))
--
Gitblit v1.8.0