From 9a02f63f4d5692399a95cb889e8f7629a165c28e Mon Sep 17 00:00:00 2001 From: natanielruiz <nataniel777@hotmail.com> Date: 星期四, 21 九月 2017 05:56:20 +0800 Subject: [PATCH] next --- code/datasets.py | 19 ++--- code/hopenet.py | 85 ---------------------------- code/train_preangles.py | 36 ++++++------ code/test_preangles.py | 2 4 files changed, 28 insertions(+), 114 deletions(-) diff --git a/code/datasets.py b/code/datasets.py index 4007369..5825105 100644 --- a/code/datasets.py +++ b/code/datasets.py @@ -38,7 +38,9 @@ x_max = max(pt2d[0,:]) y_max = max(pt2d[1,:]) - k = 0.35 + # k = 0.35 was being used beforehand + # k = 0.2 to 0.40 + k = np.random.random_sample() * 0.2 + 0.2 x_min -= 0.6 * k * abs(x_max - x_min) y_min -= 2 * k * abs(y_max - y_min) x_max += 0.6 * k * abs(x_max - x_min) @@ -59,15 +61,10 @@ roll = -roll img = img.transpose(Image.FLIP_LEFT_RIGHT) - # Rotate? - # rnd = np.random.random_sample() - # if rnd < 0.5: - # if roll >= 0: - # img = img.rotate(30) - # roll -= 30 - # else: - # img = img.rotate(-30) - # roll += 30 + # Blur? + rnd = np.random.random_sample() + if rnd < 0.05: + img = img.filter(ImageFilter.BLUR) # Bin values bins = np.array(range(-99, 102, 3)) @@ -82,7 +79,7 @@ if self.transform is not None: img = self.transform(img) - return img, labels, cont_labels, elf.X_train[index] + return img, labels, cont_labels, self.X_train[index] def __len__(self): # 122,450 diff --git a/code/hopenet.py b/code/hopenet.py index 4aa0dfb..b02beec 100644 --- a/code/hopenet.py +++ b/code/hopenet.py @@ -125,88 +125,3 @@ angles.append(self.fc_finetune(torch.cat((preangles, x), 1))) return pre_yaw, pre_pitch, pre_roll, angles - -class Hopenet_shape(nn.Module): - # This is just Hopenet with 3 output layers for yaw, pitch and roll. - def __init__(self, block, layers, num_bins, shape_bins): - self.inplanes = 64 - super(Hopenet_shape, self).__init__() - self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, - bias=False) - self.bn1 = nn.BatchNorm2d(64) - self.relu = nn.ReLU(inplace=True) - self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - self.layer1 = self._make_layer(block, 64, layers[0]) - self.layer2 = self._make_layer(block, 128, layers[1], stride=2) - self.layer3 = self._make_layer(block, 256, layers[2], stride=2) - self.layer4 = self._make_layer(block, 512, layers[3], stride=2) - self.avgpool = nn.AvgPool2d(7) - self.fc_yaw = nn.Linear(512 * block.expansion, num_bins) - self.fc_pitch = nn.Linear(512 * block.expansion, num_bins) - self.fc_roll = nn.Linear(512 * block.expansion, num_bins) - self.fc_shape_0 = nn.Linear(512 * block.expansion, shape_bins) - self.fc_shape_1 = nn.Linear(512 * block.expansion, shape_bins) - self.fc_shape_2 = nn.Linear(512 * block.expansion, shape_bins) - self.fc_shape_3 = nn.Linear(512 * block.expansion, shape_bins) - self.fc_shape_4 = nn.Linear(512 * block.expansion, shape_bins) - self.fc_shape_5 = nn.Linear(512 * block.expansion, shape_bins) - self.fc_shape_6 = nn.Linear(512 * block.expansion, shape_bins) - self.fc_shape_7 = nn.Linear(512 * block.expansion, shape_bins) - self.fc_shape_8 = nn.Linear(512 * block.expansion, shape_bins) - self.fc_shape_9 = nn.Linear(512 * block.expansion, shape_bins) - - for m in self.modules(): - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.data.normal_(0, math.sqrt(2. / n)) - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - - def _make_layer(self, block, planes, blocks, stride=1): - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - nn.Conv2d(self.inplanes, planes * block.expansion, - kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(planes * block.expansion), - ) - - layers = [] - layers.append(block(self.inplanes, planes, stride, downsample)) - self.inplanes = planes * block.expansion - for i in range(1, blocks): - layers.append(block(self.inplanes, planes)) - - return nn.Sequential(*layers) - - def forward(self, x): - x = self.conv1(x) - x = self.bn1(x) - x = self.relu(x) - x = self.maxpool(x) - - x = self.layer1(x) - x = self.layer2(x) - x = self.layer3(x) - x = self.layer4(x) - - x = self.avgpool(x) - x = x.view(x.size(0), -1) - yaw = self.fc_yaw(x) - pitch = self.fc_pitch(x) - roll = self.fc_roll(x) - - shape = [] - shape.append(self.fc_shape_0(x)) - shape.append(self.fc_shape_1(x)) - shape.append(self.fc_shape_2(x)) - shape.append(self.fc_shape_3(x)) - shape.append(self.fc_shape_4(x)) - shape.append(self.fc_shape_5(x)) - shape.append(self.fc_shape_6(x)) - shape.append(self.fc_shape_7(x)) - shape.append(self.fc_shape_8(x)) - shape.append(self.fc_shape_9(x)) - - return yaw, pitch, roll, shape diff --git a/code/test_preangles.py b/code/test_preangles.py index 2cf251e..08561fb 100644 --- a/code/test_preangles.py +++ b/code/test_preangles.py @@ -71,6 +71,8 @@ pose_dataset = datasets.BIWI(args.data_dir, args.filename_list, transformations) elif args.dataset == 'AFLW': pose_dataset = datasets.AFLW(args.data_dir, args.filename_list, transformations) + elif args.dataset == 'Pose_300W_LP': + pose_dataset = datasets.Pose_300W_LP(args.data_dir, args.filename_list, transformations) elif args.dataset == 'AFW': pose_dataset = datasets.AFW(args.data_dir, args.filename_list, transformations) else: diff --git a/code/train_preangles.py b/code/train_preangles.py index afb98c8..6622d3f 100644 --- a/code/train_preangles.py +++ b/code/train_preangles.py @@ -18,6 +18,8 @@ import hopenet import torch.utils.model_zoo as model_zoo +import time + model_urls = { 'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth', 'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth', @@ -32,8 +34,6 @@ parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]', default=0, type=int) parser.add_argument('--num_epochs', dest='num_epochs', help='Maximum number of training epochs.', - default=5, type=int) - parser.add_argument('--num_epochs_ft', dest='num_epochs_ft', help='Maximum number of finetuning epochs.', default=5, type=int) parser.add_argument('--batch_size', dest='batch_size', help='Batch size.', default=16, type=int) @@ -103,7 +103,6 @@ cudnn.enabled = True num_epochs = args.num_epochs - num_epochs_ft = args.num_epochs_ft batch_size = args.batch_size gpu = args.gpu_id @@ -123,7 +122,6 @@ transformations = transforms.Compose([transforms.Scale(240), transforms.RandomCrop(224), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]) - if args.dataset == 'Pose_300W_LP': pose_dataset = datasets.Pose_300W_LP(args.data_dir, args.filename_list, transformations) @@ -146,9 +144,9 @@ num_workers=2) model.cuda(gpu) - softmax = nn.Softmax() - criterion = nn.CrossEntropyLoss().cuda() - reg_criterion = nn.MSELoss().cuda() + softmax = nn.Softmax().cuda(gpu) + criterion = nn.CrossEntropyLoss().cuda(gpu) + reg_criterion = nn.MSELoss().cuda(gpu) # Regression loss coefficient alpha = args.alpha @@ -161,25 +159,26 @@ lr = args.lr) print 'Ready to train network.' - print 'First phase of training.' for epoch in range(num_epochs): + start = time.time() for i, (images, labels, cont_labels, name) in enumerate(train_loader): - images = Variable(images.cuda(gpu)) - label_yaw = Variable(labels[:,0].cuda(gpu)) - label_pitch = Variable(labels[:,1].cuda(gpu)) - label_roll = Variable(labels[:,2].cuda(gpu)) + print i + print 'start: ', time.time() - start + images = Variable(images).cuda(gpu) + label_yaw = Variable(labels[:,0]).cuda(gpu) + label_pitch = Variable(labels[:,1]).cuda(gpu) + label_roll = Variable(labels[:,2]).cuda(gpu) - label_angles = Variable(cont_labels[:,:3].cuda(gpu)) - label_yaw_cont = Variable(cont_labels[:,0].cuda(gpu)) - label_pitch_cont = Variable(cont_labels[:,1].cuda(gpu)) - label_roll_cont = Variable(cont_labels[:,2].cuda(gpu)) + label_angles = Variable(cont_labels[:,:3]).cuda(gpu) + label_yaw_cont = Variable(cont_labels[:,0]).cuda(gpu) + label_pitch_cont = Variable(cont_labels[:,1]).cuda(gpu) + label_roll_cont = Variable(cont_labels[:,2]).cuda(gpu) optimizer.zero_grad() model.zero_grad() pre_yaw, pre_pitch, pre_roll, angles = model(images) - # Cross entropy loss loss_yaw = criterion(pre_yaw, label_yaw) loss_pitch = criterion(pre_pitch, label_pitch) @@ -198,7 +197,6 @@ loss_reg_pitch = reg_criterion(pitch_predicted, label_pitch_cont) loss_reg_roll = reg_criterion(roll_predicted, label_roll_cont) - # print yaw_predicted, label_yaw.float(), loss_reg_yaw # Total loss loss_yaw += alpha * loss_reg_yaw loss_pitch += alpha * loss_reg_pitch @@ -209,6 +207,8 @@ torch.autograd.backward(loss_seq, grad_seq) optimizer.step() + print 'end: ', time.time() - start + if (i+1) % 100 == 0: print ('Epoch [%d/%d], Iter [%d/%d] Losses: Yaw %.4f, Pitch %.4f, Roll %.4f' %(epoch+1, num_epochs, i+1, len(pose_dataset)//batch_size, loss_yaw.data[0], loss_pitch.data[0], loss_roll.data[0])) -- Gitblit v1.8.0