From 9a02f63f4d5692399a95cb889e8f7629a165c28e Mon Sep 17 00:00:00 2001
From: natanielruiz <nataniel777@hotmail.com>
Date: Thu, 21 Sep 2017 05:56:20 +0800
Subject: [PATCH] Randomize crop margin, add blur augmentation, remove Hopenet_shape

---
 code/datasets.py        |   19 ++---
 code/hopenet.py         |   85 ----------------------------
 code/train_preangles.py |   36 ++++++------
 code/test_preangles.py  |    2 
 4 files changed, 28 insertions(+), 114 deletions(-)

diff --git a/code/datasets.py b/code/datasets.py
index 4007369..5825105 100644
--- a/code/datasets.py
+++ b/code/datasets.py
@@ -38,7 +38,9 @@
         x_max = max(pt2d[0,:])
         y_max = max(pt2d[1,:])
 
-        k = 0.35
+        # k = 0.35 was being used beforehand
+        # k = 0.2 to 0.40
+        k = np.random.random_sample() * 0.2 + 0.2
         x_min -= 0.6 * k * abs(x_max - x_min)
         y_min -= 2 * k * abs(y_max - y_min)
         x_max += 0.6 * k * abs(x_max - x_min)
@@ -59,15 +61,10 @@
             roll = -roll
             img = img.transpose(Image.FLIP_LEFT_RIGHT)
 
-        # Rotate?
-        # rnd = np.random.random_sample()
-        # if rnd < 0.5:
-        #     if roll >= 0:
-        #         img = img.rotate(30)
-        #         roll -= 30
-        #     else:
-        #         img = img.rotate(-30)
-        #         roll += 30
+        # Blur?
+        rnd = np.random.random_sample()
+        if rnd < 0.05:
+            img = img.filter(ImageFilter.BLUR)
 
         # Bin values
         bins = np.array(range(-99, 102, 3))
@@ -82,7 +79,7 @@
         if self.transform is not None:
             img = self.transform(img)
 
-        return img, labels, cont_labels, elf.X_train[index]
+        return img, labels, cont_labels, self.X_train[index]
 
     def __len__(self):
         # 122,450
diff --git a/code/hopenet.py b/code/hopenet.py
index 4aa0dfb..b02beec 100644
--- a/code/hopenet.py
+++ b/code/hopenet.py
@@ -125,88 +125,3 @@
             angles.append(self.fc_finetune(torch.cat((preangles, x), 1)))
 
         return pre_yaw, pre_pitch, pre_roll, angles
-
-class Hopenet_shape(nn.Module):
-    # This is just Hopenet with 3 output layers for yaw, pitch and roll.
-    def __init__(self, block, layers, num_bins, shape_bins):
-        self.inplanes = 64
-        super(Hopenet_shape, self).__init__()
-        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
-                               bias=False)
-        self.bn1 = nn.BatchNorm2d(64)
-        self.relu = nn.ReLU(inplace=True)
-        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
-        self.layer1 = self._make_layer(block, 64, layers[0])
-        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
-        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
-        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
-        self.avgpool = nn.AvgPool2d(7)
-        self.fc_yaw = nn.Linear(512 * block.expansion, num_bins)
-        self.fc_pitch = nn.Linear(512 * block.expansion, num_bins)
-        self.fc_roll = nn.Linear(512 * block.expansion, num_bins)
-        self.fc_shape_0 = nn.Linear(512 * block.expansion, shape_bins)
-        self.fc_shape_1 = nn.Linear(512 * block.expansion, shape_bins)
-        self.fc_shape_2 = nn.Linear(512 * block.expansion, shape_bins)
-        self.fc_shape_3 = nn.Linear(512 * block.expansion, shape_bins)
-        self.fc_shape_4 = nn.Linear(512 * block.expansion, shape_bins)
-        self.fc_shape_5 = nn.Linear(512 * block.expansion, shape_bins)
-        self.fc_shape_6 = nn.Linear(512 * block.expansion, shape_bins)
-        self.fc_shape_7 = nn.Linear(512 * block.expansion, shape_bins)
-        self.fc_shape_8 = nn.Linear(512 * block.expansion, shape_bins)
-        self.fc_shape_9 = nn.Linear(512 * block.expansion, shape_bins)
-
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d):
-                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
-                m.weight.data.normal_(0, math.sqrt(2. / n))
-            elif isinstance(m, nn.BatchNorm2d):
-                m.weight.data.fill_(1)
-                m.bias.data.zero_()
-
-    def _make_layer(self, block, planes, blocks, stride=1):
-        downsample = None
-        if stride != 1 or self.inplanes != planes * block.expansion:
-            downsample = nn.Sequential(
-                nn.Conv2d(self.inplanes, planes * block.expansion,
-                          kernel_size=1, stride=stride, bias=False),
-                nn.BatchNorm2d(planes * block.expansion),
-            )
-
-        layers = []
-        layers.append(block(self.inplanes, planes, stride, downsample))
-        self.inplanes = planes * block.expansion
-        for i in range(1, blocks):
-            layers.append(block(self.inplanes, planes))
-
-        return nn.Sequential(*layers)
-
-    def forward(self, x):
-        x = self.conv1(x)
-        x = self.bn1(x)
-        x = self.relu(x)
-        x = self.maxpool(x)
-
-        x = self.layer1(x)
-        x = self.layer2(x)
-        x = self.layer3(x)
-        x = self.layer4(x)
-
-        x = self.avgpool(x)
-        x = x.view(x.size(0), -1)
-        yaw = self.fc_yaw(x)
-        pitch = self.fc_pitch(x)
-        roll = self.fc_roll(x)
-
-        shape = []
-        shape.append(self.fc_shape_0(x))
-        shape.append(self.fc_shape_1(x))
-        shape.append(self.fc_shape_2(x))
-        shape.append(self.fc_shape_3(x))
-        shape.append(self.fc_shape_4(x))
-        shape.append(self.fc_shape_5(x))
-        shape.append(self.fc_shape_6(x))
-        shape.append(self.fc_shape_7(x))
-        shape.append(self.fc_shape_8(x))
-        shape.append(self.fc_shape_9(x))
-
-        return yaw, pitch, roll, shape
diff --git a/code/test_preangles.py b/code/test_preangles.py
index 2cf251e..08561fb 100644
--- a/code/test_preangles.py
+++ b/code/test_preangles.py
@@ -71,6 +71,8 @@
         pose_dataset = datasets.BIWI(args.data_dir, args.filename_list, transformations)
     elif args.dataset == 'AFLW':
         pose_dataset = datasets.AFLW(args.data_dir, args.filename_list, transformations)
+    elif args.dataset == 'Pose_300W_LP':
+        pose_dataset = datasets.Pose_300W_LP(args.data_dir, args.filename_list, transformations)
     elif args.dataset == 'AFW':
         pose_dataset = datasets.AFW(args.data_dir, args.filename_list, transformations)
     else:
diff --git a/code/train_preangles.py b/code/train_preangles.py
index afb98c8..6622d3f 100644
--- a/code/train_preangles.py
+++ b/code/train_preangles.py
@@ -18,6 +18,8 @@
 import hopenet
 import torch.utils.model_zoo as model_zoo
 
+import time
+
 model_urls = {
     'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
     'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
@@ -32,8 +34,6 @@
     parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
             default=0, type=int)
     parser.add_argument('--num_epochs', dest='num_epochs', help='Maximum number of training epochs.',
-          default=5, type=int)
-    parser.add_argument('--num_epochs_ft', dest='num_epochs_ft', help='Maximum number of finetuning epochs.',
           default=5, type=int)
     parser.add_argument('--batch_size', dest='batch_size', help='Batch size.',
           default=16, type=int)
@@ -103,7 +103,6 @@
 
     cudnn.enabled = True
     num_epochs = args.num_epochs
-    num_epochs_ft = args.num_epochs_ft
     batch_size = args.batch_size
     gpu = args.gpu_id
 
@@ -123,7 +122,6 @@
     transformations = transforms.Compose([transforms.Scale(240),
     transforms.RandomCrop(224), transforms.ToTensor(),
     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
-
 
     if args.dataset == 'Pose_300W_LP':
         pose_dataset = datasets.Pose_300W_LP(args.data_dir, args.filename_list, transformations)
@@ -146,9 +144,9 @@
                                                num_workers=2)
 
     model.cuda(gpu)
-    softmax = nn.Softmax()
-    criterion = nn.CrossEntropyLoss().cuda()
-    reg_criterion = nn.MSELoss().cuda()
+    softmax = nn.Softmax().cuda(gpu)
+    criterion = nn.CrossEntropyLoss().cuda(gpu)
+    reg_criterion = nn.MSELoss().cuda(gpu)
     # Regression loss coefficient
     alpha = args.alpha
 
@@ -161,25 +159,26 @@
                                    lr = args.lr)
 
     print 'Ready to train network.'
-
     print 'First phase of training.'
     for epoch in range(num_epochs):
+        start = time.time()
         for i, (images, labels, cont_labels, name) in enumerate(train_loader):
-            images = Variable(images.cuda(gpu))
-            label_yaw = Variable(labels[:,0].cuda(gpu))
-            label_pitch = Variable(labels[:,1].cuda(gpu))
-            label_roll = Variable(labels[:,2].cuda(gpu))
+            print i
+            print 'start: ', time.time() - start
+            images = Variable(images).cuda(gpu)
+            label_yaw = Variable(labels[:,0]).cuda(gpu)
+            label_pitch = Variable(labels[:,1]).cuda(gpu)
+            label_roll = Variable(labels[:,2]).cuda(gpu)
 
-            label_angles = Variable(cont_labels[:,:3].cuda(gpu))
-            label_yaw_cont = Variable(cont_labels[:,0].cuda(gpu))
-            label_pitch_cont = Variable(cont_labels[:,1].cuda(gpu))
-            label_roll_cont = Variable(cont_labels[:,2].cuda(gpu))
+            label_angles = Variable(cont_labels[:,:3]).cuda(gpu)
+            label_yaw_cont = Variable(cont_labels[:,0]).cuda(gpu)
+            label_pitch_cont = Variable(cont_labels[:,1]).cuda(gpu)
+            label_roll_cont = Variable(cont_labels[:,2]).cuda(gpu)
 
             optimizer.zero_grad()
             model.zero_grad()
 
             pre_yaw, pre_pitch, pre_roll, angles = model(images)
-
             # Cross entropy loss
             loss_yaw = criterion(pre_yaw, label_yaw)
             loss_pitch = criterion(pre_pitch, label_pitch)
@@ -198,7 +197,6 @@
             loss_reg_pitch = reg_criterion(pitch_predicted, label_pitch_cont)
             loss_reg_roll = reg_criterion(roll_predicted, label_roll_cont)
 
-            # print yaw_predicted, label_yaw.float(), loss_reg_yaw
             # Total loss
             loss_yaw += alpha * loss_reg_yaw
             loss_pitch += alpha * loss_reg_pitch
@@ -209,6 +207,8 @@
             torch.autograd.backward(loss_seq, grad_seq)
             optimizer.step()
 
+            print 'end: ', time.time() - start
+
             if (i+1) % 100 == 0:
                 print ('Epoch [%d/%d], Iter [%d/%d] Losses: Yaw %.4f, Pitch %.4f, Roll %.4f'
                        %(epoch+1, num_epochs, i+1, len(pose_dataset)//batch_size, loss_yaw.data[0], loss_pitch.data[0], loss_roll.data[0]))

--
Gitblit v1.8.0