From 93855b2faf8b795d0058c217ee980d435f23227d Mon Sep 17 00:00:00 2001
From: natanielruiz <nataniel777@hotmail.com>
Date: Thu, 14 Sep 2017 08:54:14 +0800
Subject: [PATCH] Training on AFLW with different yaw loss multipliers
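
Scale the yaw loss by 0.35 in train.py and train_preangles.py so that different yaw
loss multipliers can be tried when training on AFLW, and add a --dataset argument to
both training scripts. The refinement branch in hopenet.py now predicts residuals on
top of the pre-angle prediction, and train.py regresses each refinement output against
the detached residual target. Add test_res.py, which evaluates such models by summing
the predicted residuals onto the pre-angles; test.py and batch_testing.py now take the
last refinement output via angles[-1].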

---
 code/train.py                                                         |   24 ++++
 practice/.ipynb_checkpoints/create_filtered_datasets-checkpoint.ipynb |   61 ++++++++++++
 practice/remove_KEPLER_test_split.ipynb                               |   10 +-
 code/datasets.py                                                      |    1 
 code/hopenet.py                                                       |    6 
 code/train_AFLW_preangles.py                                          |    2 
 practice/create_filtered_datasets.ipynb                               |   10 +-
 code/batch_testing.py                                                 |    6 
 code/train_preangles.py                                               |   22 +++
 code/test.py                                                          |    7 
 code/test_res.py                                                      |  140 ++++++++++++++++++++++++++++
 11 files changed, 260 insertions(+), 29 deletions(-)

diff --git a/code/batch_testing.py b/code/batch_testing.py
index db688a9..58d3b30 100644
--- a/code/batch_testing.py
+++ b/code/batch_testing.py
@@ -123,9 +123,9 @@
             label_roll = labels[:,2].float()
 
             pre_yaw, pre_pitch, pre_roll, angles = model(images)
-            yaw = angles[args.iter_ref][:,0].cpu().data
-            pitch = angles[args.iter_ref][:,1].cpu().data
-            roll = angles[args.iter_ref][:,2].cpu().data
+            yaw = angles[-1][:,0].cpu().data
+            pitch = angles[-1][:,1].cpu().data
+            roll = angles[-1][:,2].cpu().data
 
             # Mean absolute error
             yaw_error += torch.sum(torch.abs(yaw - label_yaw) * 3)
diff --git a/code/datasets.py b/code/datasets.py
index 6ed209b..589da5c 100644
--- a/code/datasets.py
+++ b/code/datasets.py
@@ -174,7 +174,6 @@
         pitch = pose[1] * 180 / np.pi
         roll = pose[2] * 180 / np.pi
         # Something weird with the roll in AFLW
-        # if yaw < 0:
         roll *= -1
         # Bin values
         bins = np.array(range(-99, 102, 3))
diff --git a/code/hopenet.py b/code/hopenet.py
index d122243..4aa0dfb 100644
--- a/code/hopenet.py
+++ b/code/hopenet.py
@@ -117,10 +117,12 @@
         pitch = pitch.view(pitch.size(0), 1)
         roll = roll.view(roll.size(0), 1)
         angles = []
-        angles.append(torch.cat([yaw, pitch, roll], 1))
+        preangles = torch.cat([yaw, pitch, roll], 1)
+        angles.append(preangles)
 
+        # Each subsequent entry in angles predicts a residual on top of the pre-angles
         for idx in xrange(self.iter_ref):
-            angles.append(self.fc_finetune(torch.cat((angles[idx], x), 1)))
+            angles.append(self.fc_finetune(torch.cat((preangles, x), 1)))
 
         return pre_yaw, pre_pitch, pre_roll, angles
 
diff --git a/code/test.py b/code/test.py
index 7f76714..9ff35e6 100644
--- a/code/test.py
+++ b/code/test.py
@@ -110,12 +110,11 @@
         label_roll = labels[:,2].float()
 
         pre_yaw, pre_pitch, pre_roll, angles = model(images)
-        yaw = angles[args.iter_ref][:,0].cpu().data
-        pitch = angles[args.iter_ref][:,1].cpu().data
-        roll = angles[args.iter_ref][:,2].cpu().data
+        yaw = angles[-1][:,0].cpu().data
+        pitch = angles[-1][:,1].cpu().data
+        roll = angles[-1][:,2].cpu().data
 
         # Mean absolute error
-        print yaw.numpy(), label_yaw.numpy()
         yaw_error += torch.sum(torch.abs(yaw - label_yaw) * 3)
         pitch_error += torch.sum(torch.abs(pitch - label_pitch) * 3)
         roll_error += torch.sum(torch.abs(roll - label_roll) * 3)
diff --git a/code/test_res.py b/code/test_res.py
new file mode 100644
index 0000000..124ad4d
--- /dev/null
+++ b/code/test_res.py
@@ -0,0 +1,140 @@
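+# Test script for Hopenet with iterative refinement: the evaluated angles are the
+# pre-angle predictions plus the sum of the predicted refinement residuals.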
+import numpy as np
+import torch
+import torch.nn as nn
+from torch.autograd import Variable
+from torch.utils.data import DataLoader
+from torchvision import transforms
+import torch.backends.cudnn as cudnn
+import torchvision
+import torch.nn.functional as F
+
+import cv2
+import matplotlib.pyplot as plt
+import sys
+import os
+import argparse
+
+import datasets
+import hopenet
+import utils
+
+def parse_args():
+    """Parse input arguments."""
+    parser = argparse.ArgumentParser(description='Head pose estimation using the Hopenet network.')
+    parser.add_argument('--gpu', dest='gpu_id', help='GPU device id to use [0]',
+            default=0, type=int)
+    parser.add_argument('--data_dir', dest='data_dir', help='Directory path for data.',
+          default='', type=str)
+    parser.add_argument('--filename_list', dest='filename_list', help='Path to text file containing relative paths for every example.',
+          default='', type=str)
+    parser.add_argument('--snapshot', dest='snapshot', help='Path of model snapshot.',
+          default='', type=str)
+    parser.add_argument('--batch_size', dest='batch_size', help='Batch size.',
+          default=1, type=int)
+    parser.add_argument('--save_viz', dest='save_viz', help='Save images with pose cube.',
+          default=False, type=bool)
+    parser.add_argument('--iter_ref', dest='iter_ref', default=1, type=int)
+    parser.add_argument('--dataset', dest='dataset', help='Dataset type.', default='AFLW2000', type=str)
+
+    args = parser.parse_args()
+
+    return args
+
+if __name__ == '__main__':
+    args = parse_args()
+
+    cudnn.enabled = True
+    gpu = args.gpu_id
+    snapshot_path = args.snapshot
+
+    # ResNet101 with 3 outputs.
+    # model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 23, 3], 66)
+    # ResNet50
+    model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66, args.iter_ref)
+    # ResNet18
+    # model = hopenet.Hopenet(torchvision.models.resnet.BasicBlock, [2, 2, 2, 2], 66)
+
+    print 'Loading snapshot.'
+    # Load snapshot
+    saved_state_dict = torch.load(snapshot_path)
+    model.load_state_dict(saved_state_dict)
+
+    print 'Loading data.'
+
+    transformations = transforms.Compose([transforms.Scale(224),
+    transforms.CenterCrop(224), transforms.ToTensor(),
+    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
+
+    if args.dataset == 'AFLW2000':
+        pose_dataset = datasets.AFLW2000(args.data_dir, args.filename_list,
+                                transformations)
+    elif args.dataset == 'BIWI':
+        pose_dataset = datasets.BIWI(args.data_dir, args.filename_list, transformations)
+    elif args.dataset == 'AFLW':
+        pose_dataset = datasets.AFLW(args.data_dir, args.filename_list, transformations)
+    elif args.dataset == 'AFW':
+        pose_dataset = datasets.AFW(args.data_dir, args.filename_list, transformations)
+    else:
+        print 'Error: not a valid dataset name'
+        sys.exit()
+    test_loader = torch.utils.data.DataLoader(dataset=pose_dataset,
+                                               batch_size=args.batch_size,
+                                               num_workers=2)
+
+    model.cuda(gpu)
+
+    print 'Ready to test network.'
+
+    # Test the Model
+    model.eval()  # Change model to 'eval' mode (BN uses moving mean/var).
+    total = 0
+    yaw_error = .0
+    pitch_error = .0
+    roll_error = .0
+
+    l1loss = torch.nn.L1Loss(size_average=False)
+
+    for i, (images, labels, name) in enumerate(test_loader):
+        images = Variable(images).cuda(gpu)
+        total += labels.size(0)
+        label_yaw = labels[:,0].float()
+        label_pitch = labels[:,1].float()
+        label_roll = labels[:,2].float()
+
+        pre_yaw, pre_pitch, pre_roll, angles = model(images)
+        yaw = angles[0][:,0].cpu().data
+        pitch = angles[0][:,1].cpu().data
+        roll = angles[0][:,2].cpu().data
+
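+        # Final prediction = pre-angle output plus the summed residuals from each refinement pass.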
+        for idx in xrange(1,args.iter_ref+1):
+            yaw += angles[idx][:,0].cpu().data
+            pitch += angles[idx][:,1].cpu().data
+            roll += angles[idx][:,2].cpu().data
+
+        # Mean absolute error
+        yaw_error += torch.sum(torch.abs(yaw - label_yaw) * 3)
+        pitch_error += torch.sum(torch.abs(pitch - label_pitch) * 3)
+        roll_error += torch.sum(torch.abs(roll - label_roll) * 3)
+
+        # Save images with pose cube.
+        # TODO: fix for larger batch size
+        if args.save_viz:
+            name = name[0]
+            if args.dataset == 'BIWI':
+                cv2_img = cv2.imread(os.path.join(args.data_dir, name + '_rgb.png'))
+            else:
+                cv2_img = cv2.imread(os.path.join(args.data_dir, name + '.jpg'))
+
+            if args.batch_size == 1:
+                error_string = 'y %.4f, p %.4f, r %.4f' % (torch.sum(torch.abs(yaw - label_yaw) * 3), torch.sum(torch.abs(pitch - label_pitch) * 3), torch.sum(torch.abs(roll - label_roll) * 3))
+                cv2_img = cv2.putText(cv2_img, error_string, (30, cv2_img.shape[0]- 30), fontFace=1, fontScale=2, color=(0,255,0), thickness=2)
+            utils.plot_pose_cube(cv2_img, yaw[0] * 3 - 99, pitch[0] * 3 - 99, roll[0] * 3 - 99)
+            cv2.imwrite(os.path.join('output/images', name + '.jpg'), cv2_img)
+
+    print('Test error in degrees of the model on the ' + str(total) +
+    ' test images. Yaw: %.4f, Pitch: %.4f, Roll: %.4f' % (yaw_error / total,
+    pitch_error / total, roll_error / total))
+
+    # Binned accuracy
+    # for idx in xrange(len(yaw_correct)):
+    #     print yaw_correct[idx] / total, pitch_correct[idx] / total, roll_correct[idx] / total
diff --git a/code/train.py b/code/train.py
index 6e1ae5b..03d5cf5 100644
--- a/code/train.py
+++ b/code/train.py
@@ -48,6 +48,7 @@
           default=0.001, type=float)
     parser.add_argument('--iter_ref', dest='iter_ref', help='Number of iterative refinement passes.',
           default=1, type=int)
+    parser.add_argument('--dataset', dest='dataset', help='Dataset type.', default='Pose_300W_LP', type=str)
     args = parser.parse_args()
     return args
 
@@ -124,8 +125,19 @@
     transforms.RandomCrop(224), transforms.ToTensor(),
     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
 
-    pose_dataset = datasets.Pose_300W_LP(args.data_dir, args.filename_list,
-                                transformations)
+    if args.dataset == 'Pose_300W_LP':
+        pose_dataset = datasets.Pose_300W_LP(args.data_dir, args.filename_list, transformations)
+    elif args.dataset == 'AFLW2000':
+        pose_dataset = datasets.AFLW2000(args.data_dir, args.filename_list, transformations)
+    elif args.dataset == 'BIWI':
+        pose_dataset = datasets.BIWI(args.data_dir, args.filename_list, transformations)
+    elif args.dataset == 'AFLW':
+        pose_dataset = datasets.AFLW(args.data_dir, args.filename_list, transformations)
+    elif args.dataset == 'AFW':
+        pose_dataset = datasets.AFW(args.data_dir, args.filename_list, transformations)
+    else:
+        print 'Error: not a valid dataset name'
+        sys.exit()
     train_loader = torch.utils.data.DataLoader(dataset=pose_dataset,
                                                batch_size=batch_size,
                                                shuffle=True,
@@ -239,10 +251,14 @@
             loss_pitch += alpha * loss_reg_pitch
             loss_roll += alpha * loss_reg_roll
 
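+            # Down-weight the yaw loss; this commit tries different yaw loss multipliers.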
+            loss_yaw *= 0.35
+
             # Finetuning loss
             loss_seq = [loss_yaw, loss_pitch, loss_roll]
-            for idx in xrange(args.iter_ref+1):
-                loss_angles = reg_criterion(angles[idx], label_angles.float())
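+            # Each refinement output regresses the residual between the labels and the detached pre-angle prediction.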
+            for idx in xrange(1,len(angles)):
+                label_angles_residuals = label_angles.float() - angles[0]
+                label_angles_residuals = label_angles_residuals.detach()
+                loss_angles = reg_criterion(angles[idx], label_angles_residuals)
                 loss_seq.append(loss_angles)
 
             grad_seq = [torch.Tensor(1).cuda(gpu) for _ in range(len(loss_seq))]
diff --git a/code/train_AFLW_preangles.py b/code/train_AFLW_preangles.py
index ede3439..b12149c 100644
--- a/code/train_AFLW_preangles.py
+++ b/code/train_AFLW_preangles.py
@@ -111,7 +111,7 @@
     # ResNet101 with 3 outputs
     # model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 23, 3], 66)
     # ResNet50
-    model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66)
+    model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66, 0)
     # ResNet18
     # model = hopenet.Hopenet(torchvision.models.resnet.BasicBlock, [2, 2, 2, 2], 66)
     load_filtered_state_dict(model, model_zoo.load_url(model_urls['resnet50']))
diff --git a/code/train_preangles.py b/code/train_preangles.py
index 3179c24..5f23b25 100644
--- a/code/train_preangles.py
+++ b/code/train_preangles.py
@@ -46,6 +46,8 @@
     parser.add_argument('--output_string', dest='output_string', help='String appended to output snapshots.', default = '', type=str)
     parser.add_argument('--alpha', dest='alpha', help='Regression loss coefficient.',
           default=0.001, type=float)
+    parser.add_argument('--dataset', dest='dataset', help='Dataset type.', default='Pose_300W_LP', type=str)
+
     args = parser.parse_args()
     return args
 
@@ -111,7 +113,7 @@
     # ResNet101 with 3 outputs
     # model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 23, 3], 66)
     # ResNet50
-    model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66)
+    model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66, 0)
     # ResNet18
     # model = hopenet.Hopenet(torchvision.models.resnet.BasicBlock, [2, 2, 2, 2], 66)
     load_filtered_state_dict(model, model_zoo.load_url(model_urls['resnet50']))
@@ -122,8 +124,20 @@
     transforms.RandomCrop(224), transforms.ToTensor(),
     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
 
-    pose_dataset = datasets.Pose_300W_LP(args.data_dir, args.filename_list,
-                                transformations)
+
+    if args.dataset == 'Pose_300W_LP':
+        pose_dataset = datasets.Pose_300W_LP(args.data_dir, args.filename_list, transformations)
+    elif args.dataset == 'AFLW2000':
+        pose_dataset = datasets.AFLW2000(args.data_dir, args.filename_list, transformations)
+    elif args.dataset == 'BIWI':
+        pose_dataset = datasets.BIWI(args.data_dir, args.filename_list, transformations)
+    elif args.dataset == 'AFLW':
+        pose_dataset = datasets.AFLW(args.data_dir, args.filename_list, transformations)
+    elif args.dataset == 'AFW':
+        pose_dataset = datasets.AFW(args.data_dir, args.filename_list, transformations)
+    else:
+        print 'Error: not a valid dataset name'
+        sys.exit()
     train_loader = torch.utils.data.DataLoader(dataset=pose_dataset,
                                                batch_size=batch_size,
                                                shuffle=True,
@@ -183,6 +197,8 @@
             loss_pitch += alpha * loss_reg_pitch
             loss_roll += alpha * loss_reg_roll
 
+            loss_yaw *= 0.35
+
             loss_seq = [loss_yaw, loss_pitch, loss_roll]
             # loss_seq = [loss_reg_yaw, loss_reg_pitch, loss_reg_roll]
             grad_seq = [torch.Tensor(1).cuda(gpu) for _ in range(len(loss_seq))]
diff --git a/practice/.ipynb_checkpoints/create_filtered_datasets-checkpoint.ipynb b/practice/.ipynb_checkpoints/create_filtered_datasets-checkpoint.ipynb
index f564f74..970dcc7 100644
--- a/practice/.ipynb_checkpoints/create_filtered_datasets-checkpoint.ipynb
+++ b/practice/.ipynb_checkpoints/create_filtered_datasets-checkpoint.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {
     "collapsed": true
    },
@@ -93,6 +93,65 @@
     "            \n",
     "        print counter"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": [
+    "AFLW = '/Data/nruiz9/data/facial_landmarks/AFLW/aflw_cropped_loose/'\n",
+    "filenames = '/Data/nruiz9/data/facial_landmarks/AFLW/aflw_cropped_loose/filename_list.txt'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "collapsed": false
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "601\n"
+     ]
+    }
+   ],
+   "source": [
+    "fid = open(filenames, 'r')\n",
+    "out = open(os.path.join(AFLW, 'filename_list_filtered.txt'), 'wb')\n",
+    "counter = 0\n",
+    "for line in fid:\n",
+    "    original_line = line\n",
+    "    line = line.strip('\\n')\n",
+    "    if not os.path.exists(os.path.join(AFLW, line + '.txt')):\n",
+    "        counter += 1\n",
+    "        continue\n",
+    "    annot_file = open(os.path.join(AFLW, line + '.txt'))\n",
+    "    annot = annot_file.readline().strip('\\n').split(' ')\n",
+    "    yaw = float(annot[1]) * 180 / np.pi\n",
+    "    pitch = float(annot[2]) * 180 / np.pi\n",
+    "    roll = float(annot[3]) * 180 / np.pi\n",
+    "    if abs(pitch) > 89 or abs(yaw) > 89 or abs(roll) > 89:\n",
+    "        counter += 1\n",
+    "        continue\n",
+    "    out.write(original_line)\n",
+    "\n",
+    "print counter    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "collapsed": true
+   },
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {
diff --git a/practice/create_filtered_datasets.ipynb b/practice/create_filtered_datasets.ipynb
index e5bbf42..a6210e9 100644
--- a/practice/create_filtered_datasets.ipynb
+++ b/practice/create_filtered_datasets.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 6,
    "metadata": {
     "collapsed": true
    },
@@ -96,7 +96,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 19,
    "metadata": {
     "collapsed": true
    },
@@ -108,7 +108,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 20,
    "metadata": {
     "collapsed": false
    },
@@ -117,7 +117,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "243\n"
+      "289\n"
      ]
     }
    ],
@@ -136,7 +136,7 @@
     "    yaw = float(annot[1]) * 180 / np.pi\n",
     "    pitch = float(annot[2]) * 180 / np.pi\n",
     "    roll = float(annot[3]) * 180 / np.pi\n",
-    "    if abs(pitch) > 99 or abs(yaw) > 99 or abs(roll) > 99:\n",
+    "    if abs(pitch) > 98 or abs(yaw) > 98 or abs(roll) > 98:\n",
     "        counter += 1\n",
     "        continue\n",
     "    out.write(original_line)\n",
diff --git a/practice/remove_KEPLER_test_split.ipynb b/practice/remove_KEPLER_test_split.ipynb
index 239e56a..6898b20 100644
--- a/practice/remove_KEPLER_test_split.ipynb
+++ b/practice/remove_KEPLER_test_split.ipynb
@@ -11,7 +11,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 27,
    "metadata": {
     "collapsed": true
    },
@@ -23,7 +23,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 34,
    "metadata": {
     "collapsed": true
    },
@@ -35,7 +35,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 35,
    "metadata": {
     "collapsed": false
    },
@@ -60,7 +60,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 36,
    "metadata": {
     "collapsed": false
    },
@@ -69,7 +69,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "954 19842\n"
+      "943 19537\n"
      ]
     }
    ],

--
Gitblit v1.8.0