From 9f2fa4a5fa2af40503c05379c9d5318e87581c7b Mon Sep 17 00:00:00 2001
From: natanielruiz <nataniel777@hotmail.com>
Date: Fri, 08 Dec 2017 02:10:14 +0800
Subject: [PATCH] Removed shape loading, which fixes a bug with loading AFLW2000 and 300W-LP for training and testing.
---
code/test_on_video.py | 144 +++++++++++++++++++++++++++++-------------------
1 file changed, 87 insertions(+), 57 deletions(-)
diff --git a/code/test_on_video.py b/code/test_on_video.py
index 20dfaac..7c51348 100644
--- a/code/test_on_video.py
+++ b/code/test_on_video.py
@@ -1,4 +1,9 @@
+import sys, os, argparse
+
import numpy as np
+import cv2
+import matplotlib.pyplot as plt
+
import torch
import torch.nn as nn
from torch.autograd import Variable
@@ -8,10 +13,6 @@
import torchvision
import torch.nn.functional as F
from PIL import Image
-
-import cv2
-import matplotlib.pyplot as plt
-import sys, os, argparse
import datasets, hopenet, utils
@@ -25,6 +26,8 @@
parser.add_argument('--video', dest='video_path', help='Path of video')
parser.add_argument('--bboxes', dest='bboxes', help='Bounding box annotations of frames')
parser.add_argument('--output_string', dest='output_string', help='String appended to output file')
+ parser.add_argument('--n_frames', dest='n_frames', help='Number of frames', type=int, required=True)
+ parser.add_argument('--fps', dest='fps', help='Frames per second of source video', type=float, default=30.)
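+ # --n_frames is marked required above because in Python 2 an int compares as
+ # greater than a missing value (None), so the frame loop would stop at frame 1.
+ # --fps should match the source video's capture rate to keep timing in sync.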
args = parser.parse_args()
return args
@@ -45,12 +48,8 @@
if not os.path.exists(args.video_path):
sys.exit('Video does not exist')
- # ResNet101 with 3 outputs.
- # model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 23, 3], 66)
- # ResNet50
+ # ResNet50 structure
model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66)
- # ResNet18
- # model = hopenet.Hopenet(torchvision.models.resnet.BasicBlock, [2, 2, 2, 2], 66)
print 'Loading snapshot.'
# Load snapshot
@@ -60,7 +59,8 @@
print 'Loading data.'
transformations = transforms.Compose([transforms.Scale(224),
- transforms.RandomCrop(224), transforms.ToTensor()])
+ transforms.CenterCrop(224), transforms.ToTensor(),
+ transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
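+ # CenterCrop gives a deterministic crop for inference (RandomCrop is a training
+ # augmentation); Normalize uses the ImageNet statistics that the pretrained
+ # ResNet backbone expects.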
model.cuda(gpu)
@@ -74,20 +74,33 @@
idx_tensor = torch.FloatTensor(idx_tensor).cuda(gpu)
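+ # The 66 bins span -99 to +99 degrees in 3-degree steps; a continuous angle is
+ # recovered below as the expectation over the softmax output:
+ # angle = 3 * sum(p_i * i) - 99.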
video = cv2.VideoCapture(video_path)
+
+ # New cv2 (OpenCV 3.x) property names
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) # float
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) # float
# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'MJPG')
- out = cv2.VideoWriter('output/video/output-%s.avi' % args.output_string, fourcc, 30.0, (width, height))
+ out = cv2.VideoWriter('output/video/output-%s.avi' % args.output_string, fourcc, args.fps, (width, height))
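+ # Pass the true capture rate via --fps; a mismatched rate makes the output
+ # video play too fast or too slow.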
+
+ # # Old cv2 (OpenCV 2.4) equivalents:
+ # width = int(video.get(cv2.cv.CV_CAP_PROP_FRAME_WIDTH)) # float
+ # height = int(video.get(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT)) # float
+ #
+ # # Define the codec and create VideoWriter object
+ # fourcc = cv2.cv.CV_FOURCC(*'MJPG')
+ # out = cv2.VideoWriter('output/video/output-%s.avi' % args.output_string, fourcc, 30.0, (width, height))
txt_out = open('output/video/output-%s.txt' % args.output_string, 'w')
- bbox_file = open(args.bboxes, 'r')
frame_num = 1
- # TODO: support for several bounding boxes
- for line in bbox_file:
+ with open(args.bboxes, 'r') as f:
+ bbox_line_list = f.read().splitlines()
+
+ idx = 0
+ while idx < len(bbox_line_list):
+ line = bbox_line_list[idx]
line = line.strip('\n')
line = line.split(' ')
det_frame_num = int(line[0])
@@ -95,12 +108,8 @@
print frame_num
# Stop at a certain frame number
- if frame_num > 10000:
- out.release()
- video.release()
- bbox_file.close()
- txt_out.close()
- sys.exit(0)
+ if frame_num > args.n_frames:
+ break
# Save all frames as they are if they don't have bbox annotation.
while frame_num < det_frame_num:
@@ -108,54 +117,75 @@
if ret == False:
out.release()
video.release()
- bbox_file.close()
txt_out.close()
sys.exit(0)
- out.write(frame)
+ # out.write(frame)
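+ # Frames without a bbox annotation are still read, to stay aligned with the
+ # annotation file, but are no longer written to the output video.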
frame_num += 1
+ # Start processing frame with bounding box
ret,frame = video.read()
if ret == False:
- out.release()
- video.release()
- bbox_file.close()
- txt_out.close()
- sys.exit(0)
+ break
+ cv2_frame = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
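+ # OpenCV reads frames as BGR; PIL and the torchvision transforms expect RGB.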
- x_min, y_min, x_max, y_max = int(line[1]), int(line[2]), int(line[3]), int(line[4])
- # Crop image
- img = frame[y_min:y_max,x_min:x_max]
- img = Image.fromarray(img)
+ while True:
+ x_min, y_min, x_max, y_max = int(float(line[1])), int(float(line[2])), int(float(line[3])), int(float(line[4]))
- # Transform
- img = transformations(img)
- img_shape = img.size()
- img = img.view(1, img_shape[0], img_shape[1], img_shape[2])
- img = Variable(img).cuda(gpu)
- yaw, pitch, roll = model(img)
+ bbox_width = abs(x_max - x_min)
+ bbox_height = abs(y_max - y_min)
+ # x_min -= 3 * bbox_width / 4
+ # x_max += 3 * bbox_width / 4
+ # y_min -= 3 * bbox_height / 4
+ # y_max += bbox_height / 4
+ x_min -= 50
+ x_max += 50
+ y_min -= 50
+ y_max += 30
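+ # Fixed-pixel margins give a loose crop around the detection; the proportional
+ # expansion commented out above scales with the box size and may generalize
+ # better to other video resolutions.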
+ x_min = max(x_min, 0)
+ y_min = max(y_min, 0)
+ x_max = min(frame.shape[1], x_max)
+ y_max = min(frame.shape[0], y_max)
+ # Crop face loosely
+ img = cv2_frame[y_min:y_max,x_min:x_max]
+ img = Image.fromarray(img)
- yaw_predicted = F.softmax(yaw)
- pitch_predicted = F.softmax(pitch)
- roll_predicted = F.softmax(roll)
- # Get continuous predictions in degrees.
- yaw_predicted = torch.sum(yaw_predicted.data[0] * idx_tensor) * 3 - 99
- pitch_predicted = torch.sum(pitch_predicted.data[0] * idx_tensor) * 3 - 99
- roll_predicted = torch.sum(roll_predicted.data[0] * idx_tensor) * 3 - 99
+ # Transform
+ img = transformations(img)
+ img_shape = img.size()
+ img = img.view(1, img_shape[0], img_shape[1], img_shape[2])
+ img = Variable(img).cuda(gpu)
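+ # view() adds the batch dimension (NCHW) the network expects; Variable wraps
+ # the tensor for the pre-0.4 PyTorch autograd API.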
- # Print new frame with cube and TODO: axis
- txt_out.write(str(frame_num) + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted))
- utils.plot_pose_cube(frame, yaw_predicted, pitch_predicted, roll_predicted, (x_min + x_max) / 2, (y_min + y_max) / 2, size = 200)
- out.write(frame)
+ yaw, pitch, roll = model(img)
- frame_num += 1
+ yaw_predicted = F.softmax(yaw)
+ pitch_predicted = F.softmax(pitch)
+ roll_predicted = F.softmax(roll)
+ # Get continuous predictions in degrees.
+ yaw_predicted = torch.sum(yaw_predicted.data[0] * idx_tensor) * 3 - 99
+ pitch_predicted = torch.sum(pitch_predicted.data[0] * idx_tensor) * 3 - 99
+ roll_predicted = torch.sum(roll_predicted.data[0] * idx_tensor) * 3 - 99
- while True:
- ret, frame = video.read()
- if ret == False:
- out.release()
- video.release()
- bbox_file.close()
- txt_out.close()
- sys.exit(0)
+ # Write the pose to the txt file and draw the pose axes on the frame
+ txt_out.write(str(frame_num) + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted))
+ # utils.plot_pose_cube(frame, yaw_predicted, pitch_predicted, roll_predicted, (x_min + x_max) / 2, (y_min + y_max) / 2, size = bbox_width)
+ utils.draw_axis(frame, yaw_predicted, pitch_predicted, roll_predicted, tdx = (x_min + x_max) / 2, tdy= (y_min + y_max) / 2, size = bbox_height/2)
+ # Plot expanded bounding box
+ # cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0,255,0), 1)
+
+ # Peek next frame detection; guard against reading past the last annotation line
+ if idx + 1 >= len(bbox_line_list):
+     break
+ next_frame_num = int(bbox_line_list[idx+1].strip('\n').split(' ')[0])
+ # print 'next_frame_num ', next_frame_num
+ if next_frame_num == det_frame_num:
+ idx += 1
+ line = bbox_line_list[idx].strip('\n').split(' ')
+ det_frame_num = int(line[0])
+ else:
+ break
+
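+ # Consecutive annotation lines that share a frame number (several faces in the
+ # same frame) are all drawn before the frame is written once below.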
+ idx += 1
out.write(frame)
frame_num += 1
+
+ out.release()
+ video.release()
+ txt_out.close()
--
Gitblit v1.8.0