From 31fc66b795c0a57b8009d7b03f49f6cd099ceb29 Mon Sep 17 00:00:00 2001
From: natanielruiz <nataniel777@hotmail.com>
Date: Sat, 23 Sep 2017 12:07:48 +0800
Subject: [PATCH] Trying superres

---
 code/test_on_video.py |   44 +++++++++++++++++++++++++++++++++++++-------
 1 file changed, 37 insertions(+), 7 deletions(-)

diff --git a/code/test_on_video.py b/code/test_on_video.py
index 4fad440..c837775 100644
--- a/code/test_on_video.py
+++ b/code/test_on_video.py
@@ -45,8 +45,10 @@
     if not os.path.exists(args.video_path):
         sys.exit('Video does not exist')
 
-    # ResNet50 with 3 outputs.
-    model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66)
+    # ResNet101 with 3 outputs.
+    # model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 23, 3], 66)
+    # ResNet50
+    model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66, 0)
     # ResNet18
     # model = hopenet.Hopenet(torchvision.models.resnet.BasicBlock, [2, 2, 2, 2], 66)
 
@@ -58,7 +60,8 @@
     print 'Loading data.'
 
     transformations = transforms.Compose([transforms.Scale(224),
-    transforms.RandomCrop(224), transforms.ToTensor()])
+    transforms.CenterCrop(224), transforms.ToTensor(),
+    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
 
     model.cuda(gpu)
 
@@ -72,12 +75,24 @@
     idx_tensor = torch.FloatTensor(idx_tensor).cuda(gpu)
 
     video = cv2.VideoCapture(video_path)
-    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))   # float
-    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) # float
+
+    # New cv2
+    # width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))   # float
+    # height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) # float
+    #
+    # # Define the codec and create VideoWriter object
+    # fourcc = cv2.VideoWriter_fourcc(*'MJPG')
+    # out = cv2.VideoWriter('output/video/output-%s.avi' % args.output_string, fourcc, 30.0, (width, height))
+
+    # Old cv2
+    width = int(video.get(cv2.cv.CV_CAP_PROP_FRAME_WIDTH))   # float
+    height = int(video.get(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT)) # float
 
     # Define the codec and create VideoWriter object
-    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
+    fourcc = cv2.cv.CV_FOURCC(*'MJPG')
     out = cv2.VideoWriter('output/video/output-%s.avi' % args.output_string, fourcc, 30.0, (width, height))
+
+    txt_out = open('output/video/output-%s.txt' % args.output_string, 'w')
 
     bbox_file = open(args.bboxes, 'r')
     frame_num = 1
@@ -95,6 +110,7 @@
             out.release()
             video.release()
             bbox_file.close()
+            txt_out.close()
             sys.exit(0)
 
         # Save all frames as they are if they don't have bbox annotation.
@@ -104,6 +120,7 @@
                 out.release()
                 video.release()
                 bbox_file.close()
+                txt_out.close()
                 sys.exit(0)
             out.write(frame)
             frame_num += 1
@@ -113,9 +130,18 @@
             out.release()
             video.release()
             bbox_file.close()
+            txt_out.close()
             sys.exit(0)
 
         x_min, y_min, x_max, y_max = int(line[1]), int(line[2]), int(line[3]), int(line[4])
+        x_min -= 150
+        x_max += 150
+        y_min -= 250
+        y_max += 100
+        x_min = max(x_min, 0)
+        y_min = max(y_min, 0)
+        x_max = min(frame.shape[1], x_max)
+        y_max = min(frame.shape[0], y_max)
         # Crop image
         img = frame[y_min:y_max,x_min:x_max]
         img = Image.fromarray(img)
@@ -125,7 +151,7 @@
         img_shape = img.size()
         img = img.view(1, img_shape[0], img_shape[1], img_shape[2])
         img = Variable(img).cuda(gpu)
-        yaw, pitch, roll = model(img)
+        yaw, pitch, roll, angles = model(img)
 
         yaw_predicted = F.softmax(yaw)
         pitch_predicted = F.softmax(pitch)
@@ -136,7 +162,10 @@
         roll_predicted = torch.sum(roll_predicted.data[0] * idx_tensor) * 3 - 99
 
         # Print new frame with cube and TODO: axis
+        txt_out.write(str(frame_num) + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted))
         utils.plot_pose_cube(frame, yaw_predicted, pitch_predicted, roll_predicted, (x_min + x_max) / 2, (y_min + y_max) / 2, size = 200)
+        # Plot expanded bounding box
+        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0,255,0), 3)
         out.write(frame)
 
         frame_num += 1
@@ -147,6 +176,7 @@
             out.release()
             video.release()
             bbox_file.close()
+            txt_out.close()
             sys.exit(0)
         out.write(frame)
         frame_num += 1

--
Gitblit v1.8.0