From f415df3448622f30c3d1eb680596871672b38dac Mon Sep 17 00:00:00 2001
From: natanielruiz <nataniel777@hotmail.com>
Date: Thu, 26 Oct 2017 02:51:37 +0800
Subject: [PATCH] after fg

---
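Notes (summary of this change):
 - Use the new cv2 API for capture properties and the codec
   (cv2.CAP_PROP_FRAME_WIDTH, cv2.VideoWriter_fourcc); the old cv2.cv.*
   calls are kept commented out for reference.
 - Read the bbox annotations into memory and peek at the following line,
   so frames with several detections per frame are now handled; detections
   are filtered at conf > 0.98.
 - Replace the hardcoded 10000-frame cap with a --n_frames argument.

The bbox file is expected to hold one detection per line in the form
"frame_num x_min y_min x_max y_max conf", e.g. (illustrative values):

    12 104.0 85.0 311.0 297.0 0.9991

A sketch of an invocation, assuming any arguments not shown in this diff
(e.g. the model snapshot) are supplied as the script requires:

    python code/test_on_video.py --video input.mp4 --bboxes dets.txt \
        --output_string demo --n_frames 500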
 code/test_on_video.py |  150 +++++++++++++++++++++++++----------------------
 1 file changed, 82 insertions(+), 68 deletions(-)

diff --git a/code/test_on_video.py b/code/test_on_video.py
index c837775..38fe798 100644
--- a/code/test_on_video.py
+++ b/code/test_on_video.py
@@ -25,6 +25,7 @@
     parser.add_argument('--video', dest='video_path', help='Path of video')
     parser.add_argument('--bboxes', dest='bboxes', help='Bounding box annotations of frames')
     parser.add_argument('--output_string', dest='output_string', help='String appended to output file')
+    # Default to the previous hardcoded cap so the flag can be omitted safely
+    parser.add_argument('--n_frames', dest='n_frames', help='Number of frames to process', type=int, default=10000)
     args = parser.parse_args()
     return args
 
@@ -49,8 +50,6 @@
     # model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 23, 3], 66)
     # ResNet50
     model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66, 0)
-    # ResNet18
-    # model = hopenet.Hopenet(torchvision.models.resnet.BasicBlock, [2, 2, 2, 2], 66)
 
     print 'Loading snapshot.'
     # Load snapshot
@@ -77,28 +76,32 @@
     video = cv2.VideoCapture(video_path)
 
     # New cv2
-    # width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))   # float
-    # height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) # float
-    #
-    # # Define the codec and create VideoWriter object
-    # fourcc = cv2.VideoWriter_fourcc(*'MJPG')
-    # out = cv2.VideoWriter('output/video/output-%s.avi' % args.output_string, fourcc, 30.0, (width, height))
-
-    # Old cv2
-    width = int(video.get(cv2.cv.CV_CAP_PROP_FRAME_WIDTH))   # float
-    height = int(video.get(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT)) # float
+    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))   # float
+    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) # float
 
     # Define the codec and create VideoWriter object
-    fourcc = cv2.cv.CV_FOURCC(*'MJPG')
+    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
     out = cv2.VideoWriter('output/video/output-%s.avi' % args.output_string, fourcc, 30.0, (width, height))
+
+    # # Old cv2
+    # width = int(video.get(cv2.cv.CV_CAP_PROP_FRAME_WIDTH))   # float
+    # height = int(video.get(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT)) # float
+    #
+    # # Define the codec and create VideoWriter object
+    # fourcc = cv2.cv.CV_FOURCC(*'MJPG')
+    # out = cv2.VideoWriter('output/video/output-%s.avi' % args.output_string, fourcc, 30.0, (width, height))
 
     txt_out = open('output/video/output-%s.txt' % args.output_string, 'w')
 
-    bbox_file = open(args.bboxes, 'r')
     frame_num = 1
 
-    # TODO: support for several bounding boxes
-    for line in bbox_file:
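+    # Read all detections up front so we can peek ahead when a frame has several boxes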
+    with open(args.bboxes, 'r') as f:
+        bbox_line_list = f.read().splitlines()
+
+    idx = 0
+    while idx < len(bbox_line_list):
+        line = bbox_line_list[idx]
         line = line.strip('\n')
         line = line.split(' ')
         det_frame_num = int(line[0])
@@ -106,12 +108,8 @@
         print frame_num
 
         # Stop at a certain frame number
-        if frame_num > 10000:
-            out.release()
-            video.release()
-            bbox_file.close()
-            txt_out.close()
-            sys.exit(0)
+        if frame_num > args.n_frames:
+            break
 
         # Save all frames as they are if they don't have bbox annotation.
         while frame_num < det_frame_num:
@@ -119,64 +117,79 @@
             if ret == False:
                 out.release()
                 video.release()
-                bbox_file.close()
                 txt_out.close()
                 sys.exit(0)
             out.write(frame)
             frame_num += 1
 
+        # Process the frame that has this bounding box annotation
         ret,frame = video.read()
         if ret == False:
-            out.release()
-            video.release()
-            bbox_file.close()
-            txt_out.close()
-            sys.exit(0)
+            break
 
-        x_min, y_min, x_max, y_max = int(line[1]), int(line[2]), int(line[3]), int(line[4])
-        x_min -= 150
-        x_max += 150
-        y_min -= 250
-        y_max += 100
-        x_min = max(x_min, 0)
-        y_min = max(y_min, 0)
-        x_max = min(frame.shape[1], x_max)
-        y_max = min(frame.shape[0], y_max)
-        # Crop image
-        img = frame[y_min:y_max,x_min:x_max]
-        img = Image.fromarray(img)
+        while True:
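+            # Each detection line: frame_num x_min y_min x_max y_max conf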
+            x_min, y_min, x_max, y_max = [int(float(v)) for v in line[1:5]]
+            conf = float(line[5])
 
-        # Transform
-        img = transformations(img)
-        img_shape = img.size()
-        img = img.view(1, img_shape[0], img_shape[1], img_shape[2])
-        img = Variable(img).cuda(gpu)
-        yaw, pitch, roll, angles = model(img)
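+            # Only keep high-confidence detections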
+            if conf > 0.98:
+                bbox_width = abs(x_max - x_min)
+                bbox_height = abs(y_max - y_min)
+                # x_min -= 3 * bbox_width / 4
+                # x_max += 3 * bbox_width / 4
+                # y_min -= 3 * bbox_height / 4
+                # y_max += bbox_height / 4
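+                # Fixed-pixel padding so the crop covers the whole head
+                # (the proportional version above is kept for reference)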
+                x_min -= 50
+                x_max += 50
+                y_min -= 50
+                y_max += 30
+                x_min = max(x_min, 0)
+                y_min = max(y_min, 0)
+                x_max = min(frame.shape[1], x_max)
+                y_max = min(frame.shape[0], y_max)
+                # Crop image
+                img = frame[y_min:y_max,x_min:x_max]
+                img = Image.fromarray(img)
 
-        yaw_predicted = F.softmax(yaw)
-        pitch_predicted = F.softmax(pitch)
-        roll_predicted = F.softmax(roll)
-        # Get continuous predictions in degrees.
-        yaw_predicted = torch.sum(yaw_predicted.data[0] * idx_tensor) * 3 - 99
-        pitch_predicted = torch.sum(pitch_predicted.data[0] * idx_tensor) * 3 - 99
-        roll_predicted = torch.sum(roll_predicted.data[0] * idx_tensor) * 3 - 99
+                # Transform
+                img = transformations(img)
+                img_shape = img.size()
+                img = img.view(1, img_shape[0], img_shape[1], img_shape[2])
+                img = Variable(img).cuda(gpu)
+                yaw, pitch, roll, angles = model(img)
 
-        # Print new frame with cube and TODO: axis
-        txt_out.write(str(frame_num) + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted))
-        utils.plot_pose_cube(frame, yaw_predicted, pitch_predicted, roll_predicted, (x_min + x_max) / 2, (y_min + y_max) / 2, size = 200)
-        # Plot expanded bounding box
-        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0,255,0), 3)
-        out.write(frame)
+                yaw_predicted = F.softmax(yaw)
+                pitch_predicted = F.softmax(pitch)
+                roll_predicted = F.softmax(roll)
+                # Continuous predictions in degrees: expectation over the 66 bins, 3-degree bins spanning [-99, 99]
+                yaw_predicted = torch.sum(yaw_predicted.data[0] * idx_tensor) * 3 - 99
+                pitch_predicted = torch.sum(pitch_predicted.data[0] * idx_tensor) * 3 - 99
+                roll_predicted = torch.sum(roll_predicted.data[0] * idx_tensor) * 3 - 99
+                # Write angles to the txt output and draw the pose axis on the frame
+                txt_out.write(str(frame_num) + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted))
+                # utils.plot_pose_cube(frame, yaw_predicted, pitch_predicted, roll_predicted, (x_min + x_max) / 2, (y_min + y_max) / 2, size = bbox_width)
+                utils.draw_axis(frame, yaw_predicted, pitch_predicted, roll_predicted, tdx = (x_min + x_max) / 2, tdy= (y_min + y_max) / 2, size = bbox_height/2)
+                # Plot expanded bounding box
+                # cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0,255,0), 1)
 
-        frame_num += 1
+            # Peek at the next detection line; stop if this was the last one
+            if idx + 1 >= len(bbox_line_list):
+                break
+            next_frame_num = int(bbox_line_list[idx+1].strip('\n').split(' ')[0])
+            # Consume further detections belonging to the same frame
+            if next_frame_num == det_frame_num:
+                idx += 1
+                line = bbox_line_list[idx].strip('\n').split(' ')
+                det_frame_num = int(line[0])
+            else:
+                break
 
-    while True:
-        ret, frame = video.read()
-        if ret == False:
-            out.release()
-            video.release()
-            bbox_file.close()
-            txt_out.close()
-            sys.exit(0)
+        idx += 1
         out.write(frame)
         frame_num += 1
+
+    out.release()
+    video.release()
+    txt_out.close()

--
Gitblit v1.8.0