From f415df3448622f30c3d1eb680596871672b38dac Mon Sep 17 00:00:00 2001 From: natanielruiz <nataniel777@hotmail.com> Date: 星期四, 26 十月 2017 02:51:37 +0800 Subject: [PATCH] after fg --- code/test_on_video.py | 143 +++++++++++++++++++++++++---------------------- 1 files changed, 75 insertions(+), 68 deletions(-) diff --git a/code/test_on_video.py b/code/test_on_video.py index c837775..38fe798 100644 --- a/code/test_on_video.py +++ b/code/test_on_video.py @@ -25,6 +25,7 @@ parser.add_argument('--video', dest='video_path', help='Path of video') parser.add_argument('--bboxes', dest='bboxes', help='Bounding box annotations of frames') parser.add_argument('--output_string', dest='output_string', help='String appended to output file') + parser.add_argument('--n_frames', dest='n_frames', help='Number of frames', type=int) args = parser.parse_args() return args @@ -49,8 +50,6 @@ # model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 23, 3], 66) # ResNet50 model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66, 0) - # ResNet18 - # model = hopenet.Hopenet(torchvision.models.resnet.BasicBlock, [2, 2, 2, 2], 66) print 'Loading snapshot.' # Load snapshot @@ -77,28 +76,31 @@ video = cv2.VideoCapture(video_path) # New cv2 - # width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) # float - # height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) # float - # - # # Define the codec and create VideoWriter object - # fourcc = cv2.VideoWriter_fourcc(*'MJPG') - # out = cv2.VideoWriter('output/video/output-%s.avi' % args.output_string, fourcc, 30.0, (width, height)) - - # Old cv2 - width = int(video.get(cv2.cv.CV_CAP_PROP_FRAME_WIDTH)) # float - height = int(video.get(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT)) # float + width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) # float + height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) # float # Define the codec and create VideoWriter object - fourcc = cv2.cv.CV_FOURCC(*'MJPG') + fourcc = cv2.VideoWriter_fourcc(*'MJPG') out = cv2.VideoWriter('output/video/output-%s.avi' % args.output_string, fourcc, 30.0, (width, height)) + + # # Old cv2 + # width = int(video.get(cv2.cv.CV_CAP_PROP_FRAME_WIDTH)) # float + # height = int(video.get(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT)) # float + # + # # Define the codec and create VideoWriter object + # fourcc = cv2.cv.CV_FOURCC(*'MJPG') + # out = cv2.VideoWriter('output/video/output-%s.avi' % args.output_string, fourcc, 30.0, (width, height)) txt_out = open('output/video/output-%s.txt' % args.output_string, 'w') - bbox_file = open(args.bboxes, 'r') frame_num = 1 - # TODO: support for several bounding boxes - for line in bbox_file: + with open(args.bboxes, 'r') as f: + bbox_line_list = f.read().splitlines() + + idx = 0 + while idx < len(bbox_line_list): + line = bbox_line_list[idx] line = line.strip('\n') line = line.split(' ') det_frame_num = int(line[0]) @@ -106,12 +108,8 @@ print frame_num # Stop at a certain frame number - if frame_num > 10000: - out.release() - video.release() - bbox_file.close() - txt_out.close() - sys.exit(0) + if frame_num > args.n_frames: + break # Save all frames as they are if they don't have bbox annotation. while frame_num < det_frame_num: @@ -119,64 +117,73 @@ if ret == False: out.release() video.release() - bbox_file.close() txt_out.close() sys.exit(0) out.write(frame) frame_num += 1 + # Start processing frame with bounding box ret,frame = video.read() if ret == False: - out.release() - video.release() - bbox_file.close() - txt_out.close() - sys.exit(0) + break - x_min, y_min, x_max, y_max = int(line[1]), int(line[2]), int(line[3]), int(line[4]) - x_min -= 150 - x_max += 150 - y_min -= 250 - y_max += 100 - x_min = max(x_min, 0) - y_min = max(y_min, 0) - x_max = min(frame.shape[1], x_max) - y_max = min(frame.shape[0], y_max) - # Crop image - img = frame[y_min:y_max,x_min:x_max] - img = Image.fromarray(img) + while True: + x_min, y_min, x_max, y_max, conf = int(float(line[1])), int(float(line[2])), int(float(line[3])), int(float(line[4])), float(line[5]) - # Transform - img = transformations(img) - img_shape = img.size() - img = img.view(1, img_shape[0], img_shape[1], img_shape[2]) - img = Variable(img).cuda(gpu) - yaw, pitch, roll, angles = model(img) + if conf > 0.98: + bbox_width = abs(x_max - x_min) + bbox_height = abs(y_max - y_min) + # x_min -= 3 * bbox_width / 4 + # x_max += 3 * bbox_width / 4 + # y_min -= 3 * bbox_height / 4 + # y_max += bbox_height / 4 + x_min -= 50 + x_max += 50 + y_min -= 50 + y_max += 30 + x_min = max(x_min, 0) + y_min = max(y_min, 0) + x_max = min(frame.shape[1], x_max) + y_max = min(frame.shape[0], y_max) + # Crop image + img = frame[y_min:y_max,x_min:x_max] + img = Image.fromarray(img) - yaw_predicted = F.softmax(yaw) - pitch_predicted = F.softmax(pitch) - roll_predicted = F.softmax(roll) - # Get continuous predictions in degrees. - yaw_predicted = torch.sum(yaw_predicted.data[0] * idx_tensor) * 3 - 99 - pitch_predicted = torch.sum(pitch_predicted.data[0] * idx_tensor) * 3 - 99 - roll_predicted = torch.sum(roll_predicted.data[0] * idx_tensor) * 3 - 99 + # Transform + img = transformations(img) + img_shape = img.size() + img = img.view(1, img_shape[0], img_shape[1], img_shape[2]) + img = Variable(img).cuda(gpu) + yaw, pitch, roll, angles = model(img) - # Print new frame with cube and TODO: axis - txt_out.write(str(frame_num) + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted)) - utils.plot_pose_cube(frame, yaw_predicted, pitch_predicted, roll_predicted, (x_min + x_max) / 2, (y_min + y_max) / 2, size = 200) - # Plot expanded bounding box - cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0,255,0), 3) - out.write(frame) + yaw_predicted = F.softmax(yaw) + pitch_predicted = F.softmax(pitch) + roll_predicted = F.softmax(roll) + # Get continuous predictions in degrees. + yaw_predicted = torch.sum(yaw_predicted.data[0] * idx_tensor) * 3 - 99 + pitch_predicted = torch.sum(pitch_predicted.data[0] * idx_tensor) * 3 - 99 + roll_predicted = torch.sum(roll_predicted.data[0] * idx_tensor) * 3 - 99 + # Print new frame with cube and axis + txt_out.write(str(frame_num) + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted)) + # utils.plot_pose_cube(frame, yaw_predicted, pitch_predicted, roll_predicted, (x_min + x_max) / 2, (y_min + y_max) / 2, size = bbox_width) + utils.draw_axis(frame, yaw_predicted, pitch_predicted, roll_predicted, tdx = (x_min + x_max) / 2, tdy= (y_min + y_max) / 2, size = bbox_height/2) + # Plot expanded bounding box + # cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0,255,0), 1) - frame_num += 1 + # Peek next frame detection + next_frame_num = int(bbox_line_list[idx+1].strip('\n').split(' ')[0]) + # print 'next_frame_num ', next_frame_num + if next_frame_num == det_frame_num: + idx += 1 + line = bbox_line_list[idx].strip('\n').split(' ') + det_frame_num = int(line[0]) + else: + break - while True: - ret, frame = video.read() - if ret == False: - out.release() - video.release() - bbox_file.close() - txt_out.close() - sys.exit(0) + idx += 1 out.write(frame) frame_num += 1 + + out.release() + video.release() + txt_out.close() -- Gitblit v1.8.0