| | |
# NOTE(review): every row here is wrapped in "| | | ... |" table markup and
# some statements appear in two versions, so this looks like a rendered diff
# rather than a runnable script -- confirm against the real file before use.
# Abort immediately when the input video path does not exist on disk.
| | | if not os.path.exists(args.video_path): |
| | | sys.exit('Video does not exist') |
| | | |
| | | # ResNet50 with 3 outputs. |
| | | # ResNet101 with 3 outputs. |
| | | # model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 23, 3], 66) |
| | | # ResNet50 |
# Active choice: Bottleneck blocks with layer counts [3, 4, 6, 3]. The other
# Hopenet constructions around it are commented out. The final argument 66 is
# presumably the number of output bins per angle -- TODO confirm in hopenet.
| | | model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66) |
| | | # ResNet18 |
| | | # model = hopenet.Hopenet(torchvision.models.resnet.BasicBlock, [2, 2, 2, 2], 66) |
| | |
# Python 2 print statement; this is a syntax error under Python 3.
| | | print 'Loading data.' |
| | | |
# Preprocessing pipeline, starting with Scale(224). Two different endings
# for the Compose list follow: RandomCrop + ToTensor, then CenterCrop +
# ToTensor + Normalize. Only one can close the call opened here; presumably
# they are the old and new sides of one change -- TODO confirm which is live.
# NOTE(review): transforms.Scale looks like the older torchvision name for
# Resize -- verify against the installed torchvision version.
| | | transformations = transforms.Compose([transforms.Scale(224), |
| | | transforms.RandomCrop(224), transforms.ToTensor()]) |
| | | transforms.CenterCrop(224), transforms.ToTensor(), |
| | | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]) |
| | | |
# Move the model to the CUDA device selected by `gpu`, which is defined
# outside this view.
| | | model.cuda(gpu) |
| | | |
| | |
# Four-character codec code 'MJPG' for the output video writer.
| | | fourcc = cv2.VideoWriter_fourcc(*'MJPG') |
# The writer is fixed at 30.0 fps. `width` and `height` come from outside this
# view (presumably the input video's dimensions -- TODO confirm). The
# output/video/ directory is not created in the visible lines.
| | | out = cv2.VideoWriter('output/video/output-%s.avi' % args.output_string, fourcc, 30.0, (width, height)) |
| | | |
# Text file sharing the video's base name. It is opened without a `with`
# block, so it stays open until an explicit txt_out.close() call runs.
| | | txt_out = open('output/video/output-%s.txt' % args.output_string, 'w') |
| | | |
# Annotation file read from the path given in args.bboxes; also closed
# explicitly rather than through a context manager.
| | | bbox_file = open(args.bboxes, 'r') |
# The frame counter starts at 1, not 0.
| | | frame_num = 1 |
| | | |
| | |
# Shutdown sequence: release the video writer and `video`, close both files,
# then exit with status 0. The condition that guards it is not visible here.
# `video` is presumably the capture object opened elsewhere -- TODO confirm.
| | | out.release() |
| | | video.release() |
| | | bbox_file.close() |
| | | txt_out.close() |
| | | sys.exit(0) |
| | | |
| | | # Save all frames as they are if they don't have bbox annotation. |
| | |
# Same shutdown sequence again; its guard is likewise outside this view.
| | | out.release() |
| | | video.release() |
| | | bbox_file.close() |
| | | txt_out.close() |
| | | sys.exit(0) |
# Write the current frame to the output unmodified and advance the counter.
# NOTE(review): indentation is lost in this view, so it cannot be determined
# here which loop or branch these two lines belong to.
| | | out.write(frame) |
| | | frame_num += 1 |
| | |
# Third copy of the shutdown sequence, again with no visible guard.
| | | out.release() |
| | | video.release() |
| | | bbox_file.close() |
| | | txt_out.close() |
| | | sys.exit(0) |
| | | |
# Read the box corners from fields 1-4 of `line` as integers. `line` is built
# outside this view (presumably one split row of bbox_file -- TODO confirm).
| | | x_min, y_min, x_max, y_max = int(line[1]), int(line[2]), int(line[3]), int(line[4]) |
# Grow the box by fixed pixel margins: 150 on each x side, 250 on the y_min
# side and 100 on the y_max side.
| | | x_min -= 150 |
| | | x_max += 150 |
| | | y_min -= 250 |
| | | y_max += 100 |
# Clamp the grown box to the frame: frame.shape[1] bounds x and
# frame.shape[0] bounds y.
| | | x_min = max(x_min, 0) |
| | | y_min = max(y_min, 0) |
| | | x_max = min(frame.shape[1], x_max) |
| | | y_max = min(frame.shape[0], y_max) |
| | | # Crop image |
| | | img = frame[y_min:y_max,x_min:x_max] |
# Wrap the cropped array as an Image object (presumably PIL -- TODO confirm).
| | | img = Image.fromarray(img) |
| | |
# NOTE(review): size() is called as a method and its result is indexed three
# times below, so `img` is no longer the Image built above. A conversion step
# (presumably `transformations(img)`) seems to be missing from this view.
| | | img_shape = img.size() |
# Prepend a batch dimension of 1 in front of the three existing dimensions.
| | | img = img.view(1, img_shape[0], img_shape[1], img_shape[2]) |
# Wrap as a Variable and move it to the CUDA device `gpu`.
| | | img = Variable(img).cuda(gpu) |
# Two versions of the forward call follow, unpacking three and then four
# outputs. Presumably they are the old and new sides of one change and only
# one matches the current model -- TODO confirm.
| | | yaw, pitch, roll = model(img) |
| | | yaw, pitch, roll, angles = model(img) |
| | | |
# Softmax over the yaw and pitch outputs. No `dim` argument is passed, so the
# axis is chosen implicitly by the installed torch version.
| | | yaw_predicted = F.softmax(yaw) |
| | | pitch_predicted = F.softmax(pitch) |
| | |
# Sum of element 0 of roll_predicted.data weighted by idx_tensor, then scaled
# by 3 and shifted by -99 (presumably bin index to degrees -- TODO confirm).
# NOTE(review): neither the softmax that first assigns roll_predicted, nor
# idx_tensor, nor the matching yaw/pitch conversions are visible in this view.
| | | roll_predicted = torch.sum(roll_predicted.data[0] * idx_tensor) * 3 - 99 |
| | | |
| | | # Print new frame with cube and TODO: axis |
# Log one text line per processed frame: the frame number followed by the
# yaw, pitch and roll values, each formatted with %f.
| | | txt_out.write(str(frame_num) + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted)) |
# Pass the frame, the three angles and the midpoint of the expanded box to the
# cube plotter with size 200. The return value is discarded, so it presumably
# draws on `frame` in place -- TODO confirm in utils.
| | | utils.plot_pose_cube(frame, yaw_predicted, pitch_predicted, roll_predicted, (x_min + x_max) / 2, (y_min + y_max) / 2, size = 200) |
| | | # Plot expanded bounding box |
# Rectangle drawn with colour (0,255,0) and line thickness 3.
| | | cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0,255,0), 3) |
# Append the annotated frame to the output video.
| | | out.write(frame) |
| | | |
| | | frame_num += 1 |
| | |
# Shutdown sequence: release the video writer and `video`, close both files,
# then exit with status 0. Its guarding condition is not visible in this view.
| | | out.release() |
| | | video.release() |
| | | bbox_file.close() |
| | | txt_out.close() |
| | | sys.exit(0) |
# Write the frame to the output and advance the counter. NOTE(review): with
# indentation lost, the branch these two lines belong to cannot be determined.
| | | out.write(frame) |
| | | frame_num += 1 |