Algorithm/deepHeadPose.git

			@@ -48,9 +48,9 @@
			# Backbone selection for the Hopenet model. Each constructor call passes a
			# torchvision ResNet block class, a four-entry layer list and the value 66.
			# NOTE(review): this excerpt looks like a diff whose +/- markers were
			# stripped, so the old and new form of each changed line appear together:
			# the ResNet50 call is present both commented out and active, and the
			# ResNet18 call both active and commented out. Read literally, the active
			# ResNet18 assignment below overwrites the ResNet50 one -- confirm which
			# backbone matches the snapshot being loaded.
			# ResNet101 with 3 outputs.
			# model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 23, 3], 66)
			# ResNet50
			# model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66)
			# This variant passes an extra fourth argument (0); its meaning is defined
			# by hopenet.Hopenet, which is not visible here -- TODO confirm.
			model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66, 0)
			# ResNet18
			model = hopenet.Hopenet(torchvision.models.resnet.BasicBlock, [2, 2, 2, 2], 66)
			# model = hopenet.Hopenet(torchvision.models.resnet.BasicBlock, [2, 2, 2, 2], 66)

			print 'Loading snapshot.'
			# Load snapshot
			@@ -60,7 +60,8 @@
			print 'Loading data.'

			# Build the preprocessing pipeline with transforms.Compose: Scale(224),
			# a 224 crop, then ToTensor.
			# NOTE(review): the two lines following `transforms.Scale(224),` look like
			# the old and new tail of the same statement (diff markers stripped). The
			# first tail closes the Compose list with RandomCrop + ToTensor; the second
			# uses CenterCrop instead and appends Normalize. Only one of the two tails
			# can follow the opening line in valid Python.
			transformations = transforms.Compose([transforms.Scale(224),
			transforms.RandomCrop(224), transforms.ToTensor()])
			transforms.CenterCrop(224), transforms.ToTensor(),
			# Per-channel mean/std normalization; presumably the ImageNet statistics
			# expected by pretrained torchvision models -- confirm.
			transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])

			# Move the model to the GPU identified by `gpu`.
			model.cuda(gpu)

			@@ -74,11 +75,21 @@
			# Convert idx_tensor to a FloatTensor and place it on the GPU.
			idx_tensor = torch.FloatTensor(idx_tensor).cuda(gpu)

			# Open the input video and read its frame size for the output writer.
			# NOTE(review): this region looks like a diff with stripped +/- markers.
			# The cv2.CAP_PROP_* / cv2.VideoWriter_fourcc calls (labelled "New cv2")
			# and the cv2.cv.CV_* calls (labelled "Old cv2") both appear active here;
			# presumably only one family exists in a given OpenCV install, so only one
			# set should remain -- confirm against the OpenCV version in use.
			video = cv2.VideoCapture(video_path)
			width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) # float
			height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) # float

			# New cv2
			# width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH)) # float
			# height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) # float
			#
			# # Define the codec and create VideoWriter object
			# fourcc = cv2.VideoWriter_fourcc(*'MJPG')
			# out = cv2.VideoWriter('output/video/output-%s.avi' % args.output_string, fourcc, 30.0, (width, height))

			# Old cv2
			# video.get() returns a float (see trailing comments), hence the int() cast.
			width = int(video.get(cv2.cv.CV_CAP_PROP_FRAME_WIDTH)) # float
			height = int(video.get(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT)) # float

			# Define the codec and create VideoWriter object
			# fourcc is assigned twice; read literally, the second assignment wins.
			fourcc = cv2.VideoWriter_fourcc(*'MJPG')
			fourcc = cv2.cv.CV_FOURCC(*'MJPG')
			# Output video: MJPG codec, 30.0 fps, same frame size as the input.
			out = cv2.VideoWriter('output/video/output-%s.avi' % args.output_string, fourcc, 30.0, (width, height))

			# Text log opened for writing alongside the output video; no matching
			# close() is visible in this excerpt -- verify it is closed later.
			txt_out = open('output/video/output-%s.txt' % args.output_string, 'w')
			@@ -123,6 +134,14 @@
			# Tail of a branch that starts outside this excerpt: exits with status 0.
			sys.exit(0)

			# Parse the bounding box corners from fields 1-4 of `line`.
			x_min, y_min, x_max, y_max = int(line[1]), int(line[2]), int(line[3]), int(line[4])
			# Grow the box by fixed pixel margins: 150 on the left and right,
			# 250 above and 100 below.
			x_min -= 150
			x_max += 150
			y_min -= 250
			y_max += 100
			# Clamp the expanded box to the frame: frame.shape[1] bounds x and
			# frame.shape[0] bounds y, matching the [y, x] slicing used below.
			x_min = max(x_min, 0)
			y_min = max(y_min, 0)
			x_max = min(frame.shape[1], x_max)
			y_max = min(frame.shape[0], y_max)
			# Crop image
			img = frame[y_min:y_max,x_min:x_max]
			# NOTE(review): if `frame` comes straight from cv2 it is presumably BGR,
			# and no channel reordering is visible here before the PIL conversion --
			# confirm against the code outside this excerpt.
			img = Image.fromarray(img)
			@@ -132,7 +151,7 @@
			# Add a leading batch dimension of 1 to the three-dimensional image tensor
			# and move it to the GPU before the forward pass.
			img_shape = img.size()
			img = img.view(1, img_shape[0], img_shape[1], img_shape[2])
			img = Variable(img).cuda(gpu)
			# NOTE(review): the two forward-pass lines below look like the old and new
			# version of one statement (diff markers stripped). The first unpacks three
			# outputs, the second four; a single model cannot satisfy both, and keeping
			# both would also run the network twice per frame.
			yaw, pitch, roll = model(img)
			yaw, pitch, roll, angles = model(img)

			# Softmax over the yaw and pitch outputs; no `dim` argument is passed, so
			# the axis is whatever F.softmax defaults to in the installed torch version.
			yaw_predicted = F.softmax(yaw)
			pitch_predicted = F.softmax(pitch)
			@@ -145,6 +164,8 @@
			# Print new frame with cube and TODO: axis
			# Log one line per frame: the frame number followed by the three predicted
			# angles, each formatted with %f and separated by spaces.
			txt_out.write(str(frame_num) + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted))
			# Draw the pose cube at the midpoint of the bounding box with size 200.
			# The midpoint uses `/` on ints, which is integer division under the
			# Python 2 syntax this file uses.
			utils.plot_pose_cube(frame, yaw_predicted, pitch_predicted, roll_predicted, (x_min + x_max) / 2, (y_min + y_max) / 2, size = 200)
			# Plot expanded bounding box
			# Drawn in color (0,255,0) with line thickness 3.
			cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0,255,0), 3)
			# Append the annotated frame to the output video.
			out.write(frame)

			frame_num += 1