Algorithm/deepHeadPose.git

parent: 4b67b5c8 | 补丁 | 提交 | ignore whitespace

natanielruiz

2017-10-30 b217d495fed78e32e30a769307cd9eb7c2f65952

Almost done

3 文件已重命名

1个文件已修改

	code/test_hopenet.py	补丁 \| 查看 \| 原始文档 \| blame \| 历史
	code/test_on_video.py	80 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	code/test_on_video_dockerface.py	79 ●●●●● 补丁 \| 查看 \| 原始文档 \| blame \| 历史
	code/train_hopenet.py	补丁 \| 查看 \| 原始文档 \| blame \| 历史

 code/test_hopenet.py


 code/test_on_video.py

@@ -27,6 +27,7 @@
    parser.add_argument('--bboxes', dest='bboxes', help='Bounding box annotations of frames')
    parser.add_argument('--output_string', dest='output_string', help='String appended to output file')
    parser.add_argument('--n_frames', dest='n_frames', help='Number of frames', type=int)
    parser.add_argument('--fps', dest='fps', help='Frames per second of source video', type=float, default=30.)
    args = parser.parse_args()
    return args

@@ -80,7 +81,7 @@

    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter('output/video/output-%s.avi' % args.output_string, fourcc, 30.0, (width, height))
    out = cv2.VideoWriter('output/video/output-%s.avi' % args.output_string, fourcc, args.fps, (width, height))

    # # Old cv2
    # width = int(video.get(cv2.cv.CV_CAP_PROP_FRAME_WIDTH))   # float
@@ -118,7 +119,7 @@
                video.release()
                txt_out.close()
                sys.exit(0)
            out.write(frame)
            # out.write(frame)
            frame_num += 1

        # Start processing frame with bounding box
@@ -127,49 +128,48 @@
            break

        while True:
            x_min, y_min, x_max, y_max, conf = int(float(line[1])), int(float(line[2])), int(float(line[3])), int(float(line[4])), float(line[5])
            x_min, y_min, x_max, y_max = int(float(line[1])), int(float(line[2])), int(float(line[3])), int(float(line[4]))

            if conf > 0.98:
                bbox_width = abs(x_max - x_min)
                bbox_height = abs(y_max - y_min)
                # x_min -= 3 * bbox_width / 4
                # x_max += 3 * bbox_width / 4
                # y_min -= 3 * bbox_height / 4
                # y_max += bbox_height / 4
                x_min -= 50
                x_max += 50
                y_min -= 50
                y_max += 30
                x_min = max(x_min, 0)
                y_min = max(y_min, 0)
                x_max = min(frame.shape[1], x_max)
                y_max = min(frame.shape[0], y_max)
                # Crop image
                img = frame[y_min:y_max,x_min:x_max]
                img = Image.fromarray(img)
            bbox_width = abs(x_max - x_min)
            bbox_height = abs(y_max - y_min)
            # x_min -= 3 * bbox_width / 4
            # x_max += 3 * bbox_width / 4
            # y_min -= 3 * bbox_height / 4
            # y_max += bbox_height / 4
            x_min -= 50
            x_max += 50
            y_min -= 50
            y_max += 30
            x_min = max(x_min, 0)
            y_min = max(y_min, 0)
            x_max = min(frame.shape[1], x_max)
            y_max = min(frame.shape[0], y_max)
            # Crop face loosely
            img = frame[y_min:y_max,x_min:x_max]
            img = Image.fromarray(img)

                # Transform
                img = transformations(img)
                img_shape = img.size()
                img = img.view(1, img_shape[0], img_shape[1], img_shape[2])
                img = Variable(img).cuda(gpu)
            # Transform
            img = transformations(img)
            img_shape = img.size()
            img = img.view(1, img_shape[0], img_shape[1], img_shape[2])
            img = Variable(img).cuda(gpu)

                yaw, pitch, roll = model(img)
            yaw, pitch, roll = model(img)

                yaw_predicted = F.softmax(yaw)
                pitch_predicted = F.softmax(pitch)
                roll_predicted = F.softmax(roll)
                # Get continuous predictions in degrees.
                yaw_predicted = torch.sum(yaw_predicted.data[0] * idx_tensor) * 3 - 99
                pitch_predicted = torch.sum(pitch_predicted.data[0] * idx_tensor) * 3 - 99
                roll_predicted = torch.sum(roll_predicted.data[0] * idx_tensor) * 3 - 99
            yaw_predicted = F.softmax(yaw)
            pitch_predicted = F.softmax(pitch)
            roll_predicted = F.softmax(roll)
            # Get continuous predictions in degrees.
            yaw_predicted = torch.sum(yaw_predicted.data[0] * idx_tensor) * 3 - 99
            pitch_predicted = torch.sum(pitch_predicted.data[0] * idx_tensor) * 3 - 99
            roll_predicted = torch.sum(roll_predicted.data[0] * idx_tensor) * 3 - 99

                # Print new frame with cube and axis
                txt_out.write(str(frame_num) + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted))
                # utils.plot_pose_cube(frame, yaw_predicted, pitch_predicted, roll_predicted, (x_min + x_max) / 2, (y_min + y_max) / 2, size = bbox_width)
                utils.draw_axis(frame, yaw_predicted, pitch_predicted, roll_predicted, tdx = (x_min + x_max) / 2, tdy= (y_min + y_max) / 2, size = bbox_height/2)
                # Plot expanded bounding box
                # cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0,255,0), 1)
            # Print new frame with cube and axis
            txt_out.write(str(frame_num) + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted))
            # utils.plot_pose_cube(frame, yaw_predicted, pitch_predicted, roll_predicted, (x_min + x_max) / 2, (y_min + y_max) / 2, size = bbox_width)
            utils.draw_axis(frame, yaw_predicted, pitch_predicted, roll_predicted, tdx = (x_min + x_max) / 2, tdy= (y_min + y_max) / 2, size = bbox_height/2)
            # Plot expanded bounding box
            # cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0,255,0), 1)

            # Peek next frame detection
            next_frame_num = int(bbox_line_list[idx+1].strip('\n').split(' ')[0])

 code/test_on_video_dockerface.py

File was renamed from code/test_on_video_noconf.py
@@ -27,7 +27,7 @@
    parser.add_argument('--bboxes', dest='bboxes', help='Bounding box annotations of frames')
    parser.add_argument('--output_string', dest='output_string', help='String appended to output file')
    parser.add_argument('--n_frames', dest='n_frames', help='Number of frames', type=int)
    parser.add_argument('--fps', dest='fps', help='Frames per second of source video', type=float)
    parser.add_argument('--fps', dest='fps', help='Frames per second of source video', type=float, default=30.)
    args = parser.parse_args()
    return args

@@ -119,7 +119,7 @@
                video.release()
                txt_out.close()
                sys.exit(0)
            # out.write(frame)
            out.write(frame)
            frame_num += 1

        # Start processing frame with bounding box
@@ -128,48 +128,49 @@
            break

        while True:
            x_min, y_min, x_max, y_max = int(float(line[1])), int(float(line[2])), int(float(line[3])), int(float(line[4]))
            x_min, y_min, x_max, y_max, conf = int(float(line[1])), int(float(line[2])), int(float(line[3])), int(float(line[4])), float(line[5])

            bbox_width = abs(x_max - x_min)
            bbox_height = abs(y_max - y_min)
            # x_min -= 3 * bbox_width / 4
            # x_max += 3 * bbox_width / 4
            # y_min -= 3 * bbox_height / 4
            # y_max += bbox_height / 4
            x_min -= 50
            x_max += 50
            y_min -= 50
            y_max += 30
            x_min = max(x_min, 0)
            y_min = max(y_min, 0)
            x_max = min(frame.shape[1], x_max)
            y_max = min(frame.shape[0], y_max)
            # Crop face loosely
            img = frame[y_min:y_max,x_min:x_max]
            img = Image.fromarray(img)
            if conf > 0.98:
                bbox_width = abs(x_max - x_min)
                bbox_height = abs(y_max - y_min)
                # x_min -= 3 * bbox_width / 4
                # x_max += 3 * bbox_width / 4
                # y_min -= 3 * bbox_height / 4
                # y_max += bbox_height / 4
                x_min -= 50
                x_max += 50
                y_min -= 50
                y_max += 30
                x_min = max(x_min, 0)
                y_min = max(y_min, 0)
                x_max = min(frame.shape[1], x_max)
                y_max = min(frame.shape[0], y_max)
                # Crop image
                img = frame[y_min:y_max,x_min:x_max]
                img = Image.fromarray(img)

            # Transform
            img = transformations(img)
            img_shape = img.size()
            img = img.view(1, img_shape[0], img_shape[1], img_shape[2])
            img = Variable(img).cuda(gpu)
                # Transform
                img = transformations(img)
                img_shape = img.size()
                img = img.view(1, img_shape[0], img_shape[1], img_shape[2])
                img = Variable(img).cuda(gpu)

            yaw, pitch, roll = model(img)
                yaw, pitch, roll = model(img)

            yaw_predicted = F.softmax(yaw)
            pitch_predicted = F.softmax(pitch)
            roll_predicted = F.softmax(roll)
            # Get continuous predictions in degrees.
            yaw_predicted = torch.sum(yaw_predicted.data[0] * idx_tensor) * 3 - 99
            pitch_predicted = torch.sum(pitch_predicted.data[0] * idx_tensor) * 3 - 99
            roll_predicted = torch.sum(roll_predicted.data[0] * idx_tensor) * 3 - 99
                yaw_predicted = F.softmax(yaw)
                pitch_predicted = F.softmax(pitch)
                roll_predicted = F.softmax(roll)
                # Get continuous predictions in degrees.
                yaw_predicted = torch.sum(yaw_predicted.data[0] * idx_tensor) * 3 - 99
                pitch_predicted = torch.sum(pitch_predicted.data[0] * idx_tensor) * 3 - 99
                roll_predicted = torch.sum(roll_predicted.data[0] * idx_tensor) * 3 - 99

            # Print new frame with cube and axis
            txt_out.write(str(frame_num) + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted))
            # utils.plot_pose_cube(frame, yaw_predicted, pitch_predicted, roll_predicted, (x_min + x_max) / 2, (y_min + y_max) / 2, size = bbox_width)
            utils.draw_axis(frame, yaw_predicted, pitch_predicted, roll_predicted, tdx = (x_min + x_max) / 2, tdy= (y_min + y_max) / 2, size = bbox_height/2)
            # Plot expanded bounding box
            # cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0,255,0), 1)
                # Print new frame with cube and axis
                txt_out.write(str(frame_num) + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted))
                # utils.plot_pose_cube(frame, yaw_predicted, pitch_predicted, roll_predicted, (x_min + x_max) / 2, (y_min + y_max) / 2, size = bbox_width)
                utils.draw_axis(frame, yaw_predicted, pitch_predicted, roll_predicted, tdx = (x_min + x_max) / 2, tdy= (y_min + y_max) / 2, size = bbox_height/2)
                # Plot expanded bounding box
                # cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0,255,0), 1)

            # Peek next frame detection
            next_frame_num = int(bbox_line_list[idx+1].strip('\n').split(' ')[0])

 code/train_hopenet.py

			@@ -27,6 +27,7 @@
			parser.add_argument('--bboxes', dest='bboxes', help='Bounding box annotations of frames')
			parser.add_argument('--output_string', dest='output_string', help='String appended to output file')
			parser.add_argument('--n_frames', dest='n_frames', help='Number of frames', type=int)
			parser.add_argument('--fps', dest='fps', help='Frames per second of source video', type=float, default=30.)
			args = parser.parse_args()
			return args

			@@ -80,7 +81,7 @@

			# Define the codec and create VideoWriter object
			fourcc = cv2.VideoWriter_fourcc(*'MJPG')
			out = cv2.VideoWriter('output/video/output-%s.avi' % args.output_string, fourcc, 30.0, (width, height))
			out = cv2.VideoWriter('output/video/output-%s.avi' % args.output_string, fourcc, args.fps, (width, height))

			# # Old cv2
			# width = int(video.get(cv2.cv.CV_CAP_PROP_FRAME_WIDTH)) # float
			@@ -118,7 +119,7 @@
			video.release()
			txt_out.close()
			sys.exit(0)
			out.write(frame)
			# out.write(frame)
			frame_num += 1

			# Start processing frame with bounding box
			@@ -127,49 +128,48 @@
			break

			while True:
			x_min, y_min, x_max, y_max, conf = int(float(line[1])), int(float(line[2])), int(float(line[3])), int(float(line[4])), float(line[5])
			x_min, y_min, x_max, y_max = int(float(line[1])), int(float(line[2])), int(float(line[3])), int(float(line[4]))

			if conf > 0.98:
			bbox_width = abs(x_max - x_min)
			bbox_height = abs(y_max - y_min)
			# x_min -= 3 * bbox_width / 4
			# x_max += 3 * bbox_width / 4
			# y_min -= 3 * bbox_height / 4
			# y_max += bbox_height / 4
			x_min -= 50
			x_max += 50
			y_min -= 50
			y_max += 30
			x_min = max(x_min, 0)
			y_min = max(y_min, 0)
			x_max = min(frame.shape[1], x_max)
			y_max = min(frame.shape[0], y_max)
			# Crop image
			img = frame[y_min:y_max,x_min:x_max]
			img = Image.fromarray(img)
			bbox_width = abs(x_max - x_min)
			bbox_height = abs(y_max - y_min)
			# x_min -= 3 * bbox_width / 4
			# x_max += 3 * bbox_width / 4
			# y_min -= 3 * bbox_height / 4
			# y_max += bbox_height / 4
			x_min -= 50
			x_max += 50
			y_min -= 50
			y_max += 30
			x_min = max(x_min, 0)
			y_min = max(y_min, 0)
			x_max = min(frame.shape[1], x_max)
			y_max = min(frame.shape[0], y_max)
			# Crop face loosely
			img = frame[y_min:y_max,x_min:x_max]
			img = Image.fromarray(img)

			# Transform
			img = transformations(img)
			img_shape = img.size()
			img = img.view(1, img_shape[0], img_shape[1], img_shape[2])
			img = Variable(img).cuda(gpu)
			# Transform
			img = transformations(img)
			img_shape = img.size()
			img = img.view(1, img_shape[0], img_shape[1], img_shape[2])
			img = Variable(img).cuda(gpu)

			yaw, pitch, roll = model(img)
			yaw, pitch, roll = model(img)

			yaw_predicted = F.softmax(yaw)
			pitch_predicted = F.softmax(pitch)
			roll_predicted = F.softmax(roll)
			# Get continuous predictions in degrees.
			yaw_predicted = torch.sum(yaw_predicted.data[0] * idx_tensor) * 3 - 99
			pitch_predicted = torch.sum(pitch_predicted.data[0] * idx_tensor) * 3 - 99
			roll_predicted = torch.sum(roll_predicted.data[0] * idx_tensor) * 3 - 99
			yaw_predicted = F.softmax(yaw)
			pitch_predicted = F.softmax(pitch)
			roll_predicted = F.softmax(roll)
			# Get continuous predictions in degrees.
			yaw_predicted = torch.sum(yaw_predicted.data[0] * idx_tensor) * 3 - 99
			pitch_predicted = torch.sum(pitch_predicted.data[0] * idx_tensor) * 3 - 99
			roll_predicted = torch.sum(roll_predicted.data[0] * idx_tensor) * 3 - 99

			# Print new frame with cube and axis
			txt_out.write(str(frame_num) + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted))
			# utils.plot_pose_cube(frame, yaw_predicted, pitch_predicted, roll_predicted, (x_min + x_max) / 2, (y_min + y_max) / 2, size = bbox_width)
			utils.draw_axis(frame, yaw_predicted, pitch_predicted, roll_predicted, tdx = (x_min + x_max) / 2, tdy= (y_min + y_max) / 2, size = bbox_height/2)
			# Plot expanded bounding box
			# cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0,255,0), 1)
			# Print new frame with cube and axis
			txt_out.write(str(frame_num) + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted))
			# utils.plot_pose_cube(frame, yaw_predicted, pitch_predicted, roll_predicted, (x_min + x_max) / 2, (y_min + y_max) / 2, size = bbox_width)
			utils.draw_axis(frame, yaw_predicted, pitch_predicted, roll_predicted, tdx = (x_min + x_max) / 2, tdy= (y_min + y_max) / 2, size = bbox_height/2)
			# Plot expanded bounding box
			# cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0,255,0), 1)

			# Peek next frame detection
			next_frame_num = int(bbox_line_list[idx+1].strip('\n').split(' ')[0])

File was renamed from code/test_on_video_noconf.py
			@@ -27,7 +27,7 @@
			parser.add_argument('--bboxes', dest='bboxes', help='Bounding box annotations of frames')
			parser.add_argument('--output_string', dest='output_string', help='String appended to output file')
			parser.add_argument('--n_frames', dest='n_frames', help='Number of frames', type=int)
			parser.add_argument('--fps', dest='fps', help='Frames per second of source video', type=float)
			parser.add_argument('--fps', dest='fps', help='Frames per second of source video', type=float, default=30.)
			args = parser.parse_args()
			return args

			@@ -119,7 +119,7 @@
			video.release()
			txt_out.close()
			sys.exit(0)
			# out.write(frame)
			out.write(frame)
			frame_num += 1

			# Start processing frame with bounding box
			@@ -128,48 +128,49 @@
			break

			while True:
			x_min, y_min, x_max, y_max = int(float(line[1])), int(float(line[2])), int(float(line[3])), int(float(line[4]))
			x_min, y_min, x_max, y_max, conf = int(float(line[1])), int(float(line[2])), int(float(line[3])), int(float(line[4])), float(line[5])

			bbox_width = abs(x_max - x_min)
			bbox_height = abs(y_max - y_min)
			# x_min -= 3 * bbox_width / 4
			# x_max += 3 * bbox_width / 4
			# y_min -= 3 * bbox_height / 4
			# y_max += bbox_height / 4
			x_min -= 50
			x_max += 50
			y_min -= 50
			y_max += 30
			x_min = max(x_min, 0)
			y_min = max(y_min, 0)
			x_max = min(frame.shape[1], x_max)
			y_max = min(frame.shape[0], y_max)
			# Crop face loosely
			img = frame[y_min:y_max,x_min:x_max]
			img = Image.fromarray(img)
			if conf > 0.98:
			bbox_width = abs(x_max - x_min)
			bbox_height = abs(y_max - y_min)
			# x_min -= 3 * bbox_width / 4
			# x_max += 3 * bbox_width / 4
			# y_min -= 3 * bbox_height / 4
			# y_max += bbox_height / 4
			x_min -= 50
			x_max += 50
			y_min -= 50
			y_max += 30
			x_min = max(x_min, 0)
			y_min = max(y_min, 0)
			x_max = min(frame.shape[1], x_max)
			y_max = min(frame.shape[0], y_max)
			# Crop image
			img = frame[y_min:y_max,x_min:x_max]
			img = Image.fromarray(img)

			# Transform
			img = transformations(img)
			img_shape = img.size()
			img = img.view(1, img_shape[0], img_shape[1], img_shape[2])
			img = Variable(img).cuda(gpu)
			# Transform
			img = transformations(img)
			img_shape = img.size()
			img = img.view(1, img_shape[0], img_shape[1], img_shape[2])
			img = Variable(img).cuda(gpu)

			yaw, pitch, roll = model(img)
			yaw, pitch, roll = model(img)

			yaw_predicted = F.softmax(yaw)
			pitch_predicted = F.softmax(pitch)
			roll_predicted = F.softmax(roll)
			# Get continuous predictions in degrees.
			yaw_predicted = torch.sum(yaw_predicted.data[0] * idx_tensor) * 3 - 99
			pitch_predicted = torch.sum(pitch_predicted.data[0] * idx_tensor) * 3 - 99
			roll_predicted = torch.sum(roll_predicted.data[0] * idx_tensor) * 3 - 99
			yaw_predicted = F.softmax(yaw)
			pitch_predicted = F.softmax(pitch)
			roll_predicted = F.softmax(roll)
			# Get continuous predictions in degrees.
			yaw_predicted = torch.sum(yaw_predicted.data[0] * idx_tensor) * 3 - 99
			pitch_predicted = torch.sum(pitch_predicted.data[0] * idx_tensor) * 3 - 99
			roll_predicted = torch.sum(roll_predicted.data[0] * idx_tensor) * 3 - 99

			# Print new frame with cube and axis
			txt_out.write(str(frame_num) + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted))
			# utils.plot_pose_cube(frame, yaw_predicted, pitch_predicted, roll_predicted, (x_min + x_max) / 2, (y_min + y_max) / 2, size = bbox_width)
			utils.draw_axis(frame, yaw_predicted, pitch_predicted, roll_predicted, tdx = (x_min + x_max) / 2, tdy= (y_min + y_max) / 2, size = bbox_height/2)
			# Plot expanded bounding box
			# cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0,255,0), 1)
			# Print new frame with cube and axis
			txt_out.write(str(frame_num) + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted))
			# utils.plot_pose_cube(frame, yaw_predicted, pitch_predicted, roll_predicted, (x_min + x_max) / 2, (y_min + y_max) / 2, size = bbox_width)
			utils.draw_axis(frame, yaw_predicted, pitch_predicted, roll_predicted, tdx = (x_min + x_max) / 2, tdy= (y_min + y_max) / 2, size = bbox_height/2)
			# Plot expanded bounding box
			# cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0,255,0), 1)

			# Peek next frame detection
			next_frame_num = int(bbox_line_list[idx+1].strip('\n').split(' ')[0])