Algorithm/deepHeadPose.git

parent: 5483d8fe | patch | commit | ignore whitespace

natanielruiz

2017-10-30 4b67b5c8ed5566ec3030d537536282e830d87e40

next

7 files changed

	code/hopenet.py	18 ●●●●● patch \| view \| raw \| blame \| history
	code/test_alexnet.py	9 ●●●●● patch \| view \| raw \| blame \| history
	code/test_on_video.py	15 ●●●●● patch \| view \| raw \| blame \| history
	code/test_on_video_noconf.py	14 ●●●●● patch \| view \| raw \| blame \| history
	code/test_preangles.py	12 ●●●●● patch \| view \| raw \| blame \| history
	code/test_resnet50_regression.py	9 ●●●●● patch \| view \| raw \| blame \| history
	code/train_preangles.py	14 ●●●●● patch \| view \| raw \| blame \| history

 code/hopenet.py

@@ -24,11 +24,8 @@
        self.fc_pitch = nn.Linear(512 * block.expansion, num_bins)
        self.fc_roll = nn.Linear(512 * block.expansion, num_bins)

        # Vestigial layer from previous experiments
        self.fc_finetune = nn.Linear(512 * block.expansion + 3, 3)

        # Used to get the expected value of angle from bins
        self.softmax = nn.Softmax()
        self.idx_tensor = Variable(torch.FloatTensor(range(66))).cuda()

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
@@ -72,18 +69,7 @@
        pre_pitch = self.fc_pitch(x)
        pre_roll = self.fc_roll(x)

        yaw = self.softmax(pre_yaw)
        yaw = Variable(torch.sum(yaw.data * self.idx_tensor.data, 1), requires_grad=True)
        pitch = self.softmax(pre_pitch)
        pitch = Variable(torch.sum(pitch.data * self.idx_tensor.data, 1), requires_grad=True)
        roll = self.softmax(pre_roll)
        roll = Variable(torch.sum(roll.data * self.idx_tensor.data, 1), requires_grad=True)
        yaw = yaw.view(yaw.size(0), 1)
        pitch = pitch.view(pitch.size(0), 1)
        roll = roll.view(roll.size(0), 1)
        preangles = torch.cat([yaw, pitch, roll], 1)

        return pre_yaw, pre_pitch, pre_roll, preangles
        return pre_yaw, pre_pitch, pre_roll

class ResNet(nn.Module):
    # ResNet for regression of 3 Euler angles.

 code/test_alexnet.py

@@ -36,13 +36,6 @@

    return args

def load_filtered_state_dict(model, snapshot):
    """Load into `model` only the `snapshot` entries it already has keys for.

    The model's current state dict is used as the base; every snapshot
    entry whose key is present in that dict overrides the existing value,
    and snapshot entries with unknown keys are dropped. The merged dict is
    then handed to `model.load_state_dict`.
    """
    # Approach by user apaszke from discuss.pytorch.org
    merged_state = model.state_dict()
    for key, value in snapshot.items():
        if key in merged_state:
            merged_state[key] = value
    model.load_state_dict(merged_state)

if __name__ == '__main__':
    args = parse_args()

@@ -55,7 +48,7 @@
    print 'Loading snapshot.'
    # Load snapshot
    saved_state_dict = torch.load(snapshot_path)
    load_filtered_state_dict(model, saved_state_dict)
    model.load_state_dict(saved_state_dict)

    print 'Loading data.'


 code/test_on_video.py

@@ -47,7 +47,7 @@
    if not os.path.exists(args.video_path):
        sys.exit('Video does not exist')

    # ResNet50
    # ResNet50 structure
    model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66)

    print 'Loading snapshot.'
@@ -154,11 +154,16 @@
                img = img.view(1, img_shape[0], img_shape[1], img_shape[2])
                img = Variable(img).cuda(gpu)

                yaw, pitch, roll, angles = model(img)
                yaw, pitch, roll = model(img)

                yaw_predicted = angles[:,0].data[0].cpu()
                pitch_predicted = angles[:,1].data[0].cpu()
                roll_predicted = angles[:,2].data[0].cpu()
                yaw_predicted = F.softmax(yaw)
                pitch_predicted = F.softmax(pitch)
                roll_predicted = F.softmax(roll)
                # Get continuous predictions in degrees.
                yaw_predicted = torch.sum(yaw_predicted.data[0] * idx_tensor) * 3 - 99
                pitch_predicted = torch.sum(pitch_predicted.data[0] * idx_tensor) * 3 - 99
                roll_predicted = torch.sum(roll_predicted.data[0] * idx_tensor) * 3 - 99

                # Print new frame with cube and axis
                txt_out.write(str(frame_num) + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted))
                # utils.plot_pose_cube(frame, yaw_predicted, pitch_predicted, roll_predicted, (x_min + x_max) / 2, (y_min + y_max) / 2, size = bbox_width)

 code/test_on_video_noconf.py

@@ -153,12 +153,16 @@
            img_shape = img.size()
            img = img.view(1, img_shape[0], img_shape[1], img_shape[2])
            img = Variable(img).cuda(gpu)
            
            yaw, pitch, roll, angles = model(img)

            yaw_predicted = angles[:,0].data[0].cpu()
            pitch_predicted = angles[:,1].data[0].cpu()
            roll_predicted = angles[:,2].data[0].cpu()
            yaw, pitch, roll = model(img)

            yaw_predicted = F.softmax(yaw)
            pitch_predicted = F.softmax(pitch)
            roll_predicted = F.softmax(roll)
            # Get continuous predictions in degrees.
            yaw_predicted = torch.sum(yaw_predicted.data[0] * idx_tensor) * 3 - 99
            pitch_predicted = torch.sum(pitch_predicted.data[0] * idx_tensor) * 3 - 99
            roll_predicted = torch.sum(roll_predicted.data[0] * idx_tensor) * 3 - 99

            # Print new frame with cube and axis
            txt_out.write(str(frame_num) + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted))

 code/test_preangles.py

@@ -36,13 +36,6 @@

    return args

def load_filtered_state_dict(model, snapshot):
    """Load into `model` only the `snapshot` entries it already has keys for.

    The model's current state dict is used as the base; every snapshot
    entry whose key is present in that dict overrides the existing value,
    and snapshot entries with unknown keys are dropped. The merged dict is
    then handed to `model.load_state_dict`.
    """
    # Approach by user apaszke from discuss.pytorch.org
    merged_state = model.state_dict()
    for key, value in snapshot.items():
        if key in merged_state:
            merged_state[key] = value
    model.load_state_dict(merged_state)

if __name__ == '__main__':
    args = parse_args()

@@ -57,7 +50,6 @@
    # Load snapshot
    saved_state_dict = torch.load(snapshot_path)
    model.load_state_dict(saved_state_dict)
    # load_filtered_state_dict(model, saved_state_dict)

    print 'Loading data.'

@@ -105,8 +97,6 @@

    l1loss = torch.nn.L1Loss(size_average=False)



    for i, (images, labels, cont_labels, name) in enumerate(test_loader):
        images = Variable(images).cuda(gpu)
        total += cont_labels.size(0)
@@ -115,7 +105,7 @@
        label_pitch = cont_labels[:,1].float()
        label_roll = cont_labels[:,2].float()

        yaw, pitch, roll, angles = model(images)
        yaw, pitch, roll = model(images)

        # Binned predictions
        _, yaw_bpred = torch.max(yaw.data, 1)

 code/test_resnet50_regression.py

@@ -36,13 +36,6 @@

    return args

def load_filtered_state_dict(model, snapshot):
    """Load into `model` only the `snapshot` entries it already has keys for.

    The model's current state dict is used as the base; every snapshot
    entry whose key is present in that dict overrides the existing value,
    and snapshot entries with unknown keys are dropped. The merged dict is
    then handed to `model.load_state_dict`.
    """
    # Approach by user apaszke from discuss.pytorch.org
    merged_state = model.state_dict()
    for key, value in snapshot.items():
        if key in merged_state:
            merged_state[key] = value
    model.load_state_dict(merged_state)

if __name__ == '__main__':
    args = parse_args()

@@ -55,7 +48,7 @@
    print 'Loading snapshot.'
    # Load snapshot
    saved_state_dict = torch.load(snapshot_path)
    load_filtered_state_dict(model, saved_state_dict)
    model.load_state_dict(saved_state_dict)

    print 'Loading data.'


 code/train_preangles.py

@@ -124,6 +124,10 @@
    # Regression loss coefficient
    alpha = args.alpha

    softmax = nn.Softmax().cuda(gpu)
    idx_tensor = [idx for idx in xrange(66)]
    idx_tensor = Variable(torch.FloatTensor(idx_tensor)).cuda(gpu)

    optimizer = torch.optim.Adam([{'params': get_ignored_params(model), 'lr': 0},
                                  {'params': get_non_ignored_params(model), 'lr': args.lr},
                                  {'params': get_fc_params(model), 'lr': args.lr * 5}],
@@ -153,9 +157,13 @@
            loss_roll = criterion(roll, label_roll)

            # MSE loss
            yaw_predicted = angles[:,0]
            pitch_predicted = angles[:,1]
            roll_predicted = angles[:,2]
            yaw_predicted = softmax(yaw)
            pitch_predicted = softmax(pitch)
            roll_predicted = softmax(roll)

            yaw_predicted = torch.sum(yaw_predicted * idx_tensor, 1) * 3 - 99
            pitch_predicted = torch.sum(pitch_predicted * idx_tensor, 1) * 3 - 99
            roll_predicted = torch.sum(roll_predicted * idx_tensor, 1) * 3 - 99

            loss_reg_yaw = reg_criterion(yaw_predicted, label_yaw_cont)
            loss_reg_pitch = reg_criterion(pitch_predicted, label_pitch_cont)

			@@ -24,11 +24,8 @@
			self.fc_pitch = nn.Linear(512 * block.expansion, num_bins)
			self.fc_roll = nn.Linear(512 * block.expansion, num_bins)

			# Vestigial layer from previous experiments
			self.fc_finetune = nn.Linear(512 * block.expansion + 3, 3)

			# Used to get the expected value of angle from bins
			self.softmax = nn.Softmax()
			self.idx_tensor = Variable(torch.FloatTensor(range(66))).cuda()

			for m in self.modules():
			if isinstance(m, nn.Conv2d):
			@@ -72,18 +69,7 @@
			pre_pitch = self.fc_pitch(x)
			pre_roll = self.fc_roll(x)

			yaw = self.softmax(pre_yaw)
			yaw = Variable(torch.sum(yaw.data * self.idx_tensor.data, 1), requires_grad=True)
			pitch = self.softmax(pre_pitch)
			pitch = Variable(torch.sum(pitch.data * self.idx_tensor.data, 1), requires_grad=True)
			roll = self.softmax(pre_roll)
			roll = Variable(torch.sum(roll.data * self.idx_tensor.data, 1), requires_grad=True)
			yaw = yaw.view(yaw.size(0), 1)
			pitch = pitch.view(pitch.size(0), 1)
			roll = roll.view(roll.size(0), 1)
			preangles = torch.cat([yaw, pitch, roll], 1)

			return pre_yaw, pre_pitch, pre_roll, preangles
			return pre_yaw, pre_pitch, pre_roll

			class ResNet(nn.Module):
			# ResNet for regression of 3 Euler angles.

			@@ -36,13 +36,6 @@

			return args

			def load_filtered_state_dict(model, snapshot):
			# By user apaszke from discuss.pytorch.org
			model_dict = model.state_dict()
			snapshot = {k: v for k, v in snapshot.items() if k in model_dict}
			model_dict.update(snapshot)
			model.load_state_dict(model_dict)

			if __name__ == '__main__':
			args = parse_args()

			@@ -55,7 +48,7 @@
			print 'Loading snapshot.'
			# Load snapshot
			saved_state_dict = torch.load(snapshot_path)
			load_filtered_state_dict(model, saved_state_dict)
			model.load_state_dict(saved_state_dict)

			print 'Loading data.'

			@@ -47,7 +47,7 @@
			if not os.path.exists(args.video_path):
			sys.exit('Video does not exist')

			# ResNet50
			# ResNet50 structure
			model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66)

			print 'Loading snapshot.'
			@@ -154,11 +154,16 @@
			img = img.view(1, img_shape[0], img_shape[1], img_shape[2])
			img = Variable(img).cuda(gpu)

			yaw, pitch, roll, angles = model(img)
			yaw, pitch, roll = model(img)

			yaw_predicted = angles[:,0].data[0].cpu()
			pitch_predicted = angles[:,1].data[0].cpu()
			roll_predicted = angles[:,2].data[0].cpu()
			yaw_predicted = F.softmax(yaw)
			pitch_predicted = F.softmax(pitch)
			roll_predicted = F.softmax(roll)
			# Get continuous predictions in degrees.
			yaw_predicted = torch.sum(yaw_predicted.data[0] * idx_tensor) * 3 - 99
			pitch_predicted = torch.sum(pitch_predicted.data[0] * idx_tensor) * 3 - 99
			roll_predicted = torch.sum(roll_predicted.data[0] * idx_tensor) * 3 - 99

			# Print new frame with cube and axis
			txt_out.write(str(frame_num) + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted))
			# utils.plot_pose_cube(frame, yaw_predicted, pitch_predicted, roll_predicted, (x_min + x_max) / 2, (y_min + y_max) / 2, size = bbox_width)

			@@ -153,12 +153,16 @@
			img_shape = img.size()
			img = img.view(1, img_shape[0], img_shape[1], img_shape[2])
			img = Variable(img).cuda(gpu)

			yaw, pitch, roll, angles = model(img)

			yaw_predicted = angles[:,0].data[0].cpu()
			pitch_predicted = angles[:,1].data[0].cpu()
			roll_predicted = angles[:,2].data[0].cpu()
			yaw, pitch, roll = model(img)

			yaw_predicted = F.softmax(yaw)
			pitch_predicted = F.softmax(pitch)
			roll_predicted = F.softmax(roll)
			# Get continuous predictions in degrees.
			yaw_predicted = torch.sum(yaw_predicted.data[0] * idx_tensor) * 3 - 99
			pitch_predicted = torch.sum(pitch_predicted.data[0] * idx_tensor) * 3 - 99
			roll_predicted = torch.sum(roll_predicted.data[0] * idx_tensor) * 3 - 99

			# Print new frame with cube and axis
			txt_out.write(str(frame_num) + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted))

			@@ -124,6 +124,10 @@
			# Regression loss coefficient
			alpha = args.alpha

			softmax = nn.Softmax().cuda(gpu)
			idx_tensor = [idx for idx in xrange(66)]
			idx_tensor = Variable(torch.FloatTensor(idx_tensor)).cuda(gpu)

			optimizer = torch.optim.Adam([{'params': get_ignored_params(model), 'lr': 0},
			{'params': get_non_ignored_params(model), 'lr': args.lr},
			{'params': get_fc_params(model), 'lr': args.lr * 5}],
			@@ -153,9 +157,13 @@
			loss_roll = criterion(roll, label_roll)

			# MSE loss
			yaw_predicted = angles[:,0]
			pitch_predicted = angles[:,1]
			roll_predicted = angles[:,2]
			yaw_predicted = softmax(yaw)
			pitch_predicted = softmax(pitch)
			roll_predicted = softmax(roll)

			yaw_predicted = torch.sum(yaw_predicted * idx_tensor, 1) * 3 - 99
			pitch_predicted = torch.sum(pitch_predicted * idx_tensor, 1) * 3 - 99
			roll_predicted = torch.sum(roll_predicted * idx_tensor, 1) * 3 - 99

			loss_reg_yaw = reg_criterion(yaw_predicted, label_yaw_cont)
			loss_reg_pitch = reg_criterion(pitch_predicted, label_pitch_cont)