From 3e0d6320a4acdb3c73e0efb31ee57fb1534237b2 Mon Sep 17 00:00:00 2001
From: chenshijun <csj_sky@126.com>
Date: Wed, 05 Jun 2019 10:41:06 +0800
Subject: [PATCH] real time video

---
 code/test_on_video_dlib.py |    6 +
 demo.txt                   |    3 
 code/demo2.py              |  168 ++++++++++++++++++++++++++++++++++++++++++
 code/utils.py              |    2 
 test/demo.py               |   31 +++
 test/demo4.py              |    0 
 test/demo3.py              |   15 +++
 7 files changed, 222 insertions(+), 3 deletions(-)

diff --git a/code/demo2.py b/code/demo2.py
new file mode 100644
index 0000000..be61416
--- /dev/null
+++ b/code/demo2.py
@@ -0,0 +1,168 @@
+import sys, os, argparse
+
+import numpy as np
+import cv2
+import matplotlib.pyplot as plt
+
+import torch
+import torch.nn as nn
+from torch.autograd import Variable
+from torch.utils.data import DataLoader
+from torchvision import transforms
+import torch.backends.cudnn as cudnn
+import torchvision
+import torch.nn.functional as F
+from PIL import Image
+
+import datasets, hopenet, utils
+
+from skimage import io
+import dlib
+import time
+
+if __name__ == '__main__':
+
+
+    cudnn.enabled = True
+
+    batch_size = 1
+    gpu = 'cuda:0'
+    snapshot_path = './snapshots/hopenet_robust_alpha1.pkl'
+    out_dir = 'output/video'
+
+
+    if not os.path.exists(out_dir):
+        os.makedirs(out_dir)
+
+    # ResNet50 structure
+    model = hopenet.Hopenet(torchvision.models.resnet.Bottleneck, [3, 4, 6, 3], 66)
+
+    # Dlib face detection model
+    cnn_face_detector = dlib.cnn_face_detection_model_v1('./models/mmod_human_face_detector.dat')
+
+    print('Loading snapshot.')
+    # Load snapshot
+    saved_state_dict = torch.load(snapshot_path)
+    model.load_state_dict(saved_state_dict)
+
+    print('Loading data.')
+
+    transformations = transforms.Compose([transforms.Scale(224),
+    transforms.CenterCrop(224), transforms.ToTensor(),
+    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
+
+    model.cuda(gpu)
+
+    print('Ready to test network.')
+
+    # Test the Model
+    model.eval()  # Change model to 'eval' mode (BN uses moving mean/var).
+    total = 0
+
+    idx_tensor = [idx for idx in range(66)]
+    idx_tensor = torch.FloatTensor(idx_tensor).cuda(gpu)
+
+    video = cv2.VideoCapture('rtsp://admin:a1234567@192.168.1.215:554/H.264/ch1/main/av_stream')
+    if not video.isOpened():
+        print('not opened')
+        exit()
+
+    # New cv2
+    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))   # float
+    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)) # float
+    print('width=========='+str(width))
+    print('height=========='+str(height))
+    # Define the codec and create VideoWriter object
+    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
+    # out = cv2.VideoWriter('output/video/output-%s.avi' % 's', fourcc, 25, (width, height))
+    out = cv2.VideoWriter('output/video/output-%s.avi' % 's', fourcc, 25, (1024, 534))
+    # # Old cv2
+    # width = int(video.get(cv2.cv.CV_CAP_PROP_FRAME_WIDTH))   # float
+    # height = int(video.get(cv2.cv.CV_CAP_PROP_FRAME_HEIGHT)) # float
+    #
+    # # Define the codec and create VideoWriter object
+    # fourcc = cv2.cv.CV_FOURCC(*'MJPG')
+    # out = cv2.VideoWriter('output/video/output-%s.avi' % args.output_string, fourcc, 30.0, (width, height))
+
+    txt_out = open('/home/basic/code/deep-head-pose/output/images/%s.txt' % 's', 'w')
+
+    frame_num = 1
+    start = time.time()
+    while frame_num <= 10000:
+        # print(frame_num)
+
+
+        ret,frame = video.read()
+        if ret == False:
+            break
+
+        frame = cv2.resize(frame, (1024, 534), interpolation=cv2.INTER_LINEAR)
+        if frame_num % 8 != 0:
+            frame_num += 1
+            cv2.imshow("frame", frame)
+            if cv2.waitKey(1) & 0xFF == ord('q'):
+                break
+            # out.write(frame)
+            continue
+        cv2_frame = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
+
+
+        # Dlib detect
+        dets = cnn_face_detector(cv2_frame, 1)
+
+        for idx, det in enumerate(dets):
+            # print('**********************yes****************************yes********************************yes************************************')
+            # Get x_min, y_min, x_max, y_max, conf
+            x_min = det.rect.left()
+            y_min = det.rect.top()
+            x_max = det.rect.right()
+            y_max = det.rect.bottom()
+            conf = det.confidence
+
+            if conf > 1.0:
+                bbox_width = abs(x_max - x_min)
+                bbox_height = abs(y_max - y_min)
+                x_min -= 2 * bbox_width / 4
+                x_max += 2 * bbox_width / 4
+                y_min -= 3 * bbox_height / 4
+                y_max += bbox_height / 4
+                x_min = max(x_min, 0); y_min = max(y_min, 0)
+                x_max = min(frame.shape[1], x_max); y_max = min(frame.shape[0], y_max)
+                # Crop image
+                img = cv2_frame[int(y_min):int(y_max),int(x_min):int(x_max)]
+                img = Image.fromarray(img)
+
+                # Transform
+                img = transformations(img)
+                img_shape = img.size()
+                img = img.view(1, img_shape[0], img_shape[1], img_shape[2])
+                img = Variable(img).cuda(gpu)
+
+                yaw, pitch, roll = model(img)
+
+                yaw_predicted = F.softmax(yaw)
+                pitch_predicted = F.softmax(pitch)
+                roll_predicted = F.softmax(roll)
+                # Get continuous predictions in degrees.
+                yaw_predicted = torch.sum(yaw_predicted.data[0] * idx_tensor) * 3 - 99
+                pitch_predicted = torch.sum(pitch_predicted.data[0] * idx_tensor) * 3 - 99
+                roll_predicted = torch.sum(roll_predicted.data[0] * idx_tensor) * 3 - 99
+
+                # Print new frame with cube and axis
+                txt_out.write(str(frame_num) + ' %f %f %f\n' % (yaw_predicted, pitch_predicted, roll_predicted))
+                # utils.plot_pose_cube(frame, yaw_predicted, pitch_predicted, roll_predicted, (x_min + x_max) / 2, (y_min + y_max) / 2, size = bbox_width)
+                # utils.draw_axis(frame, yaw_predicted, pitch_predicted, roll_predicted, tdx = (x_min + x_max) / 2, tdy= (y_min + y_max) / 2, size = bbox_height/2)
+                utils.draw_axis(frame, yaw_predicted, pitch_predicted, roll_predicted, tdx = (x_min + x_max)/2, tdy= (y_min + y_max)/2, size = bbox_height*3)
+                # Plot expanded bounding box
+                # cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), (0,255,0), 1)
+        cv2.imshow("frame", frame)
+        if cv2.waitKey(1) & 0xFF == ord('q'):
+            break
+        # out.write(frame)
+        frame_num += 1
+    end = time.time()
+
+    print(end - start)
+    out.release()
+    video.release()
+    # cv2.destroyAllWindows()
diff --git a/code/test_on_video_dlib.py b/code/test_on_video_dlib.py
index eefced7..8ae74fa 100644
--- a/code/test_on_video_dlib.py
+++ b/code/test_on_video_dlib.py
@@ -106,15 +106,19 @@
         print(frame_num)
         ret,frame = video.read()
+
         if ret == False:
             break
-
+
+        frame = cv2.resize(frame, (960, 540), interpolation=cv2.INTER_LINEAR)
+        print(frame.shape)
         cv2_frame = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
 
         # Dlib detect
         dets = cnn_face_detector(cv2_frame, 1)
         for idx, det in enumerate(dets):
+            print('**********************yes****************************yes********************************yes************************************')
             # Get x_min, y_min, x_max, y_max, conf
             x_min = det.rect.left()
             y_min = det.rect.top()
             x_max = det.rect.right()
             y_max = det.rect.bottom()
diff --git a/code/utils.py b/code/utils.py
index b75d51f..ff98fd3 100644
--- a/code/utils.py
+++ b/code/utils.py
@@ -112,6 +112,6 @@
 
     cv2.line(img, (int(tdx), int(tdy)), (int(x1),int(y1)),(0,0,255),3)
     cv2.line(img, (int(tdx), int(tdy)), (int(x2),int(y2)),(0,255,0),3)
-    cv2.line(img, (int(tdx), int(tdy)), (int(x3),int(y3)),(255,0,0),2)
+    cv2.line(img, (int(tdx), int(tdy)), (int(x3),int(y3)),(0,0,0),5)
 
     return img
diff --git a/demo.txt b/demo.txt
index 8aa7957..3d6ba0a 100644
--- a/demo.txt
+++ b/demo.txt
@@ -1 +1,2 @@
-python code/test_on_video_dlib.py --snapshot ./snapshots/hopenet_robust_alpha1.pkl --face_model ./models/mmod_human_face_detector.dat --video ./videos/out2.mp4 --output_string "something you want to add to the output video name and result file name" --n_frames 500 --fps 30
\ No newline at end of file
+python code/test_on_video_dlib.py --snapshot ./snapshots/hopenet_robust_alpha1.pkl --face_model ./models/mmod_human_face_detector.dat --video ./videos/out2.mp4 --output_string "something you want to add to the output video name and result file name" --n_frames 500 --fps 30
+python code/test_on_video_dlib.py --snapshot ./snapshots/hopenet_robust_alpha1.pkl --face_model ./models/mmod_human_face_detector.dat --video ./output/test/output__sssss.avi --output_string "something you want to add to the output video name and result file name" --n_frames 500 --fps 30
\ No newline at end of file
diff --git a/test/demo.py b/test/demo.py
new file mode 100644
index 0000000..a823418
--- /dev/null
+++ b/test/demo.py
@@ -0,0 +1,31 @@
+import cv2
+
+cap = cv2.VideoCapture('rtsp://admin:a1234567@192.168.1.188:554/H.264/ch1/main/av_stream')
+if not cap.isOpened():
+    print('not opened')
+    exit()
+
+# New cv2
+width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))   # float
+height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) # float
+
+# Define the codec and create VideoWriter object
+fourcc = cv2.VideoWriter_fourcc(*'MJPG')
+out = cv2.VideoWriter('output/test/output-%s.avi' % 's', fourcc, 30, (width, height))
+
+i = 0
+while True:
+    ret, frame = cap.read()
+    i += 1
+    if ret:
+        print(i)
+        # frame = cv2.flip(frame, 0)
+        # cv2.imwrite('/home/basic/code/deep-head-pose/test/img_%s.png'%str(i), frame)
+        if i > 100:
+            # frame = cv2.flip(frame, 0)
+            break
+        out.write(frame)
+
+out.release()
+cap.release()
+# cv2.destroyAllWindows()
\ No newline at end of file
diff --git a/test/demo3.py b/test/demo3.py
new file mode 100644
index 0000000..d79095e
--- /dev/null
+++ b/test/demo3.py
@@ -0,0 +1,15 @@
+import multiprocessing
+
+def readFrame(frames):
+    frame = 0
+    frames.append(frame)
+    while frame < 1000:
+        frame += 1
+        frames.append(frame)
+
+def processFrame(frames):
+    pass
+
+frames = []
+p1 = multiprocessing.Process(target=readFrame, args=frames)
+p2 = multiprocessing.Process(target=processFrame, args=frames)
diff --git a/test/demo4.py b/test/demo4.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/demo4.py
--
Gitblit v1.8.0
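
A note on the angle decoding used in code/demo2.py above: Hopenet predicts 66 classification bins per angle (yaw, pitch, roll), and the continuous value is recovered as the softmax expectation over the bin indices, mapped to degrees with idx * 3 - 99 (3-degree bins spanning roughly -99 to +99 degrees). A minimal standalone sketch of that decoding step follows; the random logits are purely illustrative, not real network output.

    import torch
    import torch.nn.functional as F

    idx_tensor = torch.arange(66, dtype=torch.float32)  # bin indices 0..65

    def bins_to_degrees(logits):
        # Softmax over the 66 bins, expected bin index, then bin index -> degrees.
        probs = F.softmax(logits, dim=1)
        expected_bin = torch.sum(probs * idx_tensor, dim=1)
        return expected_bin * 3 - 99

    yaw_logits = torch.randn(1, 66)     # illustrative logits for one face crop
    print(bins_to_degrees(yaw_logits))  # continuous yaw in degrees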
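
test/demo3.py above only sketches the idea of reading frames in one process and processing them in another: the two Process objects are never started, args must be a tuple, and a plain Python list is not shared between processes. A minimal runnable variant of that idea, using a multiprocessing.Queue for the hand-off (the queue, the None sentinel, and the function names here are illustrative assumptions, not part of the patch):

    import multiprocessing

    def read_frames(queue):
        # Producer: stand-in for the capture loop; pushes frame ids onto the queue.
        for frame in range(1000):
            queue.put(frame)
        queue.put(None)  # sentinel telling the consumer to stop

    def process_frames(queue):
        # Consumer: stand-in for detection / head-pose estimation per frame.
        while True:
            frame = queue.get()
            if frame is None:
                break
            # ... process the frame here ...

    if __name__ == '__main__':
        q = multiprocessing.Queue()
        p1 = multiprocessing.Process(target=read_frames, args=(q,))    # args is a tuple
        p2 = multiprocessing.Process(target=process_frames, args=(q,))
        p1.start()
        p2.start()
        p1.join()
        p2.join()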