xuepengqiang
2020-05-26 bb5cb224c9abe4216aaa49a8287b06d9f05dab60
add m
2 files deleted
49 files added
1455 lines changed
.gitignore 44
README.md 4
demo.py 29
lib/__init__.py 6
lib/__init__.pyc
lib/__pycache__/__init__.cpython-36.pyc
lib/config/cpu_run.dll
lib/core/__init__.py 6
lib/core/__init__.pyc
lib/core/__pycache__/__init__.cpython-36.pyc
lib/core/__pycache__/ds_tracker.cpython-36.pyc
lib/core/__pycache__/generate_detections.cpython-36.pyc
lib/core/__pycache__/yolo.cpython-36.pyc
lib/core/ds_tracker.py 118
lib/core/ds_tracker.pyc
lib/core/generate_detections.py 168
lib/core/yolo.py 114
lib/deep_sort/__init__.py 6
lib/deep_sort/__pycache__/__init__.cpython-36.pyc
lib/deep_sort/__pycache__/detection.cpython-36.pyc
lib/deep_sort/__pycache__/iou_matching.cpython-36.pyc
lib/deep_sort/__pycache__/kalman_filter.cpython-36.pyc
lib/deep_sort/__pycache__/linear_assignment.cpython-36.pyc
lib/deep_sort/__pycache__/nn_matching.cpython-36.pyc
lib/deep_sort/__pycache__/preprocessing.cpython-36.pyc
lib/deep_sort/__pycache__/track.cpython-36.pyc
lib/deep_sort/__pycache__/tracker.cpython-36.pyc
lib/deep_sort/detection.py 19
lib/deep_sort/iou_matching.py 39
lib/deep_sort/kalman_filter.py 107
lib/deep_sort/linear_assignment.py 84
lib/deep_sort/nn_matching.py 62
lib/deep_sort/preprocessing.py 43
lib/deep_sort/track.py 66
lib/deep_sort/tracker.py 94
lib/utils/__init__.py 6
lib/utils/__pycache__/__init__.cpython-36.pyc
lib/utils/__pycache__/utils.cpython-36.pyc
lib/utils/utils.py 19
lib/yolo3/__init__.py
lib/yolo3/__init__.pyc
lib/yolo3/__pycache__/__init__.cpython-36.pyc
lib/yolo3/__pycache__/model.cpython-36.pyc
lib/yolo3/__pycache__/utils.cpython-36.pyc
lib/yolo3/model.py 300
lib/yolo3/model.pyc
lib/yolo3/utils.py 20
lib/yolo3/utils.pyc
model_dump/coco_classes.txt 80
model_dump/voc_classes.txt 20
model_dump/yolo_anchors.txt 1
.gitignore
File was deleted
README.md
File was deleted
demo.py
New file
@@ -0,0 +1,29 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2019/10/30 11:55
# @Author  : Scheaven
# @File    :  demo.py
# @description:
import argparse
from lib.core.ds_tracker import human_tracker
from lib.core.yolo import YOLO
from lib.core import generate_detections as gdet
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
###################### parameters ######################
def parse_args():
    parser = argparse.ArgumentParser(description="Deep SORT")
    parser.add_argument("-i", "--in_type", help="camera or video",
        default='video', required=False)
    parser.add_argument("--c_in", help="camera ",
        default='rtsp://admin:a1234567@192.168.5.32:554/h264/ch1/main/av_stream', required=False)
    parser.add_argument("--v_in", help="video",
        default='../cs01.avi', required=False)
    return parser.parse_args()
if __name__ == '__main__':
    args = parse_args()
    model_filename = 'model_dump/mars-small128.pb'
    encoder = gdet.create_box_encoder(model_filename, batch_size=1)
    human_tracker(YOLO(), encoder, args)
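Usage note (a sketch, assuming the frozen re-id weights model_dump/mars-small128.pb and the detector weights model_dump/yolo.h5 referenced below are in place): python demo.py --in_type video --v_in <path-to-video>, or --in_type camera --c_in <rtsp-url> to track from a live stream.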
lib/__init__.py
New file
@@ -0,0 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2019/10/30 11:55
# @Author  : Scheaven
# @File    :  __init__.py
# @description:
lib/__init__.pyc
Binary files differ
lib/__pycache__/__init__.cpython-36.pyc
Binary files differ
lib/config/cpu_run.dll
Binary files differ
lib/core/__init__.py
New file
@@ -0,0 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2019/10/30 13:39
# @Author  : Scheaven
# @File    :  __init__.py
# @description:
lib/core/__init__.pyc
Binary files differ
lib/core/__pycache__/__init__.cpython-36.pyc
Binary files differ
lib/core/__pycache__/ds_tracker.cpython-36.pyc
Binary files differ
lib/core/__pycache__/generate_detections.cpython-36.pyc
Binary files differ
lib/core/__pycache__/yolo.cpython-36.pyc
Binary files differ
lib/core/ds_tracker.py
New file
@@ -0,0 +1,118 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2019/10/30 13:40
# @Author  : Scheaven
# @File    :  ds_tracker.py
# @description:
from timeit import time
import cv2
import numpy as np
from PIL import Image
from lib.deep_sort.tracker import Tracker
from lib.deep_sort import preprocessing
from lib.deep_sort import nn_matching
from lib.deep_sort.detection import Detection
from lib.utils.utils import video_open
from collections import deque
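# Per-track trail history: ring buffers of the last 30 box centres, indexed by track id.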
pts = [deque(maxlen=30) for _ in range(9999)]
np.random.seed(100)
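# Fixed random palette so each track id keeps a stable colour across frames.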
COLORS = np.random.randint(0, 255, size=(200, 3),
    dtype="uint8")
def human_tracker(yolo, encoder, args):
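    # Deep SORT knobs: cosine-distance gate for appearance matching, unbounded
    # feature budget, and an NMS overlap of 1.0 (suppression effectively disabled).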
    max_cosine_distance = 0.3
    nn_budget = None
    nms_max_overlap = 1.0
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)
    video = video_open(args)
    video_capture = video.generate_video()
    w = int(video_capture.get(3))
    h = int(video_capture.get(4))
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
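    # NOTE: cv2.VideoWriter will not create ./output/; the directory must already exist.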
    out = cv2.VideoWriter('./output/' + '_output.avi', fourcc, 15, (w, h))
    i = 0
    fps = 0
    track_fps = 0
    while True:
        ret, frame = video_capture.read()
        if not ret:
            break
        t1 = time.time()
        image = Image.fromarray(frame)
        time3 = time.time()
        boxs = yolo.detect_image(image)
        time4 = time.time()
        print('detect cost is', time4 - time3)
        features = encoder(frame, boxs)
        print("features shape:  ", features.shape)
        detections = [Detection(bbox, 1.0, feature) for bbox, feature in zip(boxs, features)]
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]
        print('features extract is', time.time() - time4)
        tracker.predict()
        tracker.update(detections)
        fps = (fps + (1. / (time.time() - t1))) / 2
        track_fps = (track_fps + (1. / (time.time() - time4))) / 2
        print("fps= %f" % (fps))
        indexIDs = []
        i = 0
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            indexIDs.append(int(track.track_id))
            color = [int(c) for c in COLORS[indexIDs[i] % len(COLORS)]]
            bbox = track.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (color), 2)
            # cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1])), 0, 5e-3 * 200, (0, 255, 0), 2)
            # cv2.putText(frame, str(round(fps, 2)), (100, 100), 0, 5e-3 * 300, (0, 0 , 255), 2)
            # cv2.putText(frame, str(track_fps), (100, 50), 0, 5e-3 * 300, (0, 0 , 255), 2)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150, (color), 2)
            # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (color), 3)
            # cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1] - 50)), 0, 5e-3 * 150, (color), 2)
                # cv2.putText(frame, str(class_names[0]), (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150, (color), 2)
            center = (int(((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))
            # track_id[center]
            i += 1
            pts[track.track_id].append(center)
            thickness = 5
            cv2.circle(frame, (center), 1, color, thickness)
            for j in range(1, len(pts[track.track_id])):
                if pts[track.track_id][j - 1] is None or pts[track.track_id][j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, (pts[track.track_id][j - 1]), (pts[track.track_id][j]), (color), thickness)
        for det in detections:
            bbox = det.to_tlbr()
            # cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 0, 0), 2)
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
        # cv2.imshow('', cv2.resize(frame,(854, 480)))
        out.write(frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    video_capture.release()
    out.release()
    cv2.destroyAllWindows()
lib/core/ds_tracker.pyc
Binary files differ
lib/core/generate_detections.py
New file
@@ -0,0 +1,168 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2019/10/30 13:59
# @Author  : Scheaven
# @File    :  generate_detections.py
# @description:
import os
import errno
import argparse
import numpy as np
import cv2
import tensorflow as tf
def _run_in_batches(f, data_dict, out, batch_size):
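    # Run f over data_dict in slices of batch_size, writing results into out;
    # the remainder smaller than one batch is handled after the loop.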
    data_len = len(out)
    num_batches = int(data_len / batch_size)
    s, e = 0, 0
    for i in range(num_batches):
        s, e = i * batch_size, (i + 1) * batch_size
        batch_data_dict = {k: v[s:e] for k, v in data_dict.items()}
        out[s:e] = f(batch_data_dict)
    if e < len(out):
        batch_data_dict = {k: v[e:] for k, v in data_dict.items()}
        out[e:] = f(batch_data_dict)
def extract_image_patch(image, bbox, patch_shape):
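    # Crop an (x, y, w, h) box out of image, adjusting its width to the encoder's
    # aspect ratio first, then resize the crop to patch_shape; returns None if the
    # clipped box is empty.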
    bbox = np.array(bbox)
    if patch_shape is not None:
        target_aspect = float(patch_shape[1]) / patch_shape[0]
        new_width = target_aspect * bbox[3]
        bbox[0] -= (new_width - bbox[2]) / 2
        bbox[2] = new_width
    bbox[2:] += bbox[:2]
    bbox = bbox.astype(np.int)
    bbox[:2] = np.maximum(0, bbox[:2])
    bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:])
    if np.any(bbox[:2] >= bbox[2:]):
        return None
    sx, sy, ex, ey = bbox
    image = image[sy:ey, sx:ex]
    image = cv2.resize(image, tuple(patch_shape[::-1]))
    return image
class ImageEncoder(object):
    def __init__(self, checkpoint_filename, input_name="images",
                 output_name="features"):
        self.session = tf.Session()
        with tf.gfile.GFile(checkpoint_filename, "rb") as file_handle:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(file_handle.read())
        tf.import_graph_def(graph_def, name="net")
        self.input_var = tf.get_default_graph().get_tensor_by_name(
            "net/%s:0" % input_name)
        self.output_var = tf.get_default_graph().get_tensor_by_name(
            "net/%s:0" % output_name)
        assert len(self.output_var.get_shape()) == 2
        assert len(self.input_var.get_shape()) == 4
        self.feature_dim = self.output_var.get_shape().as_list()[-1]
        self.image_shape = self.input_var.get_shape().as_list()[1:]
    def __call__(self, data_x, batch_size=32):
        out = np.zeros((len(data_x), self.feature_dim), np.float32)
        _run_in_batches(
            lambda x: self.session.run(self.output_var, feed_dict=x),
            {self.input_var: data_x}, out, batch_size)
        return out
def create_box_encoder(model_filename, input_name="images",
                       output_name="features", batch_size=32):
    image_encoder = ImageEncoder(model_filename, input_name, output_name)
    image_shape = image_encoder.image_shape
    def encoder(image, boxes):
        image_patches = []
        for box in boxes:
            patch = extract_image_patch(image, box, image_shape[:2])  # crop the human box region out of the frame
            if patch is None:
                print("WARNING: Failed to extract image patch: %s." % str(box))
                patch = np.random.uniform(
                    0., 255., image_shape).astype(np.uint8)
            image_patches.append(patch)
        image_patches = np.asarray(image_patches)
        return image_encoder(image_patches, batch_size)
    return encoder
def generate_detections(encoder, mot_dir, output_dir, detection_dir=None):
    if detection_dir is None:
        detection_dir = mot_dir
    try:
        os.makedirs(output_dir)
    except OSError as exception:
        if exception.errno == errno.EEXIST and os.path.isdir(output_dir):
            pass
        else:
            raise ValueError(
                "Failed to create output directory '%s'" % output_dir)
    for sequence in os.listdir(mot_dir):
        print("Processing %s" % sequence)
        sequence_dir = os.path.join(mot_dir, sequence)
        image_dir = os.path.join(sequence_dir, "img1")
        image_filenames = {
            int(os.path.splitext(f)[0]): os.path.join(image_dir, f)
            for f in os.listdir(image_dir)}
        detection_file = os.path.join(
            detection_dir, sequence, "det/det.txt")
        detections_in = np.loadtxt(detection_file, delimiter=',')
        detections_out = []
        frame_indices = detections_in[:, 0].astype(np.int)
        min_frame_idx = frame_indices.astype(np.int).min()
        max_frame_idx = frame_indices.astype(np.int).max()
        for frame_idx in range(min_frame_idx, max_frame_idx + 1):
            print("Frame %05d/%05d" % (frame_idx, max_frame_idx))
            mask = frame_indices == frame_idx
            rows = detections_in[mask]
            if frame_idx not in image_filenames:
                print("WARNING could not find image for frame %d" % frame_idx)
                continue
            bgr_image = cv2.imread(
                image_filenames[frame_idx], cv2.IMREAD_COLOR)
            features = encoder(bgr_image, rows[:, 2:6].copy())
            detections_out += [np.r_[(row, feature)] for row, feature
                               in zip(rows, features)]
        output_filename = os.path.join(output_dir, "%s.npy" % sequence)
        np.save(
            output_filename, np.asarray(detections_out), allow_pickle=False)
def parse_args():
    parser = argparse.ArgumentParser(description="Re-ID feature extractor")
    parser.add_argument(
        "--model",
        default="model_dump/mars-small128.pb",
        help="Path to freezed inference graph protobuf.")
    parser.add_argument(
        "--mot_dir", help="Path to MOTChallenge directory (train or test)",
        required=True)
    parser.add_argument(
        "--detection_dir", help="Path to custom detections. Defaults to "
        "standard MOT detections Directory structure should be the default "
        "MOTChallenge structure: [sequence]/det/det.txt", default=None)
    parser.add_argument(
        "--output_dir", help="Output directory. Will be created if it does not"
        " exist.", default="detections")
    return parser.parse_args()
def main():
    args = parse_args()
    encoder = create_box_encoder(args.model, batch_size=32)
    generate_detections(encoder, args.mot_dir, args.output_dir,
                        args.detection_dir)
if __name__ == "__main__":
    main()
lib/core/yolo.py
New file
@@ -0,0 +1,114 @@
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import colorsys
import os
import random
from timeit import time
from timeit import default_timer as timer
import numpy as np
from keras import backend as K
from keras.models import load_model
from PIL import Image, ImageFont, ImageDraw
from lib.yolo3.model import yolo_eval
from lib.yolo3.utils import letterbox_image
class YOLO(object):
    def __init__(self):
        self.model_path = 'model_dump/yolo.h5'
        self.anchors_path = 'model_dump/yolo_anchors.txt'
        self.classes_path = 'model_dump/coco_classes.txt'
        self.score = 0.5
        self.iou = 0.5
        self.class_names = self._get_class()
        self.anchors = self._get_anchors()
        self.sess = K.get_session()
        self.model_image_size = (416, 416)
        self.is_fixed_size = self.model_image_size != (None, None)
        self.boxes, self.scores, self.classes = self.generate()
    def _get_class(self):
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names
    def _get_anchors(self):
        anchors_path = os.path.expanduser(self.anchors_path)
        with open(anchors_path) as f:
            anchors = f.readline()
            anchors = [float(x) for x in anchors.split(',')]
            anchors = np.array(anchors).reshape(-1, 2)
        return anchors
    def generate(self):
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith('.h5'), 'Keras model must be a .h5 file.'
        self.yolo_model = load_model(model_path, compile=False)
        print('{} model, anchors, and classes loaded.'.format(model_path))
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))
        random.seed(10101)
        random.shuffle(self.colors)
        random.seed(None)
        self.input_image_shape = K.placeholder(shape=(2, ))
        boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
                len(self.class_names), self.input_image_shape,
                score_threshold=self.score, iou_threshold=self.iou)
        return boxes, scores, classes
    def detect_image(self, image):
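        # Letterbox the frame to the fixed network input size, run one session pass,
        # and return only 'person' detections as (x, y, w, h) boxes.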
        if self.is_fixed_size:
            assert self.model_image_size[0]%32 == 0, 'Multiples of 32 required'
            assert self.model_image_size[1]%32 == 0, 'Multiples of 32 required'
            boxed_image = letterbox_image(image, tuple(reversed(self.model_image_size)))
        else:
            new_image_size = (image.width - (image.width % 32),
                              image.height - (image.height % 32))
            boxed_image = letterbox_image(image, new_image_size)
        image_data = np.array(boxed_image, dtype='float32')
        #print(image_data.shape)
        image_data /= 255.
        image_data = np.expand_dims(image_data, 0)
        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0
            })
        return_boxs = []
        for i, c in reversed(list(enumerate(out_classes))):
            predicted_class = self.class_names[c]
            if predicted_class != 'person':
                continue
            box = out_boxes[i]
            # score = out_scores[i]
            x = int(box[1])
            y = int(box[0])
            w = int(box[3] - box[1])
            h = int(box[2] - box[0])
            if x < 0:
                w = w + x
                x = 0
            if y < 0:
                h = h + y
                y = 0
            return_boxs.append([x, y, w, h])
        return return_boxs
    def close_session(self):
        self.sess.close()
lib/deep_sort/__init__.py
New file
@@ -0,0 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2019/10/30 15:04
# @Author  : Scheaven
# @File    :  __init__.py
# @description:
lib/deep_sort/__pycache__/__init__.cpython-36.pyc
Binary files differ
lib/deep_sort/__pycache__/detection.cpython-36.pyc
Binary files differ
lib/deep_sort/__pycache__/iou_matching.cpython-36.pyc
Binary files differ
lib/deep_sort/__pycache__/kalman_filter.cpython-36.pyc
Binary files differ
lib/deep_sort/__pycache__/linear_assignment.cpython-36.pyc
Binary files differ
lib/deep_sort/__pycache__/nn_matching.cpython-36.pyc
Binary files differ
lib/deep_sort/__pycache__/preprocessing.cpython-36.pyc
Binary files differ
lib/deep_sort/__pycache__/track.cpython-36.pyc
Binary files differ
lib/deep_sort/__pycache__/tracker.cpython-36.pyc
Binary files differ
lib/deep_sort/detection.py
New file
@@ -0,0 +1,19 @@
import numpy as np
class Detection(object):
    def __init__(self, tlwh, confidence, feature):
        self.tlwh = np.asarray(tlwh, dtype=np.float)
        self.confidence = float(confidence)
        self.feature = np.asarray(feature, dtype=np.float32)
    def to_tlbr(self):
        ret = self.tlwh.copy()
        ret[2:] += ret[:2]
        return ret
    def to_xyah(self):
        ret = self.tlwh.copy()
        ret[:2] += ret[2:] / 2
        ret[2] /= ret[3]
        return ret
lib/deep_sort/iou_matching.py
New file
@@ -0,0 +1,39 @@
from __future__ import absolute_import
import numpy as np
from . import linear_assignment
def iou(bbox, candidates):
    bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]
    candidates_tl = candidates[:, :2]
    candidates_br = candidates[:, :2] + candidates[:, 2:]
    tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],
               np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]
    br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],
               np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]
    wh = np.maximum(0., br - tl)
    area_intersection = wh.prod(axis=1)
    area_bbox = bbox[2:].prod()
    area_candidates = candidates[:, 2:].prod(axis=1)
    return area_intersection / (area_bbox + area_candidates - area_intersection)
def iou_cost(tracks, detections, track_indices=None,
             detection_indices=None):
    if track_indices is None:
        track_indices = np.arange(len(tracks))
    if detection_indices is None:
        detection_indices = np.arange(len(detections))
    cost_matrix = np.zeros((len(track_indices), len(detection_indices)))
    for row, track_idx in enumerate(track_indices):
        if tracks[track_idx].time_since_update > 1:
            cost_matrix[row, :] = linear_assignment.INFTY_COST
            continue
        bbox = tracks[track_idx].to_tlwh()
        candidates = np.asarray([detections[i].tlwh for i in detection_indices])
        cost_matrix[row, :] = 1. - iou(bbox, candidates)
    return cost_matrix
lib/deep_sort/kalman_filter.py
New file
@@ -0,0 +1,107 @@
import numpy as np
import scipy.linalg
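# 0.95 quantiles of the chi-square distribution with N degrees of freedom
# (N = 1..9), used as Mahalanobis gating thresholds.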
chi2inv95 = {
    1: 3.8415,
    2: 5.9915,
    3: 7.8147,
    4: 9.4877,
    5: 11.070,
    6: 12.592,
    7: 14.067,
    8: 15.507,
    9: 16.919}
class KalmanFilter(object):
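    # Constant-velocity model over the 8-d state (x, y, a, h, vx, vy, va, vh):
    # box centre, aspect ratio, height, and their velocities.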
    def __init__(self):
        ndim, dt = 4, 1.
        self._motion_mat = np.eye(2 * ndim, 2 * ndim)
        for i in range(ndim):
            self._motion_mat[i, ndim + i] = dt
        self._update_mat = np.eye(ndim, 2 * ndim)
        self._std_weight_position = 1. / 20
        self._std_weight_velocity = 1. / 160
    def initiate(self, measurement):
        mean_pos = measurement
        mean_vel = np.zeros_like(mean_pos)
        mean = np.r_[mean_pos, mean_vel]
        std = [
            2 * self._std_weight_position * measurement[3],
            2 * self._std_weight_position * measurement[3],
            1e-2,
            2 * self._std_weight_position * measurement[3],
            10 * self._std_weight_velocity * measurement[3],
            10 * self._std_weight_velocity * measurement[3],
            1e-5,
            10 * self._std_weight_velocity * measurement[3]]
        covariance = np.diag(np.square(std))
        return mean, covariance
    def predict(self, mean, covariance):
        std_pos = [
            self._std_weight_position * mean[3],
            self._std_weight_position * mean[3],
            1e-2,
            self._std_weight_position * mean[3]]
        std_vel = [
            self._std_weight_velocity * mean[3],
            self._std_weight_velocity * mean[3],
            1e-5,
            self._std_weight_velocity * mean[3]]
        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))
        mean = np.dot(self._motion_mat, mean)
        covariance = np.linalg.multi_dot((
            self._motion_mat, covariance, self._motion_mat.T)) + motion_cov
        return mean, covariance
    def project(self, mean, covariance):
        std = [
            self._std_weight_position * mean[3],
            self._std_weight_position * mean[3],
            1e-1,
            self._std_weight_position * mean[3]]
        innovation_cov = np.diag(np.square(std))
        mean = np.dot(self._update_mat, mean)
        covariance = np.linalg.multi_dot((
            self._update_mat, covariance, self._update_mat.T))
        return mean, covariance + innovation_cov
    def update(self, mean, covariance, measurement):
        projected_mean, projected_cov = self.project(mean, covariance)
        chol_factor, lower = scipy.linalg.cho_factor(
            projected_cov, lower=True, check_finite=False)
        kalman_gain = scipy.linalg.cho_solve(
            (chol_factor, lower), np.dot(covariance, self._update_mat.T).T,
            check_finite=False).T
        innovation = measurement - projected_mean
        new_mean = mean + np.dot(innovation, kalman_gain.T)
        new_covariance = covariance - np.linalg.multi_dot((
            kalman_gain, projected_cov, kalman_gain.T))
        return new_mean, new_covariance
    def gating_distance(self, mean, covariance, measurements,
                        only_position=False):
        mean, covariance = self.project(mean, covariance)
        if only_position:
            mean, covariance = mean[:2], covariance[:2, :2]
            measurements = measurements[:, :2]
        cholesky_factor = np.linalg.cholesky(covariance)  # Cholesky factorization of the positive-definite covariance
        d = measurements - mean
        z = scipy.linalg.solve_triangular(
            cholesky_factor, d.T, lower=True, check_finite=False,
            overwrite_b=True)
        squared_maha = np.sum(z * z, axis=0)
        return squared_maha
lib/deep_sort/linear_assignment.py
New file
@@ -0,0 +1,84 @@
from __future__ import absolute_import
import numpy as np
from sklearn.utils.linear_assignment_ import linear_assignment
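# NOTE: sklearn.utils.linear_assignment_ was removed in scikit-learn 0.23; on
# newer stacks scipy.optimize.linear_sum_assignment is the usual substitute
# (it returns (row_ind, col_ind) rather than an Nx2 array).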
from . import kalman_filter
INFTY_COST = 1e+5
def min_cost_matching(
        distance_metric, max_distance, tracks, detections, track_indices=None,
        detection_indices=None):
    if track_indices is None:
        track_indices = np.arange(len(tracks))
    if detection_indices is None:
        detection_indices = np.arange(len(detections))
    if len(detection_indices) == 0 or len(track_indices) == 0:
        return [], track_indices, detection_indices  # Nothing to match.
    cost_matrix = distance_metric(
        tracks, detections, track_indices, detection_indices)
    cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5
    indices = linear_assignment(cost_matrix)
    matches, unmatched_tracks, unmatched_detections = [], [], []
    for col, detection_idx in enumerate(detection_indices):
        if col not in indices[:, 1]:
            unmatched_detections.append(detection_idx)
    for row, track_idx in enumerate(track_indices):
        if row not in indices[:, 0]:
            unmatched_tracks.append(track_idx)
    for row, col in indices:
        track_idx = track_indices[row]
        detection_idx = detection_indices[col]
        if cost_matrix[row, col] > max_distance:
            unmatched_tracks.append(track_idx)
            unmatched_detections.append(detection_idx)
        else:
            matches.append((track_idx, detection_idx))
    return matches, unmatched_tracks, unmatched_detections
def matching_cascade(
        distance_metric, max_distance, cascade_depth, tracks, detections,
        track_indices=None, detection_indices=None):
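    # Match detections to tracks in order of increasing time_since_update, so
    # recently updated tracks get first pick of the detections.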
    if track_indices is None:
        track_indices = list(range(len(tracks)))
    if detection_indices is None:
        detection_indices = list(range(len(detections)))
    unmatched_detections = detection_indices
    matches = []
    for level in range(cascade_depth):
        if len(unmatched_detections) == 0:
            break
        track_indices_l = [
            k for k in track_indices
            if tracks[k].time_since_update == 1 + level
        ]
        if len(track_indices_l) == 0:
            continue
        matches_l, _, unmatched_detections = \
            min_cost_matching(
                distance_metric, max_distance, tracks, detections,
                track_indices_l, unmatched_detections)
        matches += matches_l
    unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))
    return matches, unmatched_tracks, unmatched_detections
def gate_cost_matrix(
        kf, cost_matrix, tracks, detections, track_indices, detection_indices,
        gated_cost=INFTY_COST, only_position=False):
    gating_dim = 2 if only_position else 4
    gating_threshold = kalman_filter.chi2inv95[gating_dim]
    measurements = np.asarray(
        [detections[i].to_xyah() for i in detection_indices])
    for row, track_idx in enumerate(track_indices):
        track = tracks[track_idx]
        gating_distance = kf.gating_distance(
            track.mean, track.covariance, measurements, only_position)
        cost_matrix[row, gating_distance > gating_threshold] = gated_cost  # push pairs beyond the gate to an effectively infinite cost
    return cost_matrix
lib/deep_sort/nn_matching.py
New file
@@ -0,0 +1,62 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2019/10/30 15:04
# @Author  : Scheaven
# @File    :  nn_matching.py
# @description:
import numpy as np
def _pdist(a, b):
    a, b = np.asarray(a), np.asarray(b)
    if len(a) == 0 or len(b) == 0:
        return np.zeros((len(a), len(b)))
    a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1)
    r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :]
    r2 = np.clip(r2, 0., float(np.inf))
    return r2
def _cosine_distance(a, b, data_is_normalized=False):
    if not data_is_normalized:
        a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)
        b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)
    return 1. - np.dot(a, b.T)
def _nn_euclidean_distance(x, y):
    distances = _pdist(x, y)
    return np.maximum(0.0, distances.min(axis=0))
def _nn_cosine_distance(x, y):
    distances = _cosine_distance(x, y)
    return distances.min(axis=0)
class NearestNeighborDistanceMetric(object):
    def __init__(self, metric, matching_threshold, budget=None):
        if metric == "euclidean":
            self._metric = _nn_euclidean_distance
        elif metric == "cosine":
            self._metric = _nn_cosine_distance
        else:
            raise ValueError(
                "Invalid metric; must be either 'euclidean' or 'cosine'")
        self.matching_threshold = matching_threshold
        self.budget = budget
        self.samples = {}  # an offline feature gallery could be loaded here
    def partial_fit(self, features, targets, active_targets):
        for feature, target in zip(features, targets):
            self.samples.setdefault(target, []).append(feature)
            if self.budget is not None:
                self.samples[target] = self.samples[target][-self.budget:]
        self.samples = {k: self.samples[k] for k in active_targets}
    def distance(self, features, targets):
        cost_matrix = np.zeros((len(targets), len(features)))
        for i, target in enumerate(targets):
            cost_matrix[i, :] = self._metric(self.samples[target], features)
        return cost_matrix
lib/deep_sort/preprocessing.py
New file
@@ -0,0 +1,43 @@
# vim: expandtab:ts=4:sw=4
import numpy as np
import cv2
def non_max_suppression(boxes, max_bbox_overlap, scores=None):
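    # Greedy non-maximum suppression on (x, y, w, h) boxes; returns the indices
    # of the boxes to keep, preferring higher scores when they are given.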
    if len(boxes) == 0:
        return []
    boxes = boxes.astype(np.float)
    pick = []
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2] + boxes[:, 0]
    y2 = boxes[:, 3] + boxes[:, 1]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    if scores is not None:
        idxs = np.argsort(scores)
    else:
        idxs = np.argsort(y2)
    while len(idxs) > 0:
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        xx1 = np.maximum(x1[i], x1[idxs[:last]])
        yy1 = np.maximum(y1[i], y1[idxs[:last]])
        xx2 = np.minimum(x2[i], x2[idxs[:last]])
        yy2 = np.minimum(y2[i], y2[idxs[:last]])
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)
        overlap = (w * h) / area[idxs[:last]]
        idxs = np.delete(
            idxs, np.concatenate(
                ([last], np.where(overlap > max_bbox_overlap)[0])))
    return pick
lib/deep_sort/track.py
New file
@@ -0,0 +1,66 @@
class TrackState:
    Tentative = 1
    Confirmed = 2
    Deleted = 3
class Track:
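    # Track lifecycle: Tentative until hit n_init times in a row, then Confirmed;
    # Deleted after max_age consecutive misses (or one miss while tentative).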
    def __init__(self, mean, covariance, track_id, n_init, max_age,
                 feature=None):
        self.mean = mean
        self.covariance = covariance
        self.track_id = track_id
        self.hits = 1
        self.age = 1
        self.time_since_update = 0
        self.state = TrackState.Tentative
        self.features = []
        if feature is not None:
            self.features.append(feature)
        self._n_init = n_init
        self._max_age = max_age
    def to_tlwh(self):
        ret = self.mean[:4].copy()
        ret[2] *= ret[3]
        ret[:2] -= ret[2:] / 2
        return ret
    def to_tlbr(self):
        ret = self.to_tlwh()
        ret[2:] = ret[:2] + ret[2:]
        return ret
    def predict(self, kf):
        self.mean, self.covariance = kf.predict(self.mean, self.covariance)
        self.age += 1
        self.time_since_update += 1
    def update(self, kf, detection):
        self.mean, self.covariance = kf.update(
            self.mean, self.covariance, detection.to_xyah())
        self.features.append(detection.feature)
        self.hits += 1
        self.time_since_update = 0
        if self.state == TrackState.Tentative and self.hits >= self._n_init:
            self.state = TrackState.Confirmed
    def mark_missed(self):
        if self.state == TrackState.Tentative:
            self.state = TrackState.Deleted
        elif self.time_since_update > self._max_age:
            self.state = TrackState.Deleted
    def is_tentative(self):
        return self.state == TrackState.Tentative
    def is_confirmed(self):
        return self.state == TrackState.Confirmed
    def is_deleted(self):
        return self.state == TrackState.Deleted
lib/deep_sort/tracker.py
New file
@@ -0,0 +1,94 @@
# vim: expandtab:ts=4:sw=4
from __future__ import absolute_import
import numpy as np
from . import kalman_filter
from . import linear_assignment
from . import iou_matching
from .track import Track
class Tracker:
    def __init__(self, metric, max_iou_distance=0.7, max_age=300, n_init=3):
        self.metric = metric
        self.max_iou_distance = max_iou_distance
        self.max_age = max_age  # max_trace_length
        self.n_init = n_init
        self.kf = kalman_filter.KalmanFilter()
        self.tracks = []
        self._next_id = 1
    def predict(self):
        for track in self.tracks:
            track.predict(self.kf)
    def update(self, detections):
        matches, unmatched_tracks, unmatched_detections = \
            self._match(detections)
        for track_idx, detection_idx in matches:
            self.tracks[track_idx].update(
                self.kf, detections[detection_idx])
        for track_idx in unmatched_tracks:
            self.tracks[track_idx].mark_missed()
        for detection_idx in unmatched_detections:
            self._initiate_track(detections[detection_idx])
        self.tracks = [t for t in self.tracks if not t.is_deleted()]
        active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]
        features, targets = [], []
        for track in self.tracks:
            if not track.is_confirmed():
                continue
            features += track.features
            targets += [track.track_id for _ in track.features]
            track.features = []
        self.metric.partial_fit(
            np.asarray(features), np.asarray(targets), active_targets)
    def _match(self, detections):
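        # Two-stage association: appearance-based matching cascade over confirmed
        # tracks, then IOU matching over unconfirmed and just-missed tracks.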
        def gated_metric(tracks, dets, track_indices, detection_indices):
            features = np.array([dets[i].feature for i in detection_indices])
            targets = np.array([tracks[i].track_id for i in track_indices])
            cost_matrix = self.metric.distance(features, targets)
            cost_matrix = linear_assignment.gate_cost_matrix(
                self.kf, cost_matrix, tracks, dets, track_indices,
                detection_indices)
            return cost_matrix
        confirmed_tracks = [
            i for i, t in enumerate(self.tracks) if t.is_confirmed()]
        unconfirmed_tracks = [
            i for i, t in enumerate(self.tracks) if not t.is_confirmed()]
        matches_a, unmatched_tracks_a, unmatched_detections = \
            linear_assignment.matching_cascade(
                gated_metric, self.metric.matching_threshold, self.max_age,
                self.tracks, detections, confirmed_tracks)
        iou_track_candidates = unconfirmed_tracks + [
            k for k in unmatched_tracks_a if
            self.tracks[k].time_since_update == 1]
        unmatched_tracks_a = [
            k for k in unmatched_tracks_a if
            self.tracks[k].time_since_update != 1]
        matches_b, unmatched_tracks_b, unmatched_detections = \
            linear_assignment.min_cost_matching(
                iou_matching.iou_cost, self.max_iou_distance, self.tracks,
                detections, iou_track_candidates, unmatched_detections)
        matches = matches_a + matches_b
        unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))
        return matches, unmatched_tracks, unmatched_detections
    def _initiate_track(self, detection):
        mean, covariance = self.kf.initiate(detection.to_xyah())
        self.tracks.append(Track(
            mean, covariance, self._next_id, self.n_init, self.max_age,
            detection.feature))
        self._next_id += 1
lib/utils/__init__.py
New file
@@ -0,0 +1,6 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2019/10/30 13:53
# @Author  : Scheaven
# @File    :  utils.py
# @description:
lib/utils/__pycache__/__init__.cpython-36.pyc
Binary files differ
lib/utils/__pycache__/utils.cpython-36.pyc
Binary files differ
lib/utils/utils.py
New file
@@ -0,0 +1,19 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Time    : 2019/10/30 15:27
# @Author  : Scheaven
# @File    :  utils.py
# @description:
import cv2
class video_open:
    def __init__(self,args):
        #self.readtype=read_type
        if args.in_type == "camera":
            self.readtype = args.c_in
        else:
            self.readtype= args.v_in
    def generate_video(self):
        video_capture = cv2.VideoCapture(self.readtype)
        return video_capture
lib/yolo3/__init__.py
New file
lib/yolo3/__init__.pyc
Binary files differ
lib/yolo3/__pycache__/__init__.cpython-36.pyc
Binary files differ
lib/yolo3/__pycache__/model.cpython-36.pyc
Binary files differ
lib/yolo3/__pycache__/utils.cpython-36.pyc
Binary files differ
lib/yolo3/model.py
New file
@@ -0,0 +1,300 @@
from functools import wraps
import numpy as np
import tensorflow as tf
from keras import backend as K
from keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.regularizers import l2
from lib.yolo3.utils import compose
@wraps(Conv2D)
def DarknetConv2D(*args, **kwargs):
    darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)}
    darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same'
    darknet_conv_kwargs.update(kwargs)
    return Conv2D(*args, **darknet_conv_kwargs)
def DarknetConv2D_BN_Leaky(*args, **kwargs):
    no_bias_kwargs = {'use_bias': False}
    no_bias_kwargs.update(kwargs)
    return compose(
        DarknetConv2D(*args, **no_bias_kwargs),
        BatchNormalization(),
        LeakyReLU(alpha=0.1))
def resblock_body(x, num_filters, num_blocks):
    x = ZeroPadding2D(((1,0),(1,0)))(x)
    x = DarknetConv2D_BN_Leaky(num_filters, (3,3), strides=(2,2))(x)
    for i in range(num_blocks):
        y = compose(
                DarknetConv2D_BN_Leaky(num_filters//2, (1,1)),
                DarknetConv2D_BN_Leaky(num_filters, (3,3)))(x)
        x = Add()([x,y])
    return x
def darknet_body(x):
    x = DarknetConv2D_BN_Leaky(32, (3,3))(x)
    x = resblock_body(x, 64, 1)
    x = resblock_body(x, 128, 2)
    x = resblock_body(x, 256, 8)
    x = resblock_body(x, 512, 8)
    x = resblock_body(x, 1024, 4)
    return x
def make_last_layers(x, num_filters, out_filters):
    x = compose(
            DarknetConv2D_BN_Leaky(num_filters, (1,1)),
            DarknetConv2D_BN_Leaky(num_filters*2, (3,3)),
            DarknetConv2D_BN_Leaky(num_filters, (1,1)),
            DarknetConv2D_BN_Leaky(num_filters*2, (3,3)),
            DarknetConv2D_BN_Leaky(num_filters, (1,1)))(x)
    y = compose(
            DarknetConv2D_BN_Leaky(num_filters*2, (3,3)),
            DarknetConv2D(out_filters, (1,1)))(x)
    return x, y
def yolo_body(inputs, num_anchors, num_classes):
    darknet = Model(inputs, darknet_body(inputs))
    x, y1 = make_last_layers(darknet.output, 512, num_anchors*(num_classes+5))
    x = compose(
            DarknetConv2D_BN_Leaky(256, (1,1)),
            UpSampling2D(2))(x)
    x = Concatenate()([x,darknet.layers[152].output])
    x, y2 = make_last_layers(x, 256, num_anchors*(num_classes+5))
    x = compose(
            DarknetConv2D_BN_Leaky(128, (1,1)),
            UpSampling2D(2))(x)
    x = Concatenate()([x,darknet.layers[92].output])
    x, y3 = make_last_layers(x, 128, num_anchors*(num_classes+5))
    return Model(inputs, [y1,y2,y3])
def yolo_head(feats, anchors, num_classes, input_shape):
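    # Decode a raw feature map into box centres/sizes (sigmoid offsets plus grid
    # cell, exp-scaled anchors), objectness and class probabilities, per YOLOv3.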
    num_anchors = len(anchors)
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])
    grid_shape = K.shape(feats)[1:3] # height, width
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))
    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])
    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])
    box_xy = (box_xy + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
    box_wh = box_wh * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))
    return box_xy, box_wh, box_confidence, box_class_probs
def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
    box_yx = box_xy[..., ::-1]
    box_hw = box_wh[..., ::-1]
    input_shape = K.cast(input_shape, K.dtype(box_yx))
    image_shape = K.cast(image_shape, K.dtype(box_yx))
    new_shape = K.round(image_shape * K.min(input_shape/image_shape))
    offset = (input_shape-new_shape)/2./input_shape
    scale = input_shape/new_shape
    box_yx = (box_yx - offset) * scale
    box_hw *= scale
    box_mins = box_yx - (box_hw / 2.)
    box_maxes = box_yx + (box_hw / 2.)
    boxes =  K.concatenate([
        box_mins[..., 0:1],
        box_mins[..., 1:2],
        box_maxes[..., 0:1],
        box_maxes[..., 1:2]
    ])
    boxes *= K.concatenate([image_shape, image_shape])
    return boxes
def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
    box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats,
        anchors, num_classes, input_shape)
    boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape)
    boxes = K.reshape(boxes, [-1, 4])
    box_scores = box_confidence * box_class_probs
    box_scores = K.reshape(box_scores, [-1, num_classes])
    return boxes, box_scores
def yolo_eval(yolo_outputs,
              anchors,
              num_classes,
              image_shape,
              max_boxes=20,
              score_threshold=.6,
              iou_threshold=.5):
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]]
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32
    boxes = []
    box_scores = []
    for l in range(3):
        _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
            anchors[anchor_mask[l]], num_classes, input_shape, image_shape)
        boxes.append(_boxes)
        box_scores.append(_box_scores)
    boxes = K.concatenate(boxes, axis=0)
    box_scores = K.concatenate(box_scores, axis=0)
    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
    boxes_ = []
    scores_ = []
    classes_ = []
    for c in range(num_classes):
        class_boxes = tf.boolean_mask(boxes, mask[:, c])
        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
        nms_index = tf.image.non_max_suppression(
            class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
        class_boxes = K.gather(class_boxes, nms_index)
        class_box_scores = K.gather(class_box_scores, nms_index)
        classes = K.ones_like(class_box_scores, 'int32') * c
        boxes_.append(class_boxes)
        scores_.append(class_box_scores)
        classes_.append(classes)
    boxes_ = K.concatenate(boxes_, axis=0)
    scores_ = K.concatenate(scores_, axis=0)
    classes_ = K.concatenate(classes_, axis=0)
    return boxes_, scores_, classes_
def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
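    # Assign each ground-truth box to its best-IoU anchor and write it into the
    # y_true grid of the corresponding scale (32/16/8-pixel strides).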
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]]
    true_boxes = np.array(true_boxes, dtype='float32')
    input_shape = np.array(input_shape, dtype='int32')
    boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
    boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
    true_boxes[..., 0:2] = boxes_xy/input_shape[::-1]
    true_boxes[..., 2:4] = boxes_wh/input_shape[::-1]
    m = true_boxes.shape[0]
    grid_shapes = [input_shape//{0:32, 1:16, 2:8}[l] for l in range(3)]
    y_true = [np.zeros((m,grid_shapes[l][0],grid_shapes[l][1],len(anchor_mask[l]),5+num_classes),
        dtype='float32') for l in range(3)]
    anchors = np.expand_dims(anchors, 0)
    anchor_maxes = anchors / 2.
    anchor_mins = -anchor_maxes
    valid_mask = boxes_wh[..., 0]>0
    for b in range(m):
        wh = boxes_wh[b, valid_mask[b]]
        wh = np.expand_dims(wh, -2)
        box_maxes = wh / 2.
        box_mins = -box_maxes
        intersect_mins = np.maximum(box_mins, anchor_mins)
        intersect_maxes = np.minimum(box_maxes, anchor_maxes)
        intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
        intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
        box_area = wh[..., 0] * wh[..., 1]
        anchor_area = anchors[..., 0] * anchors[..., 1]
        iou = intersect_area / (box_area + anchor_area - intersect_area)
        best_anchor = np.argmax(iou, axis=-1)
        for t, n in enumerate(best_anchor):
            for l in range(3):
                if n in anchor_mask[l]:
                    i = np.floor(true_boxes[b,t,0]*grid_shapes[l][1]).astype('int32')
                    j = np.floor(true_boxes[b,t,1]*grid_shapes[l][0]).astype('int32')
                    n = anchor_mask[l].index(n)
                    c = true_boxes[b,t, 4].astype('int32')
                    y_true[l][b, j, i, n, 0:4] = true_boxes[b,t, 0:4]
                    y_true[l][b, j, i, n, 4] = 1
                    y_true[l][b, j, i, n, 5+c] = 1
                    break
    return y_true
def box_iou(b1, b2):
    b1 = K.expand_dims(b1, -2)
    b1_xy = b1[..., :2]
    b1_wh = b1[..., 2:4]
    b1_wh_half = b1_wh/2.
    b1_mins = b1_xy - b1_wh_half
    b1_maxes = b1_xy + b1_wh_half
    b2 = K.expand_dims(b2, 0)
    b2_xy = b2[..., :2]
    b2_wh = b2[..., 2:4]
    b2_wh_half = b2_wh/2.
    b2_mins = b2_xy - b2_wh_half
    b2_maxes = b2_xy + b2_wh_half
    intersect_mins = K.maximum(b1_mins, b2_mins)
    intersect_maxes = K.minimum(b1_maxes, b2_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    b1_area = b1_wh[..., 0] * b1_wh[..., 1]
    b2_area = b2_wh[..., 0] * b2_wh[..., 1]
    iou = intersect_area / (b1_area + b2_area - intersect_area)
    return iou
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
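    # Squared-error loss over three scales: xy/wh terms weighted by (2 - w*h),
    # objectness with an IoU-based ignore mask for unmatched anchors, and classes.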
    yolo_outputs = args[:3]
    y_true = args[3:]
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0])) for l in range(3)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]
    for l in range(3):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]
        pred_xy, pred_wh, pred_confidence, pred_class_probs = yolo_head(yolo_outputs[l],
             anchors[anchor_mask[l]], num_classes, input_shape)
        pred_box = K.concatenate([pred_xy, pred_wh])
        xy_delta = (y_true[l][..., :2]-pred_xy)*grid_shapes[l][::-1]
        wh_delta = K.log(y_true[l][..., 2:4]) - K.log(pred_wh)
        wh_delta = K.switch(object_mask, wh_delta, K.zeros_like(wh_delta))
        box_delta = K.concatenate([xy_delta, wh_delta], axis=-1)
        box_delta_scale = 2 - y_true[l][...,2:3]*y_true[l][...,3:4]
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')
        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b,...,0:4], object_mask_bool[b,...,0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(b, K.cast(best_iou<ignore_thresh, K.dtype(true_box)))
            return b+1, ignore_mask
        _, ignore_mask = K.control_flow_ops.while_loop(lambda b,*args: b<m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)
        box_loss = object_mask * K.square(box_delta*box_delta_scale)
        confidence_loss = object_mask * K.square(1-pred_confidence) + \
            (1-object_mask) * K.square(0-pred_confidence) * ignore_mask
        class_loss = object_mask * K.square(true_class_probs-pred_class_probs)
        loss += K.sum(box_loss) + K.sum(confidence_loss) + K.sum(class_loss)
    return loss / K.cast(m, K.dtype(loss))
lib/yolo3/model.pyc
Binary files differ
lib/yolo3/utils.py
New file
@@ -0,0 +1,20 @@
from functools import reduce
from PIL import Image
def compose(*funcs):
    if funcs:
        return reduce(lambda f, g: lambda *a, **kw: g(f(*a, **kw)), funcs)
    else:
        raise ValueError('Composition of empty sequence not supported.')
def letterbox_image(image, size):
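    # Resize image to fit inside size while preserving aspect ratio, padding the
    # borders with grey (128, 128, 128).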
    image_w, image_h = image.size
    w, h = size
    new_w = int(image_w * min(w*1.0/image_w, h*1.0/image_h))
    new_h = int(image_h * min(w*1.0/image_w, h*1.0/image_h))
    resized_image = image.resize((new_w,new_h), Image.BICUBIC)
    boxed_image = Image.new('RGB', size, (128,128,128))
    boxed_image.paste(resized_image, ((w-new_w)//2,(h-new_h)//2))
    return boxed_image
lib/yolo3/utils.pyc
Binary files differ
model_dump/coco_classes.txt
New file
@@ -0,0 +1,80 @@
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
model_dump/voc_classes.txt
New file
@@ -0,0 +1,20 @@
aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
train
tvmonitor
model_dump/yolo_anchors.txt
New file
@@ -0,0 +1 @@
10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326