mikel-brostrom/yolov8_tracking: Real-time multi-object tracking and segmentation using YOLOv8 with DeepOCSORT and OSNet (github.com): https://github.com/mikel-brostrom/yolov8_tracking
1.到上面的github网站下载跟踪代码,该代码使用YOLOv8作为检测网络,不过YOLOv8的文件需要去下面的链接下载,然后替换掉跟踪代码中YOLOv8的空文件。
GitHub - ultralytics/ultralytics at 15b3b0365ab2f12993a58985f3cb7f2137409a0c (NEW - YOLOv8 🚀 in PyTorch > ONNX > CoreML > TFLite): https://github.com/ultralytics/ultralytics/tree/15b3b0365ab2f12993a58985f3cb7f2137409a0c
2.该跟踪代码可以自行选择BoT-SORT、ByteTrack、DeepOCSORT、OCSORT和StrongSORT作为跟踪器。我自己使用下来感觉DeepOCSORT和ByteTrack效果最好。
3.里面还有一个重识别网络,也叫ReID网络,用来匹配轨迹和检测框中物体的外观特征。它的权重模型可以在下面的链接下载。其中market1501是清华大学的行人数据集,我用了之后发现效果不好(可能是数据集太老的原因)。msmt17数据集效果很不错,推荐下载该数据集的预训练模型。模型越大,特征提取效果越好,一般使用osnet也够用,如果想要效果更好就推荐Resnet50,或者自己用更好的图像分类网络在msmt17上预训练。
Model Zoo — torchreid 1.4.0 documentation: https://kaiyangzhou.github.io/deep-person-reid/MODEL_ZOO
其实github中有给出使用的方法,我在这里就不多说了,主要讲一下需要注意的地方。
1.安装需要的包
pip install -r requirements.txt
2.需要安装一个单独的包,不然后面会报错
pip install lap
3.将download.py文件第156行的verify改为False
不然代码会自动访问google的库下载权重文件
4.修改yolov8_tracking-master/trackers/strongsort/deep/models/osnet.py中第483行的权重文件地址。
要先把reid网络的权重文件下载下来,修改成对应地址
5.将MOT数据集中的图片合成为视频,再进行检测
因为MOT数据集中全是一帧一帧的图片,所以我们先要将它整合成视频。代码为
import os

import cv2

# Directory holding the per-frame images of one MOT sequence (fill in before running).
images_path = "..."

# Output is fixed at 30 FPS and 1920x1080.
# NOTE(review): OpenCV writers generally expect every frame to match the size given here;
# frames of a different size may be dropped silently -- confirm for sequences that are
# not 1920x1080.
video_writer = cv2.VideoWriter("result.mp4", cv2.VideoWriter_fourcc(*"mp4v"), 30, (1920, 1080))

try:
    # Frames are written in lexicographic file-name order.
    for image_name in sorted(os.listdir(images_path)):
        image = cv2.imread(os.path.join(images_path, image_name))
        if image is None:
            # cv2.imread returns None for files it cannot decode (e.g. non-image files).
            continue
        video_writer.write(image)

        # Show a down-scaled preview while encoding.
        show = cv2.resize(image, (1280, 720))
        cv2.imshow("test", show)
        if cv2.waitKey(10) == ord('q'):
            # Pressing 'q' stops early; the frames written so far are kept.
            break
finally:
    # Release the writer so the container is finalised, and close the preview window.
    video_writer.release()
    cv2.destroyAllWindows()
6.最后在终端输入命令就可以运行起来了
7.Deepocsort代码讲解
因为我一开始完全看不懂代码,自己摸索真的很累,所以我想能够帮助需要的人来快速理解代码,重要部分我都已经打上注释,还有不理解的地方可以在评论区留言。
""" This script is adopted from the SORT script by Alex Bewley alex@bewley.ai"""from __future__ import print_functionimport pdbimport pickleimport cv2import torchimport torchvisionimport numpy as npfrom .association import *from .embedding import EmbeddingComputerfrom .cmc import CMCComputerfrom reid_multibackend import ReIDDetectMultiBackenddef k_previous_obs(observations, cur_age, k): if len(observations) == 0: # 若轨迹的观测数为0 return [-1, -1, -1, -1, -1] for i in range(k): dt = k - i if cur_age - dt in observations: return observations[cur_age - dt] max_age = max(observations.keys()) return observations[max_age]def convert_bbox_to_z(bbox): """ Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is the aspect ratio """ w = bbox[2] - bbox[0] h = bbox[3] - bbox[1] x = bbox[0] + w / 2.0 y = bbox[1] + h / 2.0 s = w * h # scale is just area r = w / float(h + 1e-6) return np.array([x, y, s, r]).reshape((4, 1))def convert_bbox_to_z_new(bbox): w = bbox[2] - bbox[0] h = bbox[3] - bbox[1] x = bbox[0] + w / 2.0 y = bbox[1] + h / 2.0 return np.array([x, y, w, h]).reshape((4, 1))def convert_x_to_bbox_new(x): x, y, w, h = x.reshape(-1)[:4] return np.array([x - w / 2, y - h / 2, x + w / 2, y + h / 2]).reshape(1, 4)def convert_x_to_bbox(x, score=None): """ Takes a bounding box in the centre form [x,y,s,r] and returns it in the form [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right """ w = np.sqrt(x[2] * x[3]) h = x[2] / w if score == None: return np.array([x[0] - w / 2.0, x[1] - h / 2.0, x[0] + w / 2.0, x[1] + h / 2.0]).reshape((1, 4)) else: return np.array([x[0] - w / 2.0, x[1] - h / 2.0, x[0] + w / 2.0, x[1] + h / 2.0, score]).reshape((1, 5))def speed_direction(bbox1, bbox2): cx1, cy1 = (bbox1[0] + bbox1[2]) / 2.0, (bbox1[1] + bbox1[3]) / 2.0 cx2, cy2 = (bbox2[0] + bbox2[2]) / 2.0, (bbox2[1] + bbox2[3]) / 2.0 speed = np.array([cy2 - cy1, cx2 - cx1]) norm = 
np.sqrt((cy2 - cy1) ** 2 + (cx2 - cx1) ** 2) + 1e-6 return speed / normdef new_kf_process_noise(w, h, p=1 / 20, v=1 / 160): Q = np.diag( ((p * w) ** 2, (p * h) ** 2, (p * w) ** 2, (p * h) ** 2, (v * w) ** 2, (v * h) ** 2, (v * w) ** 2, (v * h) ** 2) ) return Qdef new_kf_measurement_noise(w, h, m=1 / 20): w_var = (m * w) ** 2 h_var = (m * h) ** 2 R = np.diag((w_var, h_var, w_var, h_var)) return Rclass KalmanBoxTracker(object): """ This class represents the internal state of individual tracked objects observed as bbox. """ count = 0 def __init__(self, bbox, cls, delta_t=3, orig=False, emb=None, alpha=0, new_kf=False): """ Initialises a tracker using initial bounding box. """ # define constant velocity model if not orig: from .kalmanfilter import KalmanFilterNew as KalmanFilter else: from filterpy.kalman import KalmanFilter self.cls = cls self.conf = bbox[-1] self.new_kf = new_kf if new_kf: self.kf = KalmanFilter(dim_x=8, dim_z=4) self.kf.F = np.array( [ # x y w h x' y' w' h' [1, 0, 0, 0, 1, 0, 0, 0], [0, 1, 0, 0, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 1, 0], [0, 0, 0, 1, 0, 0, 0, 1], [0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0, 0, 1], ] ) self.kf.H = np.array( [ [1, 0, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0], ] ) _, _, w, h = convert_bbox_to_z_new(bbox).reshape(-1) self.kf.P = new_kf_process_noise(w, h) self.kf.P[:4, :4] *= 4 self.kf.P[4:, 4:] *= 100 # Process and measurement uncertainty happen in functions self.bbox_to_z_func = convert_bbox_to_z_new self.x_to_bbox_func = convert_x_to_bbox_new else: self.kf = KalmanFilter(dim_x=7, dim_z=4) self.kf.F = np.array( [ # x y s r x' y' s' [1, 0, 0, 0, 1, 0, 0], [0, 1, 0, 0, 0, 1, 0], [0, 0, 1, 0, 0, 0, 1], [0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0, 1], ] ) self.kf.H = np.array( [ [1, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0], ] ) 
self.kf.R[2:, 2:] *= 10.0 self.kf.P[4:, 4:] *= 1000.0 # give high uncertainty to the unobservable initial velocities self.kf.P *= 10.0 self.kf.Q[-1, -1] *= 0.01 self.kf.Q[4:, 4:] *= 0.01 self.bbox_to_z_func = convert_bbox_to_z self.x_to_bbox_func = convert_x_to_bbox self.kf.x[:4] = self.bbox_to_z_func(bbox) self.time_since_update = 0 # 每有一个卡尔曼轨迹id就加一 self.id = KalmanBoxTracker.count KalmanBoxTracker.count += 1 self.history = [] self.hits = 0 self.hit_streak = 0 self.age = 0 """ NOTE: [-1,-1,-1,-1,-1] is a compromising placeholder for non-observation status, the same for the return of function k_previous_obs. It is ugly and I do not like it. But to support generate observation array in a fast and unified way, which you would see below k_observations = np.array([k_previous_obs(...]]), let's bear it for now. """ # Used for OCR self.last_observation = np.array([-1, -1, -1, -1, -1]) # placeholder # Used to output track after min_hits reached self.history_observations = [] # Used for velocity self.observations = dict() self.velocity = None self.delta_t = delta_t self.emb = emb self.frozen = False def update(self, bbox, cls): """ Updates the state vector with observed bbox. """ if bbox is not None: self.frozen = False self.cls = cls if self.last_observation.sum() >= 0: # no previous observation previous_box = None for dt in range(self.delta_t, 0, -1): if self.age - dt in self.observations: previous_box = self.observations[self.age - dt] break if previous_box is None: previous_box = self.last_observation """ Estimate the track speed direction with observations \Delta t steps away """ self.velocity = speed_direction(previous_box, bbox) """ Insert new observations. This is a ugly way to maintain both self.observations and self.history_observations. Bear it for the moment. 
""" self.last_observation = bbox self.observations[self.age] = bbox self.history_observations.append(bbox) self.time_since_update = 0 self.history = [] self.hits += 1 self.hit_streak += 1 if self.new_kf: R = new_kf_measurement_noise(self.kf.x[2, 0], self.kf.x[3, 0]) self.kf.update(self.bbox_to_z_func(bbox), R=R) else: self.kf.update(self.bbox_to_z_func(bbox)) else: self.kf.update(bbox) self.frozen = True def update_emb(self, emb, alpha=0.9): self.emb = alpha * self.emb + (1 - alpha) * emb self.emb /= np.linalg.norm(self.emb) def get_emb(self): return self.emb.cpu() def apply_affine_correction(self, affine): m = affine[:, :2] t = affine[:, 2].reshape(2, 1) # For OCR if self.last_observation.sum() > 0: ps = self.last_observation[:4].reshape(2, 2).T ps = m @ ps + t self.last_observation[:4] = ps.T.reshape(-1) # Apply to each box in the range of velocity computation for dt in range(self.delta_t, -1, -1): if self.age - dt in self.observations: ps = self.observations[self.age - dt][:4].reshape(2, 2).T ps = m @ ps + t self.observations[self.age - dt][:4] = ps.T.reshape(-1) # Also need to change kf state, but might be frozen self.kf.apply_affine_correction(m, t, self.new_kf) def predict(self): """ Advances the state vector and returns the predicted bounding box estimate. """ # Don't allow negative bounding boxes if self.new_kf: if self.kf.x[2] + self.kf.x[6] <= 0: self.kf.x[6] = 0 if self.kf.x[3] + self.kf.x[7] <= 0: self.kf.x[7] = 0 # Stop velocity, will update in kf during OOS if self.frozen: self.kf.x[6] = self.kf.x[7] = 0 Q = new_kf_process_noise(self.kf.x[2, 0], self.kf.x[3, 0]) else: if (self.kf.x[6] + self.kf.x[2]) <= 0: self.kf.x[6] *= 0.0 Q = None self.kf.predict(Q=Q) self.age += 1 if self.time_since_update > 0: self.hit_streak = 0 self.time_since_update += 1 self.history.append(self.x_to_bbox_func(self.kf.x)) return self.history[-1] def get_state(self): """ Returns the current bounding box estimate. 
""" return self.x_to_bbox_func(self.kf.x) def mahalanobis(self, bbox): """Should be run after a predict() call for accuracy.""" return self.kf.md_for_measurement(self.bbox_to_z_func(bbox))""" We support multiple ways for association cost calculation, by default we use IoU. GIoU may have better performance in some situations. We note that we hardly normalize the cost by all methods to (0,1) which may not be the best practice."""ASSO_FUNCS = { "iou": iou_batch, "giou": giou_batch, "ciou": ciou_batch, "diou": diou_batch, "ct_dist": ct_dist,}class OCSort(object): def __init__( self, # reid模型的权重文件 model_weights, device, # 是否需要半精度 fp16, # 检测框的置信度阈值 det_thresh, # 轨迹的最大生命,轨迹每预测一次, # trk.time_since_update就加一,知道大于max_age后被删除 max_age=30, # 轨道的最小连击值,卡尔曼滤波每更新一次,hit_streak就加一 min_hits=3, # 检测框和轨迹之间IOU的最小阈值 iou_threshold=0.3, delta_t=3, # 检测框和轨迹之间计算IOU的方式 asso_func="iou", # vdc_weight权重 inertia=0.2, w_association_emb=0.75, alpha_fixed_emb=0.95, aw_param=0.5, embedding_off=False, cmc_off=False, aw_off=False, new_kf_off=True, **kwargs ): """ Sets key parameters for SORT """ self.max_age = max_age self.min_hits = min_hits self.iou_threshold = iou_threshold self.trackers = [] self.frame_count = 0 # 检测框的置信度阈值 self.det_thresh = det_thresh self.delta_t = delta_t self.asso_func = ASSO_FUNCS[asso_func] self.inertia = inertia self.w_association_emb = w_association_emb self.alpha_fixed_emb = alpha_fixed_emb self.aw_param = aw_param # 初始化id为0 KalmanBoxTracker.count = 0 # embedder为reid提取出来的特征 self.embedder = ReIDDetectMultiBackend(weights=model_weights, device=device, fp16=fp16) # CMC为累积匹配特性,用于评价reid的 self.cmc = CMCComputer() self.embedding_off = embedding_off self.cmc_off = cmc_off self.aw_off = aw_off self.new_kf_off = new_kf_off self.s = 0 def update(self, dets, img_numpy, tag='blub'): """ Params: dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...] 
Requires: this method must be called once for each frame even with empty detections (use np.empty((0, 5)) for frames without detections). Returns the a similar array, where the last column is the object ID. NOTE: The number of objects returned may differ from the number of detections provided. """ xyxys = dets[:, 0:4] scores = dets[:, 4] clss = dets[:, 5] classes = clss.numpy() xyxys = xyxys.numpy() scores = scores.numpy() dets = dets[:, 0:6].numpy() # 当检测框的置信度大于阈值时 remain_inds = scores > self.det_thresh # 检测框dets为大于阈值的检测框 dets = dets[remain_inds] self.height, self.width = img_numpy.shape[:2] # Rescale #scale = min(img_tensor.shape[2] / img_numpy.shape[0], img_tensor.shape[3] / img_numpy.shape[1]) #dets[:, :4] /= scale # Embedding if self.embedding_off or dets.shape[0] == 0: # 如果不提取特征或没有超过阈值的检测框 dets_embs = np.ones((dets.shape[0], 1)) else: # (Ndets x X) [512, 1024, 2048] #dets_embs = self.embedder.compute_embedding(img_numpy, dets[:, :4], tag) # 输入为检测出来的框和原图的numpy # 输出为检测框对应原图的特征 dets_embs = self._get_features(dets[:, :4], img_numpy) # CMC # CMC用来评估匹配成功的概率 if not self.cmc_off: transform = self.cmc.compute_affine(img_numpy, dets[:, :4], tag) for trk in self.trackers: trk.apply_affine_correction(transform) # (检测框的score-检测框置信度阈值)/(1-检测框置信度阈值) trust = (dets[:, 4] - self.det_thresh) / (1 - self.det_thresh) af = self.alpha_fixed_emb # From [self.alpha_fixed_emb, 1], goes to 1 as detector is less confident dets_alpha = af + (1 - af) * (1 - trust) # get predicted locations from existing trackers. 
# 从现存的轨迹预测位置 # 生成一个trackers行,5列的数组 trks = np.zeros((len(self.trackers), 5)) trk_embs = [] to_del = [] ret = [] for t, trk in enumerate(trks): # 用轨迹去预测位置 pos = self.trackers[t].predict()[0] # 用预测的位置放入生成的轨迹 trk[:] = [pos[0], pos[1], pos[2], pos[3], 0] # np.any()对矩阵所有元素做或运算,存在True则返回True # np.isnan(x)函数可以判断x是否为空值,然后输出布尔类型的变量 if np.any(np.isnan(pos)): # 若预测的位置不存在,则添加到to_del中 to_del.append(t) else: # 存在则将该轨迹的特征添加到轨迹特征列表中 trk_embs.append(self.trackers[t].get_emb()) trks = np.ma.compress_rows(np.ma.masked_invalid(trks)) if len(trk_embs) > 0: # np.vstack():在竖直方向上堆叠 # np.hstack():在水平方向上平铺 # np.vstack()输入为元组,返回堆叠后的新数组 trk_embs = np.vstack(trk_embs) else: trk_embs = np.array(trk_embs) for t in reversed(to_del): # 从轨迹中删除预测的位置为空的轨迹 self.trackers.pop(t) # 如果轨迹的速度不是None,velocities为轨迹的速度 velocities = np.array([trk.velocity if trk.velocity is not None else np.array((0, 0)) for trk in self.trackers]) # 最新的框为轨迹的last_observation last_boxes = np.array([trk.last_observation for trk in self.trackers]) k_observations = np.array([k_previous_obs(trk.observations, trk.age, self.delta_t) for trk in self.trackers]) """ First round of association """ # (M detections X N tracks, final score) if self.embedding_off or dets.shape[0] == 0 or trk_embs.shape[0] == 0: stage1_emb_cost = None else: # @表示矩阵乘法 # 检测框的特征乘轨迹的特征得到代价矩阵 stage1_emb_cost = dets_embs @ trk_embs.T # 第一次匹配 matched, unmatched_dets, unmatched_trks = associate( dets, trks, self.iou_threshold, velocities, k_observations, self.inertia, stage1_emb_cost, self.w_association_emb, self.aw_off, self.aw_param, ) # m0是检测框,m1是轨迹 for m in matched: # 更新该轨迹的检测框 self.trackers[m[1]].update(dets[m[0], :5], dets[m[0], 5]) # 更新该轨迹的人物特征 self.trackers[m[1]].update_emb(dets_embs[m[0]], alpha=dets_alpha[m[0]]) """ Second round of associaton by OCR """ # 第二次匹配 # 如果还有剩下的检测框和轨迹没有匹配的,进行第二次匹配 if unmatched_dets.shape[0] > 0 and unmatched_trks.shape[0] > 0: left_dets = dets[unmatched_dets] left_dets_embs = dets_embs[unmatched_dets] left_trks = 
last_boxes[unmatched_trks] left_trks_embs = trk_embs[unmatched_trks] # print(dets.shape) # (9, 6) # print(left_dets.shape) # (1, 6) # print(left_trks.shape) # (1, 5) iou_left = self.asso_func(left_dets, left_trks) # TODO: is better without this emb_cost_left = left_dets_embs @ left_trks_embs.T if self.embedding_off: emb_cost_left = np.zeros_like(emb_cost_left) iou_left = np.array(iou_left) # 如果留下的检测框和轨迹的iou大于阈值 if iou_left.max() > self.iou_threshold: """ NOTE: by using a lower threshold, e.g., self.iou_threshold - 0.1, you may get a higher performance especially on MOT17/MOT20 datasets. But we keep it uniform here for simplicity """ rematched_indices = linear_assignment(-iou_left) to_remove_det_indices = [] to_remove_trk_indices = [] for m in rematched_indices: det_ind, trk_ind = unmatched_dets[m[0]], unmatched_trks[m[1]] if iou_left[m[0], m[1]] < self.iou_threshold: continue # 在之前没有匹配成功的轨迹上添加再匹配成功的检测框 self.trackers[trk_ind].update(dets[det_ind, :5], dets[det_ind, 5]) self.trackers[trk_ind].update_emb(dets_embs[det_ind], alpha=dets_alpha[det_ind]) to_remove_det_indices.append(det_ind) to_remove_trk_indices.append(trk_ind) # 从匹配失败的检测框和轨迹列表中删除再次匹配成功的 unmatched_dets = np.setdiff1d(unmatched_dets, np.array(to_remove_det_indices)) unmatched_trks = np.setdiff1d(unmatched_trks, np.array(to_remove_trk_indices)) """ third round of associaton by OCR """ # 遍历匹配失败的轨迹,将他们置0 for m in unmatched_trks: # self.s = self.s + 1 # print("匹配失败的轨迹为" + self.s) # self.s = self.s + 1 # if self.s > 150: self.trackers[m].update(None, None) # create and initialise new trackers for unmatched detections # 遍历匹配失败的检测框,生成新的轨迹 for i in unmatched_dets: trk = KalmanBoxTracker( dets[i, :5], dets[i, 5], delta_t=self.delta_t, emb=dets_embs[i], alpha=dets_alpha[i], new_kf=not self.new_kf_off ) self.trackers.append(trk) i = len(self.trackers) # 遍历轨迹的逆序序列 for trk in reversed(self.trackers): if trk.last_observation.sum() < 0: d = trk.get_state()[0] else: """ this is optional to use the recent observation or 
the kalman filter prediction, we didn't notice significant difference here """ d = trk.last_observation[:4] if (trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits): # +1 as MOT benchmark requires positive ret.append(np.concatenate((d, [trk.id + 1], [trk.cls], [trk.conf])).reshape(1, -1)) i -= 1 # remove dead tracklet # 如果轨迹的生命大于阈值则删除 if trk.time_since_update > self.max_age: self.trackers.pop(i) if len(ret) > 0: return np.concatenate(ret) return np.empty((0, 5)) def _xywh_to_xyxy(self, bbox_xywh): x, y, w, h = bbox_xywh x1 = max(int(x - w / 2), 0) x2 = min(int(x + w / 2), self.width - 1) y1 = max(int(y - h / 2), 0) y2 = min(int(y + h / 2), self.height - 1) return x1, y1, x2, y2 def _get_features(self, bbox_xywh, ori_img): im_crops = [] for box in bbox_xywh: x1, y1, x2, y2 = self._xywh_to_xyxy(box) im = ori_img[y1:y2, x1:x2] im_crops.append(im) if im_crops: features = self.embedder(im_crops).cpu() else: features = np.array([]) return features def update_public(self, dets, cates, scores): self.frame_count += 1 det_scores = np.ones((dets.shape[0], 1)) dets = np.concatenate((dets, det_scores), axis=1) remain_inds = scores > self.det_thresh cates = cates[remain_inds] dets = dets[remain_inds] trks = np.zeros((len(self.trackers), 5)) to_del = [] ret = [] for t, trk in enumerate(trks): pos = self.trackers[t].predict()[0] cat = self.trackers[t].cate trk[:] = [pos[0], pos[1], pos[2], pos[3], cat] if np.any(np.isnan(pos)): to_del.append(t) trks = np.ma.compress_rows(np.ma.masked_invalid(trks)) for t in reversed(to_del): self.trackers.pop(t) velocities = np.array([trk.velocity if trk.velocity is not None else np.array((0, 0)) for trk in self.trackers]) last_boxes = np.array([trk.last_observation for trk in self.trackers]) k_observations = np.array([k_previous_obs(trk.observations, trk.age, self.delta_t) for trk in self.trackers]) matched, unmatched_dets, unmatched_trks = associate_kitti( dets, trks, cates, self.iou_threshold, 
velocities, k_observations, self.inertia, ) for m in matched: self.trackers[m[1]].update(dets[m[0], :]) if unmatched_dets.shape[0] > 0 and unmatched_trks.shape[0] > 0: """ The re-association stage by OCR. NOTE: at this stage, adding other strategy might be able to continue improve the performance, such as BYTE association by ByteTrack. """ left_dets = dets[unmatched_dets] left_trks = last_boxes[unmatched_trks] left_dets_c = left_dets.copy() left_trks_c = left_trks.copy() iou_left = self.asso_func(left_dets_c, left_trks_c) iou_left = np.array(iou_left) det_cates_left = cates[unmatched_dets] trk_cates_left = trks[unmatched_trks][:, 4] num_dets = unmatched_dets.shape[0] num_trks = unmatched_trks.shape[0] cate_matrix = np.zeros((num_dets, num_trks)) for i in range(num_dets): for j in range(num_trks): if det_cates_left[i] != trk_cates_left[j]: """ For some datasets, such as KITTI, there are different categories, we have to avoid associate them together. """ cate_matrix[i][j] = -1e6 iou_left = iou_left + cate_matrix if iou_left.max() > self.iou_threshold - 0.1: rematched_indices = linear_assignment(-iou_left) to_remove_det_indices = [] to_remove_trk_indices = [] for m in rematched_indices: det_ind, trk_ind = unmatched_dets[m[0]], unmatched_trks[m[1]] if iou_left[m[0], m[1]] < self.iou_threshold - 0.1: continue self.trackers[trk_ind].update(dets[det_ind, :]) to_remove_det_indices.append(det_ind) to_remove_trk_indices.append(trk_ind) unmatched_dets = np.setdiff1d(unmatched_dets, np.array(to_remove_det_indices)) unmatched_trks = np.setdiff1d(unmatched_trks, np.array(to_remove_trk_indices)) for i in unmatched_dets: trk = KalmanBoxTracker(dets[i, :]) trk.cate = cates[i] self.trackers.append(trk) i = len(self.trackers) for trk in reversed(self.trackers): if trk.last_observation.sum() > 0: d = trk.last_observation[:4] else: d = trk.get_state()[0] if trk.time_since_update < 1: if (self.frame_count <= self.min_hits) or (trk.hit_streak >= self.min_hits): # id+1 as MOT benchmark 
requires positive ret.append(np.concatenate((d, [trk.id + 1], [trk.cls], [trk.conf])).reshape(1, -1)) if trk.hit_streak == self.min_hits: # Head Padding (HP): recover the lost steps during initializing the track for prev_i in range(self.min_hits - 1): prev_observation = trk.history_observations[-(prev_i + 2)] ret.append(( np.concatenate( ( prev_observation[:4], [trk.id + 1], [trk.cls], [trk.conf], ) )).reshape(1, -1) ) i -= 1 if trk.time_since_update > self.max_age: self.trackers.pop(i) if len(ret) > 0: return np.concatenate(ret) return np.empty((0, 7)) def dump_cache(self): self.cmc.dump_cache() self.embedder.dump_cache()
来源地址:https://blog.csdn.net/Orange_sparkle/article/details/129509459