From cfdedf805b6a5fb251dd528290252b6260b9b487 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Wed, 6 Mar 2024 01:09:51 -0500 Subject: [PATCH 01/17] Starting point from previous PR --- tools/eval/README.md | 29 +++ tools/eval/datasets/__init__.py | 2 + tools/eval/datasets/otb.py | 378 ++++++++++++++++++++++++++++++++ tools/eval/eval.py | 9 + 4 files changed, 418 insertions(+) create mode 100644 tools/eval/datasets/otb.py diff --git a/tools/eval/README.md b/tools/eval/README.md index 1453d8e8..1b216cd8 100644 --- a/tools/eval/README.md +++ b/tools/eval/README.md @@ -22,6 +22,7 @@ Supported datasets: - [ICDAR](#icdar2003) - [IIIT5K](#iiit5k) - [Mini Supervisely](#mini-supervisely) +- [OTB](#otb) ## ImageNet @@ -211,4 +212,32 @@ Run evaluation on quantized model with the following command : ```shell python eval.py -m pphumanseg_q -d mini_supervisely -dr /path/to/pphumanseg +``` + +## OTB + +### Prepare data + +Please visit [here](https://drive.google.com/drive/folders/1DZvtlnG9U94cgLD6Yi3eU7r6QZJkjdl-?usp=sharing) to download the OTB dataset and the json file. Organize files as follow: + +```shell +$ tree -L 2 /path/to/imagenet +. +├── Basketball +│   ├── groundtruth_rect.txt +│   ├── img +│   │ └── XXXX.jpg +├── Biker +│   ├── groundtruth_rect.txt +│   ├── img +│   │ └── XXXX.jpg +└── OTB.json +``` + +### Evaluation + +Run evaluation with the following command: + +```shell +python eval.py -m dasiamrpn -d otb -dr /path/to/otb ``` \ No newline at end of file diff --git a/tools/eval/datasets/__init__.py b/tools/eval/datasets/__init__.py index 5ed59faa..bb1a5375 100644 --- a/tools/eval/datasets/__init__.py +++ b/tools/eval/datasets/__init__.py @@ -4,6 +4,7 @@ from .icdar import ICDAR from .iiit5k import IIIT5K from .minisupervisely import MiniSupervisely +from .otb import OTB class Registery: def __init__(self, name): @@ -23,3 +24,4 @@ def register(self, item): DATASETS.register(ICDAR) DATASETS.register(IIIT5K) DATASETS.register(MiniSupervisely) +DATASETS.register(OTB) diff --git a/tools/eval/datasets/otb.py b/tools/eval/datasets/otb.py new file mode 100644 index 00000000..7ebe6c5f --- /dev/null +++ b/tools/eval/datasets/otb.py @@ -0,0 +1,378 @@ +import os +import json +import numpy as np +import cv2 as cv +from colorama import Style, Fore +from tqdm import tqdm +from multiprocessing import Pool + +def overlap_ratio(rect1, rect2): + '''Compute overlap ratio between two rects + Args + rect:2d array of N x [x,y,w,h] + Return: + iou + ''' + left = np.maximum(rect1[:,0], rect2[:,0]) + right = np.minimum(rect1[:,0]+rect1[:,2], rect2[:,0]+rect2[:,2]) + top = np.maximum(rect1[:,1], rect2[:,1]) + bottom = np.minimum(rect1[:,1]+rect1[:,3], rect2[:,1]+rect2[:,3]) + + intersect = np.maximum(0,right - left) * np.maximum(0,bottom - top) + union = rect1[:,2]*rect1[:,3] + rect2[:,2]*rect2[:,3] - intersect + iou = intersect / union + iou = np.maximum(np.minimum(1, iou), 0) + return iou +def success_overlap(gt_bb, result_bb, n_frame): + thresholds_overlap = np.arange(0, 1.05, 0.05) + success = np.zeros(len(thresholds_overlap)) + iou = np.ones(len(gt_bb)) * (-1) + mask = np.sum(gt_bb[:, 2:] > 0, axis=1) == 2 + iou[mask] = overlap_ratio(gt_bb[mask], result_bb[mask]) + for i in range(len(thresholds_overlap)): + success[i] = np.sum(iou > thresholds_overlap[i]) / float(n_frame) + return success + +def success_error(gt_center, result_center, thresholds, n_frame): + success = np.zeros(len(thresholds)) + dist = np.ones(len(gt_center)) * (-1) + mask = np.sum(gt_center > 0, axis=1) == 2 + dist[mask] = np.sqrt(np.sum( + np.power(gt_center[mask] - result_center[mask], 2), axis=1)) + for i in range(len(thresholds)): + success[i] = np.sum(dist <= thresholds[i]) / float(n_frame) + return success + +class OPEBenchmark: + def __init__(self, dataset): + self.dataset = dataset + + def convert_bb_to_center(self, bboxes): + return np.array([(bboxes[:, 0] + (bboxes[:, 2] - 1) / 2), + (bboxes[:, 1] + (bboxes[:, 3] - 1) / 2)]).T + + def convert_bb_to_norm_center(self, bboxes, gt_wh): + return self.convert_bb_to_center(bboxes) / (gt_wh+1e-16) + + def eval_success(self,tracker): + success_ret = {} + success_ret_ = {} + for video in self.dataset: + gt_traj = np.array(video.gt_traj) + tracker_traj = video.load_tracker() + tracker_traj = np.array(tracker_traj) + n_frame = len(gt_traj) + if hasattr(video, 'absent'): + gt_traj = gt_traj[video.absent == 1] + tracker_traj = tracker_traj[video.absent == 1] + success_ret_[video.name] = success_overlap(gt_traj, tracker_traj, n_frame) + success_ret["tracker"] = success_ret_ + return success_ret + + def eval_precision(self,tracker): + precision_ret = {} + precision_ret_ = {} + for video in self.dataset: + gt_traj = np.array(video.gt_traj) + tracker_traj = video.load_tracker() + tracker_traj = np.array(tracker_traj) + n_frame = len(gt_traj) + if hasattr(video, 'absent'): + gt_traj = gt_traj[video.absent == 1] + tracker_traj = tracker_traj[video.absent == 1] + gt_center = self.convert_bb_to_center(gt_traj) + tracker_center = self.convert_bb_to_center(tracker_traj) + thresholds = np.arange(0, 51, 1) + precision_ret_[video.name] = success_error(gt_center, tracker_center, + thresholds, n_frame) + precision_ret["tracker"] = precision_ret_ + return precision_ret + + def eval_norm_precision(self,tracker): + norm_precision_ret = {} + norm_precision_ret_ = {} + for video in self.dataset: + gt_traj = np.array(video.gt_traj) + tracker_traj = video.load_tracker() + tracker_traj = np.array(tracker_traj) + n_frame = len(gt_traj) + if hasattr(video, 'absent'): + gt_traj = gt_traj[video.absent == 1] + tracker_traj = tracker_traj[video.absent == 1] + gt_center_norm = self.convert_bb_to_norm_center(gt_traj, gt_traj[:, 2:4]) + tracker_center_norm = self.convert_bb_to_norm_center(tracker_traj, gt_traj[:, 2:4]) + thresholds = np.arange(0, 51, 1) / 100 + norm_precision_ret_[video.name] = success_error(gt_center_norm, + tracker_center_norm, thresholds, n_frame) + norm_precision_ret["tracker"] = norm_precision_ret_ + return norm_precision_ret + + def show_result(self, success_ret, precision_ret=None, + norm_precision_ret=None, show_video_level=False, helight_threshold=0.6): + """pretty print result + Args: + result: returned dict from function eval + """ + # sort tracker + tracker_auc = {} + for tracker_name in success_ret.keys(): + auc = np.mean(list(success_ret[tracker_name].values())) + tracker_auc[tracker_name] = auc + tracker_auc_ = sorted(tracker_auc.items(), + key=lambda x:x[1], + reverse=True)[:20] + tracker_names = [x[0] for x in tracker_auc_] + + + tracker_name_len = max((max([len(x) for x in success_ret.keys()])+2), 12) + header = ("|{:^"+str(tracker_name_len)+"}|{:^9}|{:^16}|{:^11}|").format( + "Tracker name", "Success", "Norm Precision", "Precision") + formatter = "|{:^"+str(tracker_name_len)+"}|{:^9.3f}|{:^16.3f}|{:^11.3f}|" + print('-'*len(header)) + print(header) + print('-'*len(header)) + for tracker_name in tracker_names: + success = tracker_auc[tracker_name] + if precision_ret is not None: + precision = np.mean(list(precision_ret[tracker_name].values()), axis=0)[20] + else: + precision = 0 + if norm_precision_ret is not None: + norm_precision = np.mean(list(norm_precision_ret[tracker_name].values()), + axis=0)[20] + else: + norm_precision = 0 + print(formatter.format(tracker_name, success, norm_precision, precision)) + print('-'*len(header)) + + if show_video_level and len(success_ret) < 10 \ + and precision_ret is not None \ + and len(precision_ret) < 10: + print("\n\n") + header1 = "|{:^21}|".format("Tracker name") + header2 = "|{:^21}|".format("Video name") + for tracker_name in success_ret.keys(): + header1 += ("{:^21}|").format(tracker_name) + header2 += "{:^9}|{:^11}|".format("success", "precision") + print('-'*len(header1)) + print(header1) + print('-'*len(header1)) + print(header2) + print('-'*len(header1)) + videos = list(success_ret[tracker_name].keys()) + for video in videos: + row = "|{:^21}|".format(video) + for tracker_name in success_ret.keys(): + success = np.mean(success_ret[tracker_name][video]) + precision = np.mean(precision_ret[tracker_name][video]) + success_str = "{:^9.3f}".format(success) + if success < helight_threshold: + row += f'{Fore.RED}{success_str}{Style.RESET_ALL}|' + else: + row += success_str+'|' + precision_str = "{:^11.3f}".format(precision) + if precision < helight_threshold: + row += f'{Fore.RED}{precision_str}{Style.RESET_ALL}|' + else: + row += precision_str+'|' + print(row) + print('-'*len(header1)) + +class Video(object): + def __init__(self, name, root, video_dir, init_rect, img_names, + gt_rect, attr): + self.name = name + self.video_dir = video_dir + self.init_rect = init_rect + self.gt_traj = gt_rect + self.attr = attr + self.pred_trajs = {} + self.img_names = [os.path.join(root, x) for x in img_names] + self.imgs = None + img = cv.imread(self.img_names[0]) + assert img is not None, self.img_names[0] + self.width = img.shape[1] + self.height = img.shape[0] + + def __len__(self): + return len(self.img_names) + + def __getitem__(self, idx): + if self.imgs is None: + return cv.imread(self.img_names[idx]), self.gt_traj[idx] + else: + return self.imgs[idx], self.gt_traj[idx] + + def __iter__(self): + for i in range(len(self.img_names)): + if self.imgs is not None: + yield self.imgs[i], self.gt_traj[i] + else: + yield cv.imread(self.img_names[i]), self.gt_traj[i] + def load_tracker(self): + traj_file = os.path.join("OTB_results", self.name+'.txt') + if not os.path.exists(traj_file): + if self.name == 'FleetFace': + txt_name = 'fleetface.txt' + elif self.name == 'Jogging-1': + txt_name = 'jogging_1.txt' + elif self.name == 'Jogging-2': + txt_name = 'jogging_2.txt' + elif self.name == 'Skating2-1': + txt_name = 'skating2_1.txt' + elif self.name == 'Skating2-2': + txt_name = 'skating2_2.txt' + elif self.name == 'FaceOcc1': + txt_name = 'faceocc1.txt' + elif self.name == 'FaceOcc2': + txt_name = 'faceocc2.txt' + elif self.name == 'Human4-2': + txt_name = 'human4_2.txt' + else: + txt_name = self.name[0].lower()+self.name[1:]+'.txt' + traj_file = os.path.join("OTB_results", txt_name) + if os.path.exists(traj_file): + with open(traj_file, 'r') as f : + pred_traj = [list(map(float, x.strip().split(','))) + for x in f.readlines()] + if len(pred_traj) != len(self.gt_traj): + print("tracker", len(pred_traj), len(self.gt_traj), self.name) + else: + return pred_traj + else: + print(traj_file) + + +class OTBDATASET: + def __init__(self, root): + with open(os.path.join(root, 'OTB.json'), 'r') as f: + meta_data = json.load(f) + self.root = root + # load videos + pbar = tqdm(meta_data.keys(), desc='loading OTB', ncols=100) + self.videos = {} + for video in pbar: + pbar.set_postfix_str(video) + self.videos[video] = Video(video, + self.root, + meta_data[video]['video_dir'], + meta_data[video]['init_rect'], + meta_data[video]['img_names'], + meta_data[video]['gt_rect'], + meta_data[video]['attr']) + # set attr + attr = [] + for x in self.videos.values(): + attr += x.attr + attr = set(attr) + self.attr = {} + self.attr['ALL'] = list(self.videos.keys()) + for x in attr: + self.attr[x] = [] + for k, v in self.videos.items(): + for attr_ in v.attr: + self.attr[attr_].append(k) + + def __getitem__(self, idx): + if isinstance(idx, str): + return self.videos[idx] + elif isinstance(idx, int): + return self.videos[sorted(list(self.videos.keys()))[idx]] + + def __len__(self): + return len(self.videos) + + def __iter__(self): + keys = sorted(list(self.videos.keys())) + for key in keys: + yield self.videos[key] + + +def get_axis_aligned_bbox(region): + """ convert region to (cx, cy, w, h) that represent by axis aligned box + """ + nv = region.size + if nv == 8: + cx = np.mean(region[0::2]) + cy = np.mean(region[1::2]) + x1 = min(region[0::2]) + x2 = max(region[0::2]) + y1 = min(region[1::2]) + y2 = max(region[1::2]) + A1 = np.linalg.norm(region[0:2] - region[2:4]) * \ + np.linalg.norm(region[2:4] - region[4:6]) + A2 = (x2 - x1) * (y2 - y1) + s = np.sqrt(A1 / A2) + w = s * (x2 - x1) + 1 + h = s * (y2 - y1) + 1 + else: + x = region[0] + y = region[1] + w = region[2] + h = region[3] + cx = x+w/2 + cy = y+h/2 + return cx, cy, w, h + +class OTB: + + def __init__(self, root): + self.root = root + self.dataset = OTBDATASET(root) + @property + def name(self): + return self.__class__.__name__ + + def eval(self, model): + for v_idx, video in enumerate(self.dataset): + toc = 0 + pred_bboxes = [] + scores = [] + track_times = [] + for idx, (img, gt_bbox) in enumerate(video): + # convert bgr to rgb + img = cv.cvtColor(img, cv.COLOR_BGR2RGB) + tic = cv.getTickCount() + if idx == 0: + cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox)) + gt_bbox_ = (int(cx - w / 2), int(cy - h / 2), int(w), int(h)) + model.init(img, gt_bbox_) + pred_bbox = gt_bbox_ + pred_bboxes.append(pred_bbox) + scores.append(None) + else: + isLocated, bbox, score = model.infer(img) + pred_bbox = bbox + pred_bboxes.append(pred_bbox) + scores.append(score) + toc += cv.getTickCount() - tic + track_times.append((cv.getTickCount() - tic) / cv.getTickFrequency()) + if idx == 0: + cv.destroyAllWindows() + toc /= cv.getTickFrequency() + model_path = os.path.join('OTB_results') + if not os.path.isdir(model_path): + os.makedirs(model_path) + result_path = os.path.join(model_path,'{}.txt'.format(video.name)) + with open(result_path, 'w') as f: + for x in pred_bboxes: + f.write(','.join([str(i) for i in x]) + '\n') + print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format( + v_idx + 1, video.name, toc, idx / toc)) + + + def get_result(self): + return self.top1_acc, self.top5_acc + + def print_result(self): + benchmark = OPEBenchmark(self.dataset) + success_ret = {} + with Pool(processes=1) as pool: + for ret in tqdm(pool.imap_unordered(benchmark.eval_success,"tracker"), desc='eval success', total=1, ncols=100): + success_ret.update(ret) + precision_ret = {} + with Pool(processes=1) as pool: + for ret in tqdm(pool.imap_unordered(benchmark.eval_precision,"tracker"), desc='eval precision', total=1, ncols=100): + precision_ret.update(ret) + benchmark.show_result(success_ret, precision_ret, + show_video_level=False) diff --git a/tools/eval/eval.py b/tools/eval/eval.py index 6c961cc6..1d9dfb4f 100644 --- a/tools/eval/eval.py +++ b/tools/eval/eval.py @@ -95,6 +95,12 @@ name="PPHumanSeg", topic="human_segmentation", modelPath=os.path.join(root_dir, "models/human_segmentation_pphumanseg/human_segmentation_pphumanseg_2023mar_int8.onnx")), + dasiamrpn=dict( + name="DaSiamRPN", + topic="object_tracking", + kernel_cls1_path=os.path.join(root_dir, "models/object_tracking_dasiamrpn/object_tracking_dasiamrpn_kernel_cls1_2021nov.onnx"), + kernel_r1_path=os.path.join(root_dir, "models/object_tracking_dasiamrpn/object_tracking_dasiamrpn_kernel_r1_2021nov.onnx"), + model_path=os.path.join(root_dir, "models/object_tracking_dasiamrpn/object_tracking_dasiamrpn_model_2021nov.onnx")), ) datasets = dict( @@ -118,6 +124,9 @@ mini_supervisely=dict( name="MiniSupervisely", topic="human_segmentation"), + otb=dict( + name="OTB", + topic="object_tracking"), ) def main(args): From aa1b2908ff2b935778e051f0bebec8377d5eadbd Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Wed, 13 Mar 2024 03:55:35 -0400 Subject: [PATCH 02/17] Automatic dataset directory setup handling in init --- tools/eval/README.md | 20 +- tools/eval/datasets/otb.py | 388 ++++++++++++++++--------------------- tools/eval/eval.py | 8 +- 3 files changed, 177 insertions(+), 239 deletions(-) diff --git a/tools/eval/README.md b/tools/eval/README.md index 1b216cd8..1665ffbf 100644 --- a/tools/eval/README.md +++ b/tools/eval/README.md @@ -218,26 +218,12 @@ python eval.py -m pphumanseg_q -d mini_supervisely -dr /path/to/pphumanseg ### Prepare data -Please visit [here](https://drive.google.com/drive/folders/1DZvtlnG9U94cgLD6Yi3eU7r6QZJkjdl-?usp=sharing) to download the OTB dataset and the json file. Organize files as follow: - -```shell -$ tree -L 2 /path/to/imagenet -. -├── Basketball -│   ├── groundtruth_rect.txt -│   ├── img -│   │ └── XXXX.jpg -├── Biker -│   ├── groundtruth_rect.txt -│   ├── img -│   │ └── XXXX.jpg -└── OTB.json -``` +Please visit [here](https://drive.google.com/drive/folders/1DZvtlnG9U94cgLD6Yi3eU7r6QZJkjdl-?usp=sharing) to download the OTB dataset and the json file. Please both files together in a directory. ### Evaluation Run evaluation with the following command: ```shell -python eval.py -m dasiamrpn -d otb -dr /path/to/otb -``` \ No newline at end of file +python eval.py -m vittrack -d otb -dr /path/to/otb +``` diff --git a/tools/eval/datasets/otb.py b/tools/eval/datasets/otb.py index 7ebe6c5f..9470003b 100644 --- a/tools/eval/datasets/otb.py +++ b/tools/eval/datasets/otb.py @@ -4,43 +4,37 @@ import cv2 as cv from colorama import Style, Fore from tqdm import tqdm -from multiprocessing import Pool +from multiprocessing import Pool, cpu_count def overlap_ratio(rect1, rect2): - '''Compute overlap ratio between two rects - Args - rect:2d array of N x [x,y,w,h] - Return: - iou - ''' - left = np.maximum(rect1[:,0], rect2[:,0]) - right = np.minimum(rect1[:,0]+rect1[:,2], rect2[:,0]+rect2[:,2]) - top = np.maximum(rect1[:,1], rect2[:,1]) - bottom = np.minimum(rect1[:,1]+rect1[:,3], rect2[:,1]+rect2[:,3]) - - intersect = np.maximum(0,right - left) * np.maximum(0,bottom - top) - union = rect1[:,2]*rect1[:,3] + rect2[:,2]*rect2[:,3] - intersect + """Calculate the Intersection over Union (IoU) overlap ratio between two sets of rectangles.""" + left = np.maximum(rect1[:, 0], rect2[:, 0]) + right = np.minimum(rect1[:, 0] + rect1[:, 2], rect2[:, 0] + rect2[:, 2]) + top = np.maximum(rect1[:, 1], rect2[:, 1]) + bottom = np.minimum(rect1[:, 1] + rect1[:, 3], rect2[:, 1] + rect2[:, 3]) + intersect = np.maximum(right - left, 0) * np.maximum(bottom - top, 0) + union = rect1[:, 2] * rect1[:, 3] + rect2[:, 2] * rect2[:, 3] - intersect iou = intersect / union - iou = np.maximum(np.minimum(1, iou), 0) + iou = np.clip(iou, 0, 1) return iou + def success_overlap(gt_bb, result_bb, n_frame): + """Calculate the success rate based on the overlap ratio between ground truth and predicted bounding boxes.""" thresholds_overlap = np.arange(0, 1.05, 0.05) success = np.zeros(len(thresholds_overlap)) - iou = np.ones(len(gt_bb)) * (-1) mask = np.sum(gt_bb[:, 2:] > 0, axis=1) == 2 - iou[mask] = overlap_ratio(gt_bb[mask], result_bb[mask]) - for i in range(len(thresholds_overlap)): - success[i] = np.sum(iou > thresholds_overlap[i]) / float(n_frame) + iou = overlap_ratio(gt_bb[mask], result_bb[mask]) + for i, threshold in enumerate(thresholds_overlap): + success[i] = np.sum(iou > threshold) / n_frame return success def success_error(gt_center, result_center, thresholds, n_frame): + """Calculate the success rate based on the error distance between ground truth and predicted bounding box centers.""" success = np.zeros(len(thresholds)) - dist = np.ones(len(gt_center)) * (-1) mask = np.sum(gt_center > 0, axis=1) == 2 - dist[mask] = np.sqrt(np.sum( - np.power(gt_center[mask] - result_center[mask], 2), axis=1)) - for i in range(len(thresholds)): - success[i] = np.sum(dist <= thresholds[i]) / float(n_frame) + dist = np.linalg.norm(gt_center[mask] - result_center[mask], axis=1) + for i, threshold in enumerate(thresholds): + success[i] = np.sum(dist <= threshold) / n_frame return success class OPEBenchmark: @@ -48,139 +42,90 @@ def __init__(self, dataset): self.dataset = dataset def convert_bb_to_center(self, bboxes): + """Convert bounding box coordinates to centers.""" return np.array([(bboxes[:, 0] + (bboxes[:, 2] - 1) / 2), (bboxes[:, 1] + (bboxes[:, 3] - 1) / 2)]).T def convert_bb_to_norm_center(self, bboxes, gt_wh): - return self.convert_bb_to_center(bboxes) / (gt_wh+1e-16) + """Convert bounding box coordinates to normalized centers.""" + return self.convert_bb_to_center(bboxes) / (gt_wh + 1e-16) - def eval_success(self,tracker): - success_ret = {} - success_ret_ = {} + def evaluate(self, metric): + """Evaluate the tracking performance based on the specified metric.""" + evaluation_ret = {} for video in self.dataset: gt_traj = np.array(video.gt_traj) - tracker_traj = video.load_tracker() - tracker_traj = np.array(tracker_traj) + tracker_traj = np.array(video.load_tracker()) n_frame = len(gt_traj) if hasattr(video, 'absent'): gt_traj = gt_traj[video.absent == 1] tracker_traj = tracker_traj[video.absent == 1] - success_ret_[video.name] = success_overlap(gt_traj, tracker_traj, n_frame) - success_ret["tracker"] = success_ret_ - return success_ret + if metric == 'success': + evaluation_ret[video.name] = success_overlap(gt_traj, tracker_traj, n_frame) + elif metric == 'precision': + gt_center = self.convert_bb_to_center(gt_traj) + tracker_center = self.convert_bb_to_center(tracker_traj) + thresholds = np.arange(0, 51, 1) + evaluation_ret[video.name] = success_error(gt_center, tracker_center, thresholds, n_frame) + elif metric == 'norm_precision': + gt_center_norm = self.convert_bb_to_norm_center(gt_traj, gt_traj[:, 2:4]) + tracker_center_norm = self.convert_bb_to_norm_center(tracker_traj, gt_traj[:, 2:4]) + thresholds = np.arange(0, 51, 1) / 100 + evaluation_ret[video.name] = success_error(gt_center_norm, tracker_center_norm, thresholds, n_frame) + return {"tracker": evaluation_ret} - def eval_precision(self,tracker): - precision_ret = {} - precision_ret_ = {} - for video in self.dataset: - gt_traj = np.array(video.gt_traj) - tracker_traj = video.load_tracker() - tracker_traj = np.array(tracker_traj) - n_frame = len(gt_traj) - if hasattr(video, 'absent'): - gt_traj = gt_traj[video.absent == 1] - tracker_traj = tracker_traj[video.absent == 1] - gt_center = self.convert_bb_to_center(gt_traj) - tracker_center = self.convert_bb_to_center(tracker_traj) - thresholds = np.arange(0, 51, 1) - precision_ret_[video.name] = success_error(gt_center, tracker_center, - thresholds, n_frame) - precision_ret["tracker"] = precision_ret_ - return precision_ret - - def eval_norm_precision(self,tracker): - norm_precision_ret = {} - norm_precision_ret_ = {} - for video in self.dataset: - gt_traj = np.array(video.gt_traj) - tracker_traj = video.load_tracker() - tracker_traj = np.array(tracker_traj) - n_frame = len(gt_traj) - if hasattr(video, 'absent'): - gt_traj = gt_traj[video.absent == 1] - tracker_traj = tracker_traj[video.absent == 1] - gt_center_norm = self.convert_bb_to_norm_center(gt_traj, gt_traj[:, 2:4]) - tracker_center_norm = self.convert_bb_to_norm_center(tracker_traj, gt_traj[:, 2:4]) - thresholds = np.arange(0, 51, 1) / 100 - norm_precision_ret_[video.name] = success_error(gt_center_norm, - tracker_center_norm, thresholds, n_frame) - norm_precision_ret["tracker"] = norm_precision_ret_ - return norm_precision_ret - - def show_result(self, success_ret, precision_ret=None, - norm_precision_ret=None, show_video_level=False, helight_threshold=0.6): - """pretty print result - Args: - result: returned dict from function eval - """ - # sort tracker - tracker_auc = {} - for tracker_name in success_ret.keys(): - auc = np.mean(list(success_ret[tracker_name].values())) - tracker_auc[tracker_name] = auc - tracker_auc_ = sorted(tracker_auc.items(), - key=lambda x:x[1], - reverse=True)[:20] - tracker_names = [x[0] for x in tracker_auc_] - - - tracker_name_len = max((max([len(x) for x in success_ret.keys()])+2), 12) - header = ("|{:^"+str(tracker_name_len)+"}|{:^9}|{:^16}|{:^11}|").format( - "Tracker name", "Success", "Norm Precision", "Precision") - formatter = "|{:^"+str(tracker_name_len)+"}|{:^9.3f}|{:^16.3f}|{:^11.3f}|" - print('-'*len(header)) + def show_result(self, success, precision=None, norm_precision=None, show_video_level=False, height_threshold=0.6): + tracker_auc = {tracker_name: np.mean(list(scores.values())) for tracker_name, scores in success.items()} + tracker_auc = sorted(tracker_auc.items(), key=lambda x: x[1], reverse=True)[:20] + tracker_names = [x[0] for x in tracker_auc] + tracker_name_len = max(max(len(x) for x in success.keys()) + 2, 12) + header = ("|{:^" + str(tracker_name_len) + "}|{:^9}|{:^11}|{:^16}|").format( + "Tracker name", "Success", "Precision", "Norm Precision") + formatter = "|{:^" + str(tracker_name_len) + "}|{:^9.3f}|{:^11.3f}|{:^16.3f}|" + + print('-' * len(header)) print(header) - print('-'*len(header)) + print('-' * len(header)) + for tracker_name in tracker_names: - success = tracker_auc[tracker_name] - if precision_ret is not None: - precision = np.mean(list(precision_ret[tracker_name].values()), axis=0)[20] - else: - precision = 0 - if norm_precision_ret is not None: - norm_precision = np.mean(list(norm_precision_ret[tracker_name].values()), - axis=0)[20] - else: - norm_precision = 0 - print(formatter.format(tracker_name, success, norm_precision, precision)) - print('-'*len(header)) + success_score = np.mean(list(success[tracker_name].values())) + precision_score = np.mean(list(precision[tracker_name].values()), axis=0)[20] if precision else 0 + norm_precision_score = np.mean(list(norm_precision[tracker_name].values()), axis=0)[20] if norm_precision else 0 + print(formatter.format(tracker_name, success_score, precision_score, norm_precision_score)) + + print('-' * len(header)) - if show_video_level and len(success_ret) < 10 \ - and precision_ret is not None \ - and len(precision_ret) < 10: + if show_video_level and len(success) < 10 and precision and len(precision) < 10: print("\n\n") header1 = "|{:^21}|".format("Tracker name") header2 = "|{:^21}|".format("Video name") - for tracker_name in success_ret.keys(): + + for tracker_name in success.keys(): header1 += ("{:^21}|").format(tracker_name) header2 += "{:^9}|{:^11}|".format("success", "precision") - print('-'*len(header1)) + + print('-' * len(header1)) print(header1) - print('-'*len(header1)) + print('-' * len(header1)) print(header2) - print('-'*len(header1)) - videos = list(success_ret[tracker_name].keys()) - for video in videos: + print('-' * len(header1)) + + for video, scores in success.items(): row = "|{:^21}|".format(video) - for tracker_name in success_ret.keys(): - success = np.mean(success_ret[tracker_name][video]) - precision = np.mean(precision_ret[tracker_name][video]) - success_str = "{:^9.3f}".format(success) - if success < helight_threshold: - row += f'{Fore.RED}{success_str}{Style.RESET_ALL}|' - else: - row += success_str+'|' - precision_str = "{:^11.3f}".format(precision) - if precision < helight_threshold: - row += f'{Fore.RED}{precision_str}{Style.RESET_ALL}|' - else: - row += precision_str+'|' + + for tracker_name in tracker_names: + success_score = np.mean(success[tracker_name][video]) + precision_score = np.mean(precision[tracker_name][video]) + success_str = f'{success_score:.3f}' if success_score < height_threshold else f'{success_score:.3f}' + precision_str = f'{precision_score:.3f}' if precision_score < height_threshold else f'{precision_score:.3f}' + row += f"{success_str:^9}|{precision_str:^11}|" + print(row) - print('-'*len(header1)) -class Video(object): - def __init__(self, name, root, video_dir, init_rect, img_names, - gt_rect, attr): + print('-' * len(header1)) + +class Video: + def __init__(self, name, root, video_dir, init_rect, img_names, gt_rect, attr): self.name = name self.video_dir = video_dir self.init_rect = init_rect @@ -209,32 +154,27 @@ def __iter__(self): yield self.imgs[i], self.gt_traj[i] else: yield cv.imread(self.img_names[i]), self.gt_traj[i] + def load_tracker(self): + """Load tracker results from file.""" traj_file = os.path.join("OTB_results", self.name+'.txt') if not os.path.exists(traj_file): - if self.name == 'FleetFace': - txt_name = 'fleetface.txt' - elif self.name == 'Jogging-1': - txt_name = 'jogging_1.txt' - elif self.name == 'Jogging-2': - txt_name = 'jogging_2.txt' - elif self.name == 'Skating2-1': - txt_name = 'skating2_1.txt' - elif self.name == 'Skating2-2': - txt_name = 'skating2_2.txt' - elif self.name == 'FaceOcc1': - txt_name = 'faceocc1.txt' - elif self.name == 'FaceOcc2': - txt_name = 'faceocc2.txt' - elif self.name == 'Human4-2': - txt_name = 'human4_2.txt' - else: - txt_name = self.name[0].lower()+self.name[1:]+'.txt' + txt_names = { + 'FleetFace': 'fleetface.txt', + 'Jogging-1': 'jogging_1.txt', + 'Jogging-2': 'jogging_2.txt', + 'Skating2-1': 'skating2_1.txt', + 'Skating2-2': 'skating2_2.txt', + 'FaceOcc1': 'faceocc1.txt', + 'FaceOcc2': 'faceocc2.txt', + 'Human4-2': 'human4_2.txt' + } + txt_name = txt_names.get(self.name, self.name[0].lower() + self.name[1:] + '.txt') traj_file = os.path.join("OTB_results", txt_name) + if os.path.exists(traj_file): - with open(traj_file, 'r') as f : - pred_traj = [list(map(float, x.strip().split(','))) - for x in f.readlines()] + with open(traj_file, 'r') as f: + pred_traj = [list(map(float, x.strip().split(','))) for x in f.readlines()] if len(pred_traj) != len(self.gt_traj): print("tracker", len(pred_traj), len(self.gt_traj), self.name) else: @@ -242,33 +182,27 @@ def load_tracker(self): else: print(traj_file) - class OTBDATASET: def __init__(self, root): with open(os.path.join(root, 'OTB.json'), 'r') as f: meta_data = json.load(f) self.root = root - # load videos - pbar = tqdm(meta_data.keys(), desc='loading OTB', ncols=100) self.videos = {} + pbar = tqdm(meta_data.keys(), desc='Loading OTB', ncols=100) for video in pbar: pbar.set_postfix_str(video) self.videos[video] = Video(video, - self.root, - meta_data[video]['video_dir'], - meta_data[video]['init_rect'], - meta_data[video]['img_names'], - meta_data[video]['gt_rect'], - meta_data[video]['attr']) - # set attr - attr = [] - for x in self.videos.values(): - attr += x.attr - attr = set(attr) - self.attr = {} - self.attr['ALL'] = list(self.videos.keys()) - for x in attr: - self.attr[x] = [] + self.root, + meta_data[video]['video_dir'], + meta_data[video]['init_rect'], + meta_data[video]['img_names'], + meta_data[video]['gt_rect'], + meta_data[video]['attr']) + self.attr = {'ALL': list(self.videos.keys())} + all_attributes = [x.attr for x in self.videos.values()] + all_attributes = set(sum(all_attributes, [])) + for attr_ in all_attributes: + self.attr[attr_] = [] for k, v in self.videos.items(): for attr_ in v.attr: self.attr[attr_].append(k) @@ -277,20 +211,19 @@ def __getitem__(self, idx): if isinstance(idx, str): return self.videos[idx] elif isinstance(idx, int): - return self.videos[sorted(list(self.videos.keys()))[idx]] + sorted_keys = sorted(list(self.videos.keys())) + return self.videos[sorted_keys[idx]] def __len__(self): return len(self.videos) def __iter__(self): - keys = sorted(list(self.videos.keys())) - for key in keys: + sorted_keys = sorted(list(self.videos.keys())) + for key in sorted_keys: yield self.videos[key] - def get_axis_aligned_bbox(region): - """ convert region to (cx, cy, w, h) that represent by axis aligned box - """ + """Converts a region to (cx, cy, w, h) representing an axis-aligned box.""" nv = region.size if nv == 8: cx = np.mean(region[0::2]) @@ -299,80 +232,101 @@ def get_axis_aligned_bbox(region): x2 = max(region[0::2]) y1 = min(region[1::2]) y2 = max(region[1::2]) - A1 = np.linalg.norm(region[0:2] - region[2:4]) * \ - np.linalg.norm(region[2:4] - region[4:6]) + A1 = np.linalg.norm(region[0:2] - region[2:4]) * np.linalg.norm(region[2:4] - region[4:6]) A2 = (x2 - x1) * (y2 - y1) s = np.sqrt(A1 / A2) w = s * (x2 - x1) + 1 h = s * (y2 - y1) + 1 else: - x = region[0] - y = region[1] - w = region[2] - h = region[3] - cx = x+w/2 - cy = y+h/2 + x, y, w, h = region + cx = x + w / 2 + cy = y + h / 2 return cx, cy, w, h class OTB: - def __init__(self, root): - self.root = root - self.dataset = OTBDATASET(root) + # Go up one if directory is provided + root = os.path.abspath(root) + if root.endswith("OTB100"): + root = os.path.dirname(root) + print(root) + + # Unzip the OTB100.zip file + if os.path.exists(f'{root}/OTB100.zip'): + os.system(f'unzip -q "{os.path.join(root, "OTB100.zip")}" -d "{root}"') + os.remove(f'{root}/OTB100.zip') + + # Move the JSON label in if it's outside + if os.path.exists(f'{root}/OTB.json'): + os.rename(f'{root}/OTB.json', f'{root}/OTB100/OTB.json') + + if os.path.exists(f'{root}/OTB100'): + original_directories = ['Jogging', 'Skating2', 'Human4'] + updated_directories = ['Jogging-1', 'Jogging-2', 'Skating2-1', 'Skating2-2', 'Human4-2', 'OTB.json'] + original_exist = all(os.path.exists(f'{root}/OTB100/{dir}') for dir in original_directories) + updated_exist = all(os.path.exists(f'{root}/OTB100/{dir}') for dir in updated_directories) + if original_exist: + os.rename(f'{root}/OTB100/Jogging', f'{root}/OTB100/Jogging-1') + os.rename(f'{root}/OTB100/Skating2', f'{root}/OTB100/Skating2-1') + os.rename(f'{root}/OTB100/Human4', f'{root}/OTB100/Human4-2') + os.system(f'cp -r "{root}/OTB100/Jogging-1" "{root}/OTB100/Jogging-2"') + os.system(f'cp -r "{root}/OTB100/Skating2-1" "{root}/OTB100/Skating2-2"') + elif not updated_exist: + raise RuntimeError("Not all files needed for setup are present.") + + self.root = f'{root}/OTB100' + self.dataset = OTBDATASET(self.root) + @property def name(self): return self.__class__.__name__ def eval(self, model): for v_idx, video in enumerate(self.dataset): - toc = 0 + total_time = 0 pred_bboxes = [] scores = [] track_times = [] + for idx, (img, gt_bbox) in enumerate(video): - # convert bgr to rgb img = cv.cvtColor(img, cv.COLOR_BGR2RGB) tic = cv.getTickCount() + if idx == 0: cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox)) gt_bbox_ = (int(cx - w / 2), int(cy - h / 2), int(w), int(h)) model.init(img, gt_bbox_) pred_bbox = gt_bbox_ - pred_bboxes.append(pred_bbox) scores.append(None) else: isLocated, bbox, score = model.infer(img) pred_bbox = bbox - pred_bboxes.append(pred_bbox) scores.append(score) - toc += cv.getTickCount() - tic - track_times.append((cv.getTickCount() - tic) / cv.getTickFrequency()) - if idx == 0: - cv.destroyAllWindows() - toc /= cv.getTickFrequency() + + pred_bboxes.append(pred_bbox) + toc = (cv.getTickCount() - tic) / cv.getTickFrequency() + total_time += toc + track_times.append(toc) + model_path = os.path.join('OTB_results') - if not os.path.isdir(model_path): - os.makedirs(model_path) - result_path = os.path.join(model_path,'{}.txt'.format(video.name)) + os.makedirs(model_path, exist_ok=True) + result_path = os.path.join(model_path, '{}.txt'.format(video.name)) with open(result_path, 'w') as f: - for x in pred_bboxes: - f.write(','.join([str(i) for i in x]) + '\n') - print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format( - v_idx + 1, video.name, toc, idx / toc)) - + for bbox in pred_bboxes: + f.write(','.join(map(str, bbox)) + '\n') - def get_result(self): - return self.top1_acc, self.top5_acc + avg_fps = len(video) / total_time if total_time > 0 else 0 + print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format( + v_idx + 1, video.name, total_time, avg_fps)) def print_result(self): benchmark = OPEBenchmark(self.dataset) - success_ret = {} - with Pool(processes=1) as pool: - for ret in tqdm(pool.imap_unordered(benchmark.eval_success,"tracker"), desc='eval success', total=1, ncols=100): - success_ret.update(ret) - precision_ret = {} - with Pool(processes=1) as pool: - for ret in tqdm(pool.imap_unordered(benchmark.eval_precision,"tracker"), desc='eval precision', total=1, ncols=100): - precision_ret.update(ret) - benchmark.show_result(success_ret, precision_ret, - show_video_level=False) + num_cores = cpu_count() + evaluation_results = {} + metrics = ["success", "precision", "norm_precision"] + for metric in metrics: + with Pool(processes=min(num_cores, max(1, num_cores - 1))) as pool: + for ret in tqdm(pool.imap_unordered(benchmark.evaluate, [metric], 1), desc=f'eval {metric}', total=1, ncols=100): + evaluation_results[metric] = ret + + benchmark.show_result(**evaluation_results, show_video_level=False) diff --git a/tools/eval/eval.py b/tools/eval/eval.py index 1d9dfb4f..3246cfe1 100644 --- a/tools/eval/eval.py +++ b/tools/eval/eval.py @@ -95,12 +95,10 @@ name="PPHumanSeg", topic="human_segmentation", modelPath=os.path.join(root_dir, "models/human_segmentation_pphumanseg/human_segmentation_pphumanseg_2023mar_int8.onnx")), - dasiamrpn=dict( - name="DaSiamRPN", + vittrack=dict( + name="VitTrack", topic="object_tracking", - kernel_cls1_path=os.path.join(root_dir, "models/object_tracking_dasiamrpn/object_tracking_dasiamrpn_kernel_cls1_2021nov.onnx"), - kernel_r1_path=os.path.join(root_dir, "models/object_tracking_dasiamrpn/object_tracking_dasiamrpn_kernel_r1_2021nov.onnx"), - model_path=os.path.join(root_dir, "models/object_tracking_dasiamrpn/object_tracking_dasiamrpn_model_2021nov.onnx")), + model_path=os.path.join(root_dir, "models/object_tracking_vittrack/object_tracking_vittrack_2023sep.onnx")), ) datasets = dict( From d1217e297a9179fd2cc1033614411cd972a5b555 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Tue, 19 Mar 2024 03:08:31 -0400 Subject: [PATCH 03/17] Partial update to OTB-2015 --- tools/eval/datasets/otb.py | 30 +++++++++++++++--------------- tools/eval/eval.py | 4 ++-- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/tools/eval/datasets/otb.py b/tools/eval/datasets/otb.py index 9470003b..dee05701 100644 --- a/tools/eval/datasets/otb.py +++ b/tools/eval/datasets/otb.py @@ -247,34 +247,34 @@ class OTB: def __init__(self, root): # Go up one if directory is provided root = os.path.abspath(root) - if root.endswith("OTB100"): + if root.endswith("OTB2015"): root = os.path.dirname(root) print(root) - # Unzip the OTB100.zip file - if os.path.exists(f'{root}/OTB100.zip'): - os.system(f'unzip -q "{os.path.join(root, "OTB100.zip")}" -d "{root}"') - os.remove(f'{root}/OTB100.zip') + # Unzip the OTB2015.zip file + if os.path.exists(f'{root}/OTB2015.zip'): + os.system(f'unzip -q "{os.path.join(root, "OTB2015.zip")}" -d "{root}"') + os.remove(f'{root}/OTB2015.zip') # Move the JSON label in if it's outside if os.path.exists(f'{root}/OTB.json'): - os.rename(f'{root}/OTB.json', f'{root}/OTB100/OTB.json') + os.rename(f'{root}/OTB.json', f'{root}/OTB2015/OTB.json') - if os.path.exists(f'{root}/OTB100'): + if os.path.exists(f'{root}/OTB2015'): original_directories = ['Jogging', 'Skating2', 'Human4'] updated_directories = ['Jogging-1', 'Jogging-2', 'Skating2-1', 'Skating2-2', 'Human4-2', 'OTB.json'] - original_exist = all(os.path.exists(f'{root}/OTB100/{dir}') for dir in original_directories) - updated_exist = all(os.path.exists(f'{root}/OTB100/{dir}') for dir in updated_directories) + original_exist = all(os.path.exists(f'{root}/OTB2015/{dir}') for dir in original_directories) + updated_exist = all(os.path.exists(f'{root}/OTB2015/{dir}') for dir in updated_directories) if original_exist: - os.rename(f'{root}/OTB100/Jogging', f'{root}/OTB100/Jogging-1') - os.rename(f'{root}/OTB100/Skating2', f'{root}/OTB100/Skating2-1') - os.rename(f'{root}/OTB100/Human4', f'{root}/OTB100/Human4-2') - os.system(f'cp -r "{root}/OTB100/Jogging-1" "{root}/OTB100/Jogging-2"') - os.system(f'cp -r "{root}/OTB100/Skating2-1" "{root}/OTB100/Skating2-2"') + os.rename(f'{root}/OTB2015/Jogging', f'{root}/OTB2015/Jogging-1') + os.rename(f'{root}/OTB2015/Skating2', f'{root}/OTB2015/Skating2-1') + os.rename(f'{root}/OTB2015/Human4', f'{root}/OTB2015/Human4-2') + os.system(f'cp -r "{root}/OTB2015/Jogging-1" "{root}/OTB2015/Jogging-2"') + os.system(f'cp -r "{root}/OTB2015/Skating2-1" "{root}/OTB2015/Skating2-2"') elif not updated_exist: raise RuntimeError("Not all files needed for setup are present.") - self.root = f'{root}/OTB100' + self.root = f'{root}/OTB2015' self.dataset = OTBDATASET(self.root) @property diff --git a/tools/eval/eval.py b/tools/eval/eval.py index 3246cfe1..e6ae15dc 100644 --- a/tools/eval/eval.py +++ b/tools/eval/eval.py @@ -122,8 +122,8 @@ mini_supervisely=dict( name="MiniSupervisely", topic="human_segmentation"), - otb=dict( - name="OTB", + otb2015=dict( + name="OTB-2015", topic="object_tracking"), ) From f88eeabd74c8d946a0edd0662651a64c9836fe83 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Tue, 19 Mar 2024 03:10:25 -0400 Subject: [PATCH 04/17] README Updates for OTB-2015 --- tools/eval/README.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tools/eval/README.md b/tools/eval/README.md index 1665ffbf..ba10f636 100644 --- a/tools/eval/README.md +++ b/tools/eval/README.md @@ -22,7 +22,7 @@ Supported datasets: - [ICDAR](#icdar2003) - [IIIT5K](#iiit5k) - [Mini Supervisely](#mini-supervisely) -- [OTB](#otb) +- [OTB-2015](#otb-2015) ## ImageNet @@ -214,16 +214,19 @@ Run evaluation on quantized model with the following command : python eval.py -m pphumanseg_q -d mini_supervisely -dr /path/to/pphumanseg ``` -## OTB +## OTB-2015 ### Prepare data -Please visit [here](https://drive.google.com/drive/folders/1DZvtlnG9U94cgLD6Yi3eU7r6QZJkjdl-?usp=sharing) to download the OTB dataset and the json file. Please both files together in a directory. +1. The official site is http://cvlab.hanyang.ac.kr/. +2. In case it is down, users can download from the alternative link we provide. + +Alternative Link: TBA ### Evaluation Run evaluation with the following command: ```shell -python eval.py -m vittrack -d otb -dr /path/to/otb +python eval.py -m vittrack -d otb2015 -dr /path/to/otb2015 ``` From 8c07b2d685c9f72f1fdcd5c73601d9067bd4691c Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Wed, 20 Mar 2024 02:09:42 -0400 Subject: [PATCH 05/17] Updated success to AUC and confirmed functionality --- tools/eval/datasets/__init__.py | 4 ++-- tools/eval/datasets/{otb.py => otb2015.py} | 21 +++++++++++---------- tools/eval/eval.py | 2 +- 3 files changed, 14 insertions(+), 13 deletions(-) rename tools/eval/datasets/{otb.py => otb2015.py} (96%) diff --git a/tools/eval/datasets/__init__.py b/tools/eval/datasets/__init__.py index bb1a5375..9acaafdc 100644 --- a/tools/eval/datasets/__init__.py +++ b/tools/eval/datasets/__init__.py @@ -4,7 +4,7 @@ from .icdar import ICDAR from .iiit5k import IIIT5K from .minisupervisely import MiniSupervisely -from .otb import OTB +from .otb2015 import OTB2015 class Registery: def __init__(self, name): @@ -24,4 +24,4 @@ def register(self, item): DATASETS.register(ICDAR) DATASETS.register(IIIT5K) DATASETS.register(MiniSupervisely) -DATASETS.register(OTB) +DATASETS.register(OTB2015) diff --git a/tools/eval/datasets/otb.py b/tools/eval/datasets/otb2015.py similarity index 96% rename from tools/eval/datasets/otb.py rename to tools/eval/datasets/otb2015.py index dee05701..029e272d 100644 --- a/tools/eval/datasets/otb.py +++ b/tools/eval/datasets/otb2015.py @@ -8,16 +8,17 @@ def overlap_ratio(rect1, rect2): """Calculate the Intersection over Union (IoU) overlap ratio between two sets of rectangles.""" - left = np.maximum(rect1[:, 0], rect2[:, 0]) - right = np.minimum(rect1[:, 0] + rect1[:, 2], rect2[:, 0] + rect2[:, 2]) - top = np.maximum(rect1[:, 1], rect2[:, 1]) - bottom = np.minimum(rect1[:, 1] + rect1[:, 3], rect2[:, 1] + rect2[:, 3]) - intersect = np.maximum(right - left, 0) * np.maximum(bottom - top, 0) - union = rect1[:, 2] * rect1[:, 3] + rect2[:, 2] * rect2[:, 3] - intersect - iou = intersect / union - iou = np.clip(iou, 0, 1) + tl = np.maximum(rect1[:, :2], rect2[:, :2]) + br = np.minimum(rect1[:, :2] + rect1[:, 2:] - 1.0, rect2[:, :2] + rect2[:, 2:] - 1.0) + sz = np.maximum(br - tl + 1.0, 0) + + # Area + intersection = np.prod(sz, axis=1) + union = np.prod(rect1[:, 2:], axis=1) + np.prod(rect2[:, 2:], axis=1) - intersection + iou = np.clip(intersection / union, 0, 1) return iou + def success_overlap(gt_bb, result_bb, n_frame): """Calculate the success rate based on the overlap ratio between ground truth and predicted bounding boxes.""" thresholds_overlap = np.arange(0, 1.05, 0.05) @@ -80,7 +81,7 @@ def show_result(self, success, precision=None, norm_precision=None, show_video_l tracker_names = [x[0] for x in tracker_auc] tracker_name_len = max(max(len(x) for x in success.keys()) + 2, 12) header = ("|{:^" + str(tracker_name_len) + "}|{:^9}|{:^11}|{:^16}|").format( - "Tracker name", "Success", "Precision", "Norm Precision") + "Tracker name", "IOU", "Precision", "Norm Precision") formatter = "|{:^" + str(tracker_name_len) + "}|{:^9.3f}|{:^11.3f}|{:^16.3f}|" print('-' * len(header)) @@ -243,7 +244,7 @@ def get_axis_aligned_bbox(region): cy = y + h / 2 return cx, cy, w, h -class OTB: +class OTB2015: def __init__(self, root): # Go up one if directory is provided root = os.path.abspath(root) diff --git a/tools/eval/eval.py b/tools/eval/eval.py index e6ae15dc..f5d66e9d 100644 --- a/tools/eval/eval.py +++ b/tools/eval/eval.py @@ -123,7 +123,7 @@ name="MiniSupervisely", topic="human_segmentation"), otb2015=dict( - name="OTB-2015", + name="OTB2015", topic="object_tracking"), ) From e7aba825afbd0e1504f232dca057fd314d40d8df Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Wed, 20 Mar 2024 02:10:40 -0400 Subject: [PATCH 06/17] Updated misnamed IOU to AUC --- tools/eval/datasets/otb2015.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/eval/datasets/otb2015.py b/tools/eval/datasets/otb2015.py index 029e272d..bbcb5674 100644 --- a/tools/eval/datasets/otb2015.py +++ b/tools/eval/datasets/otb2015.py @@ -81,7 +81,7 @@ def show_result(self, success, precision=None, norm_precision=None, show_video_l tracker_names = [x[0] for x in tracker_auc] tracker_name_len = max(max(len(x) for x in success.keys()) + 2, 12) header = ("|{:^" + str(tracker_name_len) + "}|{:^9}|{:^11}|{:^16}|").format( - "Tracker name", "IOU", "Precision", "Norm Precision") + "Tracker name", "AUC", "Precision", "Norm Precision") formatter = "|{:^" + str(tracker_name_len) + "}|{:^9.3f}|{:^11.3f}|{:^16.3f}|" print('-' * len(header)) From 40a3b9b77f2cb54091d36b910806755b1df057ad Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Wed, 20 Mar 2024 02:25:39 -0400 Subject: [PATCH 07/17] Updated with OpenCV's GDrive Dataset Link --- tools/eval/README.md | 4 +--- tools/eval/datasets/otb2015.py | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/tools/eval/README.md b/tools/eval/README.md index ba10f636..ae42e287 100644 --- a/tools/eval/README.md +++ b/tools/eval/README.md @@ -219,9 +219,7 @@ python eval.py -m pphumanseg_q -d mini_supervisely -dr /path/to/pphumanseg ### Prepare data 1. The official site is http://cvlab.hanyang.ac.kr/. -2. In case it is down, users can download from the alternative link we provide. - -Alternative Link: TBA +2. In case it is down, users can download from the alternative [Google Drive Link](https://drive.google.com/drive/folders/1iTwCQAMgzdWWrlwncOjpshuHvipIWPMN?usp=sharing). ### Evaluation diff --git a/tools/eval/datasets/otb2015.py b/tools/eval/datasets/otb2015.py index bbcb5674..c8c44d07 100644 --- a/tools/eval/datasets/otb2015.py +++ b/tools/eval/datasets/otb2015.py @@ -252,26 +252,26 @@ def __init__(self, root): root = os.path.dirname(root) print(root) - # Unzip the OTB2015.zip file - if os.path.exists(f'{root}/OTB2015.zip'): - os.system(f'unzip -q "{os.path.join(root, "OTB2015.zip")}" -d "{root}"') - os.remove(f'{root}/OTB2015.zip') + # Unzip the OTB100.zip file + if os.path.exists(f'{root}/OTB100.zip'): + os.system(f'unzip -q "{os.path.join(root, "OTB100.zip")}" -d "{root}"') + os.remove(f'{root}/OTB100.zip') # Move the JSON label in if it's outside if os.path.exists(f'{root}/OTB.json'): - os.rename(f'{root}/OTB.json', f'{root}/OTB2015/OTB.json') + os.rename(f'{root}/OTB.json', f'{root}/OTB100/OTB.json') - if os.path.exists(f'{root}/OTB2015'): + if os.path.exists(f'{root}/OTB100'): original_directories = ['Jogging', 'Skating2', 'Human4'] updated_directories = ['Jogging-1', 'Jogging-2', 'Skating2-1', 'Skating2-2', 'Human4-2', 'OTB.json'] - original_exist = all(os.path.exists(f'{root}/OTB2015/{dir}') for dir in original_directories) - updated_exist = all(os.path.exists(f'{root}/OTB2015/{dir}') for dir in updated_directories) + original_exist = all(os.path.exists(f'{root}/OTB100/{dir}') for dir in original_directories) + updated_exist = all(os.path.exists(f'{root}/OTB100/{dir}') for dir in updated_directories) if original_exist: - os.rename(f'{root}/OTB2015/Jogging', f'{root}/OTB2015/Jogging-1') - os.rename(f'{root}/OTB2015/Skating2', f'{root}/OTB2015/Skating2-1') - os.rename(f'{root}/OTB2015/Human4', f'{root}/OTB2015/Human4-2') - os.system(f'cp -r "{root}/OTB2015/Jogging-1" "{root}/OTB2015/Jogging-2"') - os.system(f'cp -r "{root}/OTB2015/Skating2-1" "{root}/OTB2015/Skating2-2"') + os.rename(f'{root}/OTB100/Jogging', f'{root}/OTB100/Jogging-1') + os.rename(f'{root}/OTB100/Skating2', f'{root}/OTB100/Skating2-1') + os.rename(f'{root}/OTB100/Human4', f'{root}/OTB100/Human4-2') + os.system(f'cp -r "{root}/OTB100/Jogging-1" "{root}/OTB100/Jogging-2"') + os.system(f'cp -r "{root}/OTB100/Skating2-1" "{root}/OTB100/Skating2-2"') elif not updated_exist: raise RuntimeError("Not all files needed for setup are present.") From 9b9d4028d7210f7887fe2cca590baf2a93b69ea8 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Mon, 1 Apr 2024 03:17:28 -0400 Subject: [PATCH 08/17] Updated to use the GT bbox from the dataset, not a separate JSON file. Removed all moving or renaming or files and directories --- tools/eval/README.md | 14 + tools/eval/datasets/__init__.py | 4 +- tools/eval/datasets/otb100.py | 536 ++++++++++++++++++++++++++++++++ tools/eval/datasets/otb2015.py | 333 -------------------- tools/eval/eval.py | 4 +- 5 files changed, 554 insertions(+), 337 deletions(-) create mode 100644 tools/eval/datasets/otb100.py delete mode 100644 tools/eval/datasets/otb2015.py diff --git a/tools/eval/README.md b/tools/eval/README.md index ae42e287..72824c88 100644 --- a/tools/eval/README.md +++ b/tools/eval/README.md @@ -221,6 +221,20 @@ python eval.py -m pphumanseg_q -d mini_supervisely -dr /path/to/pphumanseg 1. The official site is http://cvlab.hanyang.ac.kr/. 2. In case it is down, users can download from the alternative [Google Drive Link](https://drive.google.com/drive/folders/1iTwCQAMgzdWWrlwncOjpshuHvipIWPMN?usp=sharing). +Download both the `OTB100.zip` and `OTB.json`, organize files as follow: + +```shell +$ tree -L 2 /path/to/otb100 +. +├── Basketball +│   ├── groundtruth_rect.txt +│   └── img +├── ... +├── Woman +└── OTB.json + +``` + ### Evaluation Run evaluation with the following command: diff --git a/tools/eval/datasets/__init__.py b/tools/eval/datasets/__init__.py index 9acaafdc..c6f3c915 100644 --- a/tools/eval/datasets/__init__.py +++ b/tools/eval/datasets/__init__.py @@ -4,7 +4,7 @@ from .icdar import ICDAR from .iiit5k import IIIT5K from .minisupervisely import MiniSupervisely -from .otb2015 import OTB2015 +from .otb100 import OTB100 class Registery: def __init__(self, name): @@ -24,4 +24,4 @@ def register(self, item): DATASETS.register(ICDAR) DATASETS.register(IIIT5K) DATASETS.register(MiniSupervisely) -DATASETS.register(OTB2015) +DATASETS.register(OTB100) diff --git a/tools/eval/datasets/otb100.py b/tools/eval/datasets/otb100.py new file mode 100644 index 00000000..904587f2 --- /dev/null +++ b/tools/eval/datasets/otb100.py @@ -0,0 +1,536 @@ +import os +import json +import numpy as np +import cv2 as cv +from colorama import Style, Fore +from tqdm import tqdm +from multiprocessing import Pool, cpu_count + +def overlap_ratio(rect1, rect2): + """Calculate the Intersection over Union (IoU) overlap ratio between two sets of rectangles.""" + tl = np.maximum(rect1[:, :2], rect2[:, :2]) + br = np.minimum(rect1[:, :2] + rect1[:, 2:] - 1.0, rect2[:, :2] + rect2[:, 2:] - 1.0) + sz = np.maximum(br - tl + 1.0, 0) + + # Area + intersection = np.prod(sz, axis=1) + union = np.prod(rect1[:, 2:], axis=1) + np.prod(rect2[:, 2:], axis=1) - intersection + iou = np.clip(intersection / union, 0, 1) + return iou + + +def success_overlap(gt_bb, result_bb, n_frame): + """Calculate the success rate based on the overlap ratio between ground truth and predicted bounding boxes.""" + thresholds_overlap = np.arange(0, 1.05, 0.05) + success = np.zeros(len(thresholds_overlap)) + mask = np.sum(gt_bb[:, 2:] > 0, axis=1) == 2 + iou = overlap_ratio(gt_bb[mask], result_bb[mask]) + for i, threshold in enumerate(thresholds_overlap): + success[i] = np.sum(iou > threshold) / n_frame + return success + +def success_error(gt_center, result_center, thresholds, n_frame): + """Calculate the success rate based on the error distance between ground truth and predicted bounding box centers.""" + success = np.zeros(len(thresholds)) + mask = np.sum(gt_center > 0, axis=1) == 2 + dist = np.linalg.norm(gt_center[mask] - result_center[mask], axis=1) + for i, threshold in enumerate(thresholds): + success[i] = np.sum(dist <= threshold) / n_frame + return success + +class OPEBenchmark: + def __init__(self, dataset): + self.dataset = dataset + + def convert_bb_to_center(self, bboxes): + """Convert bounding box coordinates to centers.""" + return np.array([(bboxes[:, 0] + (bboxes[:, 2] - 1) / 2), + (bboxes[:, 1] + (bboxes[:, 3] - 1) / 2)]).T + + def convert_bb_to_norm_center(self, bboxes, gt_wh): + """Convert bounding box coordinates to normalized centers.""" + return self.convert_bb_to_center(bboxes) / (gt_wh + 1e-16) + + def evaluate(self, metric): + """Evaluate the tracking performance based on the specified metric.""" + evaluation_ret = {} + for video in self.dataset: + gt_traj = np.array(video.gt_traj) + tracker_traj = np.array(video.load_tracker()) + n_frame = len(gt_traj) + if hasattr(video, 'absent'): + gt_traj = gt_traj[video.absent == 1] + tracker_traj = tracker_traj[video.absent == 1] + if metric == 'success': + evaluation_ret[video.name] = success_overlap(gt_traj, tracker_traj, n_frame) + elif metric == 'precision': + gt_center = self.convert_bb_to_center(gt_traj) + tracker_center = self.convert_bb_to_center(tracker_traj) + thresholds = np.arange(0, 51, 1) + evaluation_ret[video.name] = success_error(gt_center, tracker_center, thresholds, n_frame) + elif metric == 'norm_precision': + gt_center_norm = self.convert_bb_to_norm_center(gt_traj, gt_traj[:, 2:4]) + tracker_center_norm = self.convert_bb_to_norm_center(tracker_traj, gt_traj[:, 2:4]) + thresholds = np.arange(0, 51, 1) / 100 + evaluation_ret[video.name] = success_error(gt_center_norm, tracker_center_norm, thresholds, n_frame) + return {"tracker": evaluation_ret} + + def show_result(self, success, precision=None, norm_precision=None, show_video_level=False, height_threshold=0.6): + tracker_auc = {tracker_name: np.mean(list(scores.values())) for tracker_name, scores in success.items()} + tracker_auc = sorted(tracker_auc.items(), key=lambda x: x[1], reverse=True)[:20] + tracker_names = [x[0] for x in tracker_auc] + tracker_name_len = max(max(len(x) for x in success.keys()) + 2, 12) + header = ("|{:^" + str(tracker_name_len) + "}|{:^9}|{:^11}|{:^16}|").format( + "Tracker name", "AUC", "Precision", "Norm Precision") + formatter = "|{:^" + str(tracker_name_len) + "}|{:^9.3f}|{:^11.3f}|{:^16.3f}|" + + print('-' * len(header)) + print(header) + print('-' * len(header)) + + for tracker_name in tracker_names: + success_score = np.mean(list(success[tracker_name].values())) + precision_score = np.mean(list(precision[tracker_name].values()), axis=0)[20] if precision else 0 + norm_precision_score = np.mean(list(norm_precision[tracker_name].values()), axis=0)[20] if norm_precision else 0 + print(formatter.format(tracker_name, success_score, precision_score, norm_precision_score)) + + print('-' * len(header)) + + if show_video_level and len(success) < 10 and precision and len(precision) < 10: + print("\n\n") + header1 = "|{:^21}|".format("Tracker name") + header2 = "|{:^21}|".format("Video name") + + for tracker_name in success.keys(): + header1 += ("{:^21}|").format(tracker_name) + header2 += "{:^9}|{:^11}|".format("success", "precision") + + print('-' * len(header1)) + print(header1) + print('-' * len(header1)) + print(header2) + print('-' * len(header1)) + + for video, scores in success.items(): + row = "|{:^21}|".format(video) + + for tracker_name in tracker_names: + success_score = np.mean(success[tracker_name][video]) + precision_score = np.mean(precision[tracker_name][video]) + success_str = f'{success_score:.3f}' if success_score < height_threshold else f'{success_score:.3f}' + precision_str = f'{precision_score:.3f}' if precision_score < height_threshold else f'{precision_score:.3f}' + row += f"{success_str:^9}|{precision_str:^11}|" + + print(row) + + print('-' * len(header1)) + +class Video: + def __init__(self, name, root, video_dir, init_rect, img_names, gt_rect, attr): + self.name = name + self.video_dir = video_dir + self.init_rect = init_rect + self.gt_traj = gt_rect + self.attr = attr + self.pred_trajs = {} + self.img_names = [os.path.join(root, x) for x in img_names] + self.imgs = None + img = cv.imread(self.img_names[0]) + assert img is not None, self.img_names[0] + self.width = img.shape[1] + self.height = img.shape[0] + + def __len__(self): + return len(self.img_names) + + def __getitem__(self, idx): + if self.imgs is None: + return cv.imread(self.img_names[idx]), self.gt_traj[idx] + else: + return self.imgs[idx], self.gt_traj[idx] + + def __iter__(self): + for i in range(len(self.img_names)): + if self.imgs is not None: + yield self.imgs[i], self.gt_traj[i] + else: + yield cv.imread(self.img_names[i]), self.gt_traj[i] + + def load_tracker(self): + """Load tracker results from file.""" + traj_file = os.path.join("OTB_results", self.name+'.txt') + if os.path.exists(traj_file): + with open(traj_file, 'r') as f: + pred_traj = [list(map(float, x.strip().split(','))) for x in f.readlines()] + if len(pred_traj) != len(self.gt_traj): + print("tracker", len(pred_traj), len(self.gt_traj), self.name) + else: + return pred_traj + else: + print(traj_file) + +class OTBDATASET: + def __init__(self, root): + meta_data = {} + for sequence_info in sequence_info_list: + sequence_path = sequence_info['path'] + nz = sequence_info['nz'] + ext = sequence_info['ext'] + start_frame = sequence_info['startFrame'] + end_frame = sequence_info['endFrame'] + + init_omit = 0 + if 'initOmit' in sequence_info: + init_omit = sequence_info['initOmit'] + frames = [f'{root}/OTB100/{sequence_path}/{frame_num:0{nz}}.{ext}' for \ + frame_num in range(start_frame+init_omit, end_frame+1)] + + anno_path = f'{root}/OTB100/{sequence_info["anno_path"]}' + + ground_truth_rect = load_text_numpy(str(anno_path), (',', None), np.float64)[init_omit:,:] + meta_data[sequence_info['name']] = {} + meta_data[sequence_info['name']]['video_dir'] = sequence_info['path'] + meta_data[sequence_info['name']]['init_rect'] = ground_truth_rect[0] + meta_data[sequence_info['name']]['img_names'] = frames + meta_data[sequence_info['name']]['gt_rect'] = ground_truth_rect + meta_data[sequence_info['name']]['attr'] = [sequence_info["object_class"]] + + self.data = meta_data + self.root = root + self.videos = {} + pbar = tqdm(meta_data.keys(), desc='Loading OTB', ncols=100) + for video in pbar: + pbar.set_postfix_str(video) + self.videos[video] = Video(video, + self.root, + meta_data[video]['video_dir'], + meta_data[video]['init_rect'], + meta_data[video]['img_names'], + meta_data[video]['gt_rect'], + meta_data[video]['attr']) + self.attr = {'ALL': list(self.videos.keys())} + all_attributes = [x.attr for x in self.videos.values()] + all_attributes = set(sum(all_attributes, [])) + for attr_ in all_attributes: + self.attr[attr_] = [] + for k, v in self.videos.items(): + for attr_ in v.attr: + self.attr[attr_].append(k) + + def __getitem__(self, idx): + if isinstance(idx, str): + return self.videos[idx] + elif isinstance(idx, int): + sorted_keys = sorted(list(self.videos.keys())) + return self.videos[sorted_keys[idx]] + + def __len__(self): + return len(self.videos) + + def __iter__(self): + sorted_keys = sorted(list(self.videos.keys())) + for key in sorted_keys: + yield self.videos[key] + +def get_axis_aligned_bbox(region): + """Converts a region to (cx, cy, w, h) representing an axis-aligned box.""" + nv = region.size + if nv == 8: + cx = np.mean(region[0::2]) + cy = np.mean(region[1::2]) + x1 = min(region[0::2]) + x2 = max(region[0::2]) + y1 = min(region[1::2]) + y2 = max(region[1::2]) + A1 = np.linalg.norm(region[0:2] - region[2:4]) * np.linalg.norm(region[2:4] - region[4:6]) + A2 = (x2 - x1) * (y2 - y1) + s = np.sqrt(A1 / A2) + w = s * (x2 - x1) + 1 + h = s * (y2 - y1) + 1 + else: + x, y, w, h = region + cx = x + w / 2 + cy = y + h / 2 + return cx, cy, w, h + +def load_text_numpy(path, delimiter, dtype): + if isinstance(delimiter, (tuple, list)): + for d in delimiter: + try: + ground_truth_rect = np.loadtxt(path, delimiter=d, dtype=dtype) + return ground_truth_rect + except: + pass + + raise Exception('Could not read file {}'.format(path)) + else: + ground_truth_rect = np.loadtxt(path, delimiter=delimiter, dtype=dtype) + return ground_truth_rect + +class OTB100: + def __init__(self, root): + # Go up one if directory is provided + root = os.path.abspath(root) + if root.endswith("OTB100"): + root = os.path.dirname(root) + + self.dataset = OTBDATASET(root) + + @property + def name(self): + return self.__class__.__name__ + + def eval(self, model): + for v_idx, video in enumerate(self.dataset): + total_time = 0 + pred_bboxes = [] + scores = [] + track_times = [] + + for idx, (img, gt_bbox) in enumerate(video): + img = cv.cvtColor(img, cv.COLOR_BGR2RGB) + tic = cv.getTickCount() + + if idx == 0: + cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox)) + gt_bbox_ = (int(cx - w / 2), int(cy - h / 2), int(w), int(h)) + model.init(img, gt_bbox_) + pred_bbox = gt_bbox_ + scores.append(None) + else: + isLocated, bbox, score = model.infer(img) + pred_bbox = bbox + scores.append(score) + + pred_bboxes.append(pred_bbox) + toc = (cv.getTickCount() - tic) / cv.getTickFrequency() + total_time += toc + track_times.append(toc) + + model_path = os.path.join('OTB_results') + os.makedirs(model_path, exist_ok=True) + result_path = os.path.join(model_path, '{}.txt'.format(video.name)) + print(result_path) + with open(result_path, 'w') as f: + for bbox in pred_bboxes: + f.write(','.join(map(str, bbox)) + '\n') + + avg_fps = len(video) / total_time if total_time > 0 else 0 + print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format( + v_idx + 1, video.name, total_time, avg_fps)) + + def print_result(self): + benchmark = OPEBenchmark(self.dataset) + num_cores = cpu_count() + evaluation_results = {} + metrics = ["success", "precision", "norm_precision"] + for metric in metrics: + with Pool(processes=min(num_cores, max(1, num_cores - 1))) as pool: + for ret in tqdm(pool.imap_unordered(benchmark.evaluate, [metric], 1), desc=f'eval {metric}', total=1, ncols=100): + evaluation_results[metric] = ret + + benchmark.show_result(**evaluation_results, show_video_level=False) + + +sequence_info_list = [ + {"name": "Basketball", "path": "Basketball/img", "startFrame": 1, "endFrame": 725, "nz": 4, "ext": "jpg", "anno_path": "Basketball/groundtruth_rect.txt", + "object_class": "person"}, + # {"name": "Biker", "path": "Biker/img", "startFrame": 1, "endFrame": 142, "nz": 4, "ext": "jpg", "anno_path": "Biker/groundtruth_rect.txt", + # "object_class": "person head"}, + # {"name": "Bird1", "path": "Bird1/img", "startFrame": 1, "endFrame": 408, "nz": 4, "ext": "jpg", "anno_path": "Bird1/groundtruth_rect.txt", + # "object_class": "bird"}, + # {"name": "Bird2", "path": "Bird2/img", "startFrame": 1, "endFrame": 99, "nz": 4, "ext": "jpg", "anno_path": "Bird2/groundtruth_rect.txt", + # "object_class": "bird"}, + # {"name": "BlurBody", "path": "BlurBody/img", "startFrame": 1, "endFrame": 334, "nz": 4, "ext": "jpg", "anno_path": "BlurBody/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "BlurCar1", "path": "BlurCar1/img", "startFrame": 247, "endFrame": 988, "nz": 4, "ext": "jpg", "anno_path": "BlurCar1/groundtruth_rect.txt", + # "object_class": "car"}, + # {"name": "BlurCar2", "path": "BlurCar2/img", "startFrame": 1, "endFrame": 585, "nz": 4, "ext": "jpg", "anno_path": "BlurCar2/groundtruth_rect.txt", + # "object_class": "car"}, + # {"name": "BlurCar3", "path": "BlurCar3/img", "startFrame": 3, "endFrame": 359, "nz": 4, "ext": "jpg", "anno_path": "BlurCar3/groundtruth_rect.txt", + # "object_class": "car"}, + # {"name": "BlurCar4", "path": "BlurCar4/img", "startFrame": 18, "endFrame": 397, "nz": 4, "ext": "jpg", "anno_path": "BlurCar4/groundtruth_rect.txt", + # "object_class": "car"}, + # {"name": "BlurFace", "path": "BlurFace/img", "startFrame": 1, "endFrame": 493, "nz": 4, "ext": "jpg", "anno_path": "BlurFace/groundtruth_rect.txt", + # "object_class": "face"}, + # {"name": "BlurOwl", "path": "BlurOwl/img", "startFrame": 1, "endFrame": 631, "nz": 4, "ext": "jpg", "anno_path": "BlurOwl/groundtruth_rect.txt", + # "object_class": "other"}, + # {"name": "Board", "path": "Board/img", "startFrame": 1, "endFrame": 698, "nz": 5, "ext": "jpg", "anno_path": "Board/groundtruth_rect.txt", + # "object_class": "other"}, + # {"name": "Bolt", "path": "Bolt/img", "startFrame": 1, "endFrame": 350, "nz": 4, "ext": "jpg", "anno_path": "Bolt/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Bolt2", "path": "Bolt2/img", "startFrame": 1, "endFrame": 293, "nz": 4, "ext": "jpg", "anno_path": "Bolt2/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Box", "path": "Box/img", "startFrame": 1, "endFrame": 1161, "nz": 4, "ext": "jpg", "anno_path": "Box/groundtruth_rect.txt", + # "object_class": "other"}, + # {"name": "Boy", "path": "Boy/img", "startFrame": 1, "endFrame": 602, "nz": 4, "ext": "jpg", "anno_path": "Boy/groundtruth_rect.txt", + # "object_class": "face"}, + # {"name": "Car1", "path": "Car1/img", "startFrame": 1, "endFrame": 1020, "nz": 4, "ext": "jpg", "anno_path": "Car1/groundtruth_rect.txt", + # "object_class": "car"}, + # {"name": "Car2", "path": "Car2/img", "startFrame": 1, "endFrame": 913, "nz": 4, "ext": "jpg", "anno_path": "Car2/groundtruth_rect.txt", + # "object_class": "car"}, + # {"name": "Car24", "path": "Car24/img", "startFrame": 1, "endFrame": 3059, "nz": 4, "ext": "jpg", "anno_path": "Car24/groundtruth_rect.txt", + # "object_class": "car"}, + # {"name": "Car4", "path": "Car4/img", "startFrame": 1, "endFrame": 659, "nz": 4, "ext": "jpg", "anno_path": "Car4/groundtruth_rect.txt", + # "object_class": "car"}, + # {"name": "CarDark", "path": "CarDark/img", "startFrame": 1, "endFrame": 393, "nz": 4, "ext": "jpg", "anno_path": "CarDark/groundtruth_rect.txt", + # "object_class": "car"}, + # {"name": "CarScale", "path": "CarScale/img", "startFrame": 1, "endFrame": 252, "nz": 4, "ext": "jpg", "anno_path": "CarScale/groundtruth_rect.txt", + # "object_class": "car"}, + # {"name": "ClifBar", "path": "ClifBar/img", "startFrame": 1, "endFrame": 472, "nz": 4, "ext": "jpg", "anno_path": "ClifBar/groundtruth_rect.txt", + # "object_class": "other"}, + # {"name": "Coke", "path": "Coke/img", "startFrame": 1, "endFrame": 291, "nz": 4, "ext": "jpg", "anno_path": "Coke/groundtruth_rect.txt", + # "object_class": "other"}, + # {"name": "Couple", "path": "Couple/img", "startFrame": 1, "endFrame": 140, "nz": 4, "ext": "jpg", "anno_path": "Couple/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Coupon", "path": "Coupon/img", "startFrame": 1, "endFrame": 327, "nz": 4, "ext": "jpg", "anno_path": "Coupon/groundtruth_rect.txt", + # "object_class": "other"}, + # {"name": "Crossing", "path": "Crossing/img", "startFrame": 1, "endFrame": 120, "nz": 4, "ext": "jpg", "anno_path": "Crossing/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Crowds", "path": "Crowds/img", "startFrame": 1, "endFrame": 347, "nz": 4, "ext": "jpg", "anno_path": "Crowds/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Dancer", "path": "Dancer/img", "startFrame": 1, "endFrame": 225, "nz": 4, "ext": "jpg", "anno_path": "Dancer/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Dancer2", "path": "Dancer2/img", "startFrame": 1, "endFrame": 150, "nz": 4, "ext": "jpg", "anno_path": "Dancer2/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "David", "path": "David/img", "startFrame": 300, "endFrame": 770, "nz": 4, "ext": "jpg", "anno_path": "David/groundtruth_rect.txt", + # "object_class": "face"}, + # {"name": "David2", "path": "David2/img", "startFrame": 1, "endFrame": 537, "nz": 4, "ext": "jpg", "anno_path": "David2/groundtruth_rect.txt", + # "object_class": "face"}, + # {"name": "David3", "path": "David3/img", "startFrame": 1, "endFrame": 252, "nz": 4, "ext": "jpg", "anno_path": "David3/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Deer", "path": "Deer/img", "startFrame": 1, "endFrame": 71, "nz": 4, "ext": "jpg", "anno_path": "Deer/groundtruth_rect.txt", + # "object_class": "mammal"}, + # {"name": "Diving", "path": "Diving/img", "startFrame": 1, "endFrame": 215, "nz": 4, "ext": "jpg", "anno_path": "Diving/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Dog", "path": "Dog/img", "startFrame": 1, "endFrame": 127, "nz": 4, "ext": "jpg", "anno_path": "Dog/groundtruth_rect.txt", + # "object_class": "dog"}, + # {"name": "Dog1", "path": "Dog1/img", "startFrame": 1, "endFrame": 1350, "nz": 4, "ext": "jpg", "anno_path": "Dog1/groundtruth_rect.txt", + # "object_class": "dog"}, + # {"name": "Doll", "path": "Doll/img", "startFrame": 1, "endFrame": 3872, "nz": 4, "ext": "jpg", "anno_path": "Doll/groundtruth_rect.txt", + # "object_class": "other"}, + # {"name": "DragonBaby", "path": "DragonBaby/img", "startFrame": 1, "endFrame": 113, "nz": 4, "ext": "jpg", "anno_path": "DragonBaby/groundtruth_rect.txt", + # "object_class": "face"}, + # {"name": "Dudek", "path": "Dudek/img", "startFrame": 1, "endFrame": 1145, "nz": 4, "ext": "jpg", "anno_path": "Dudek/groundtruth_rect.txt", + # "object_class": "face"}, + # {"name": "FaceOcc1", "path": "FaceOcc1/img", "startFrame": 1, "endFrame": 892, "nz": 4, "ext": "jpg", "anno_path": "FaceOcc1/groundtruth_rect.txt", + # "object_class": "face"}, + # {"name": "FaceOcc2", "path": "FaceOcc2/img", "startFrame": 1, "endFrame": 812, "nz": 4, "ext": "jpg", "anno_path": "FaceOcc2/groundtruth_rect.txt", + # "object_class": "face"}, + # {"name": "Fish", "path": "Fish/img", "startFrame": 1, "endFrame": 476, "nz": 4, "ext": "jpg", "anno_path": "Fish/groundtruth_rect.txt", + # "object_class": "other"}, + # {"name": "FleetFace", "path": "FleetFace/img", "startFrame": 1, "endFrame": 707, "nz": 4, "ext": "jpg", "anno_path": "FleetFace/groundtruth_rect.txt", + # "object_class": "face"}, + # {"name": "Football", "path": "Football/img", "startFrame": 1, "endFrame": 362, "nz": 4, "ext": "jpg", "anno_path": "Football/groundtruth_rect.txt", + # "object_class": "person head"}, + # {"name": "Football1", "path": "Football1/img", "startFrame": 1, "endFrame": 74, "nz": 4, "ext": "jpg", "anno_path": "Football1/groundtruth_rect.txt", + # "object_class": "face"}, + # {"name": "Freeman1", "path": "Freeman1/img", "startFrame": 1, "endFrame": 326, "nz": 4, "ext": "jpg", "anno_path": "Freeman1/groundtruth_rect.txt", + # "object_class": "face"}, + # {"name": "Freeman3", "path": "Freeman3/img", "startFrame": 1, "endFrame": 460, "nz": 4, "ext": "jpg", "anno_path": "Freeman3/groundtruth_rect.txt", + # "object_class": "face"}, + # {"name": "Freeman4", "path": "Freeman4/img", "startFrame": 1, "endFrame": 283, "nz": 4, "ext": "jpg", "anno_path": "Freeman4/groundtruth_rect.txt", + # "object_class": "face"}, + # {"name": "Girl", "path": "Girl/img", "startFrame": 1, "endFrame": 500, "nz": 4, "ext": "jpg", "anno_path": "Girl/groundtruth_rect.txt", + # "object_class": "face"}, + # {"name": "Girl2", "path": "Girl2/img", "startFrame": 1, "endFrame": 1500, "nz": 4, "ext": "jpg", "anno_path": "Girl2/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Gym", "path": "Gym/img", "startFrame": 1, "endFrame": 767, "nz": 4, "ext": "jpg", "anno_path": "Gym/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Human2", "path": "Human2/img", "startFrame": 1, "endFrame": 1128, "nz": 4, "ext": "jpg", "anno_path": "Human2/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Human3", "path": "Human3/img", "startFrame": 1, "endFrame": 1698, "nz": 4, "ext": "jpg", "anno_path": "Human3/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Human4", "path": "Human4/img", "startFrame": 1, "endFrame": 667, "nz": 4, "ext": "jpg", "anno_path": "Human4/groundtruth_rect.2.txt", + # "object_class": "person"}, + # {"name": "Human5", "path": "Human5/img", "startFrame": 1, "endFrame": 713, "nz": 4, "ext": "jpg", "anno_path": "Human5/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Human6", "path": "Human6/img", "startFrame": 1, "endFrame": 792, "nz": 4, "ext": "jpg", "anno_path": "Human6/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Human7", "path": "Human7/img", "startFrame": 1, "endFrame": 250, "nz": 4, "ext": "jpg", "anno_path": "Human7/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Human8", "path": "Human8/img", "startFrame": 1, "endFrame": 128, "nz": 4, "ext": "jpg", "anno_path": "Human8/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Human9", "path": "Human9/img", "startFrame": 1, "endFrame": 305, "nz": 4, "ext": "jpg", "anno_path": "Human9/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Ironman", "path": "Ironman/img", "startFrame": 1, "endFrame": 166, "nz": 4, "ext": "jpg", "anno_path": "Ironman/groundtruth_rect.txt", + # "object_class": "person head"}, + # {"name": "Jogging_1", "path": "Jogging/img", "startFrame": 1, "endFrame": 307, "nz": 4, "ext": "jpg", "anno_path": "Jogging/groundtruth_rect.1.txt", + # "object_class": "person"}, + # {"name": "Jogging_2", "path": "Jogging/img", "startFrame": 1, "endFrame": 307, "nz": 4, "ext": "jpg", "anno_path": "Jogging/groundtruth_rect.2.txt", + # "object_class": "person"}, + # {"name": "Jump", "path": "Jump/img", "startFrame": 1, "endFrame": 122, "nz": 4, "ext": "jpg", "anno_path": "Jump/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Jumping", "path": "Jumping/img", "startFrame": 1, "endFrame": 313, "nz": 4, "ext": "jpg", "anno_path": "Jumping/groundtruth_rect.txt", + # "object_class": "face"}, + # {"name": "KiteSurf", "path": "KiteSurf/img", "startFrame": 1, "endFrame": 84, "nz": 4, "ext": "jpg", "anno_path": "KiteSurf/groundtruth_rect.txt", + # "object_class": "face"}, + # {"name": "Lemming", "path": "Lemming/img", "startFrame": 1, "endFrame": 1336, "nz": 4, "ext": "jpg", "anno_path": "Lemming/groundtruth_rect.txt", + # "object_class": "other"}, + # {"name": "Liquor", "path": "Liquor/img", "startFrame": 1, "endFrame": 1741, "nz": 4, "ext": "jpg", "anno_path": "Liquor/groundtruth_rect.txt", + # "object_class": "other"}, + # {"name": "Man", "path": "Man/img", "startFrame": 1, "endFrame": 134, "nz": 4, "ext": "jpg", "anno_path": "Man/groundtruth_rect.txt", + # "object_class": "face"}, + # {"name": "Matrix", "path": "Matrix/img", "startFrame": 1, "endFrame": 100, "nz": 4, "ext": "jpg", "anno_path": "Matrix/groundtruth_rect.txt", + # "object_class": "person head"}, + # {"name": "Mhyang", "path": "Mhyang/img", "startFrame": 1, "endFrame": 1490, "nz": 4, "ext": "jpg", "anno_path": "Mhyang/groundtruth_rect.txt", + # "object_class": "face"}, + # {"name": "MotorRolling", "path": "MotorRolling/img", "startFrame": 1, "endFrame": 164, "nz": 4, "ext": "jpg", "anno_path": "MotorRolling/groundtruth_rect.txt", + # "object_class": "vehicle"}, + # {"name": "MountainBike", "path": "MountainBike/img", "startFrame": 1, "endFrame": 228, "nz": 4, "ext": "jpg", "anno_path": "MountainBike/groundtruth_rect.txt", + # "object_class": "bicycle"}, + # {"name": "Panda", "path": "Panda/img", "startFrame": 1, "endFrame": 1000, "nz": 4, "ext": "jpg", "anno_path": "Panda/groundtruth_rect.txt", + # "object_class": "mammal"}, + # {"name": "RedTeam", "path": "RedTeam/img", "startFrame": 1, "endFrame": 1918, "nz": 4, "ext": "jpg", "anno_path": "RedTeam/groundtruth_rect.txt", + # "object_class": "vehicle"}, + # {"name": "Rubik", "path": "Rubik/img", "startFrame": 1, "endFrame": 1997, "nz": 4, "ext": "jpg", "anno_path": "Rubik/groundtruth_rect.txt", + # "object_class": "other"}, + # {"name": "Shaking", "path": "Shaking/img", "startFrame": 1, "endFrame": 365, "nz": 4, "ext": "jpg", "anno_path": "Shaking/groundtruth_rect.txt", + # "object_class": "face"}, + # {"name": "Singer1", "path": "Singer1/img", "startFrame": 1, "endFrame": 351, "nz": 4, "ext": "jpg", "anno_path": "Singer1/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Singer2", "path": "Singer2/img", "startFrame": 1, "endFrame": 366, "nz": 4, "ext": "jpg", "anno_path": "Singer2/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Skater", "path": "Skater/img", "startFrame": 1, "endFrame": 160, "nz": 4, "ext": "jpg", "anno_path": "Skater/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Skater2", "path": "Skater2/img", "startFrame": 1, "endFrame": 435, "nz": 4, "ext": "jpg", "anno_path": "Skater2/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Skating1", "path": "Skating1/img", "startFrame": 1, "endFrame": 400, "nz": 4, "ext": "jpg", "anno_path": "Skating1/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Skating2_1", "path": "Skating2/img", "startFrame": 1, "endFrame": 473, "nz": 4, "ext": "jpg", "anno_path": "Skating2/groundtruth_rect.1.txt", + # "object_class": "person"}, + # {"name": "Skating2_2", "path": "Skating2/img", "startFrame": 1, "endFrame": 473, "nz": 4, "ext": "jpg", "anno_path": "Skating2/groundtruth_rect.2.txt", + # "object_class": "person"}, + # {"name": "Skiing", "path": "Skiing/img", "startFrame": 1, "endFrame": 81, "nz": 4, "ext": "jpg", "anno_path": "Skiing/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Soccer", "path": "Soccer/img", "startFrame": 1, "endFrame": 392, "nz": 4, "ext": "jpg", "anno_path": "Soccer/groundtruth_rect.txt", + # "object_class": "face"}, + # {"name": "Subway", "path": "Subway/img", "startFrame": 1, "endFrame": 175, "nz": 4, "ext": "jpg", "anno_path": "Subway/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Surfer", "path": "Surfer/img", "startFrame": 1, "endFrame": 376, "nz": 4, "ext": "jpg", "anno_path": "Surfer/groundtruth_rect.txt", + # "object_class": "person head"}, + # {"name": "Suv", "path": "Suv/img", "startFrame": 1, "endFrame": 945, "nz": 4, "ext": "jpg", "anno_path": "Suv/groundtruth_rect.txt", + # "object_class": "car"}, + # {"name": "Sylvester", "path": "Sylvester/img", "startFrame": 1, "endFrame": 1345, "nz": 4, "ext": "jpg", "anno_path": "Sylvester/groundtruth_rect.txt", + # "object_class": "other"}, + # {"name": "Tiger1", "path": "Tiger1/img", "startFrame": 1, "endFrame": 354, "nz": 4, "ext": "jpg", "anno_path": "Tiger1/groundtruth_rect.txt", "initOmit": 5, + # "object_class": "other"}, + # {"name": "Tiger2", "path": "Tiger2/img", "startFrame": 1, "endFrame": 365, "nz": 4, "ext": "jpg", "anno_path": "Tiger2/groundtruth_rect.txt", + # "object_class": "other"}, + # {"name": "Toy", "path": "Toy/img", "startFrame": 1, "endFrame": 271, "nz": 4, "ext": "jpg", "anno_path": "Toy/groundtruth_rect.txt", + # "object_class": "other"}, + # {"name": "Trans", "path": "Trans/img", "startFrame": 1, "endFrame": 124, "nz": 4, "ext": "jpg", "anno_path": "Trans/groundtruth_rect.txt", + # "object_class": "other"}, + # {"name": "Trellis", "path": "Trellis/img", "startFrame": 1, "endFrame": 569, "nz": 4, "ext": "jpg", "anno_path": "Trellis/groundtruth_rect.txt", + # "object_class": "face"}, + # {"name": "Twinnings", "path": "Twinnings/img", "startFrame": 1, "endFrame": 472, "nz": 4, "ext": "jpg", "anno_path": "Twinnings/groundtruth_rect.txt", + # "object_class": "other"}, + # {"name": "Vase", "path": "Vase/img", "startFrame": 1, "endFrame": 271, "nz": 4, "ext": "jpg", "anno_path": "Vase/groundtruth_rect.txt", + # "object_class": "other"}, + # {"name": "Walking", "path": "Walking/img", "startFrame": 1, "endFrame": 412, "nz": 4, "ext": "jpg", "anno_path": "Walking/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Walking2", "path": "Walking2/img", "startFrame": 1, "endFrame": 500, "nz": 4, "ext": "jpg", "anno_path": "Walking2/groundtruth_rect.txt", + # "object_class": "person"}, + # {"name": "Woman", "path": "Woman/img", "startFrame": 1, "endFrame": 597, "nz": 4, "ext": "jpg", "anno_path": "Woman/groundtruth_rect.txt", + # "object_class": "person"} +] diff --git a/tools/eval/datasets/otb2015.py b/tools/eval/datasets/otb2015.py deleted file mode 100644 index c8c44d07..00000000 --- a/tools/eval/datasets/otb2015.py +++ /dev/null @@ -1,333 +0,0 @@ -import os -import json -import numpy as np -import cv2 as cv -from colorama import Style, Fore -from tqdm import tqdm -from multiprocessing import Pool, cpu_count - -def overlap_ratio(rect1, rect2): - """Calculate the Intersection over Union (IoU) overlap ratio between two sets of rectangles.""" - tl = np.maximum(rect1[:, :2], rect2[:, :2]) - br = np.minimum(rect1[:, :2] + rect1[:, 2:] - 1.0, rect2[:, :2] + rect2[:, 2:] - 1.0) - sz = np.maximum(br - tl + 1.0, 0) - - # Area - intersection = np.prod(sz, axis=1) - union = np.prod(rect1[:, 2:], axis=1) + np.prod(rect2[:, 2:], axis=1) - intersection - iou = np.clip(intersection / union, 0, 1) - return iou - - -def success_overlap(gt_bb, result_bb, n_frame): - """Calculate the success rate based on the overlap ratio between ground truth and predicted bounding boxes.""" - thresholds_overlap = np.arange(0, 1.05, 0.05) - success = np.zeros(len(thresholds_overlap)) - mask = np.sum(gt_bb[:, 2:] > 0, axis=1) == 2 - iou = overlap_ratio(gt_bb[mask], result_bb[mask]) - for i, threshold in enumerate(thresholds_overlap): - success[i] = np.sum(iou > threshold) / n_frame - return success - -def success_error(gt_center, result_center, thresholds, n_frame): - """Calculate the success rate based on the error distance between ground truth and predicted bounding box centers.""" - success = np.zeros(len(thresholds)) - mask = np.sum(gt_center > 0, axis=1) == 2 - dist = np.linalg.norm(gt_center[mask] - result_center[mask], axis=1) - for i, threshold in enumerate(thresholds): - success[i] = np.sum(dist <= threshold) / n_frame - return success - -class OPEBenchmark: - def __init__(self, dataset): - self.dataset = dataset - - def convert_bb_to_center(self, bboxes): - """Convert bounding box coordinates to centers.""" - return np.array([(bboxes[:, 0] + (bboxes[:, 2] - 1) / 2), - (bboxes[:, 1] + (bboxes[:, 3] - 1) / 2)]).T - - def convert_bb_to_norm_center(self, bboxes, gt_wh): - """Convert bounding box coordinates to normalized centers.""" - return self.convert_bb_to_center(bboxes) / (gt_wh + 1e-16) - - def evaluate(self, metric): - """Evaluate the tracking performance based on the specified metric.""" - evaluation_ret = {} - for video in self.dataset: - gt_traj = np.array(video.gt_traj) - tracker_traj = np.array(video.load_tracker()) - n_frame = len(gt_traj) - if hasattr(video, 'absent'): - gt_traj = gt_traj[video.absent == 1] - tracker_traj = tracker_traj[video.absent == 1] - if metric == 'success': - evaluation_ret[video.name] = success_overlap(gt_traj, tracker_traj, n_frame) - elif metric == 'precision': - gt_center = self.convert_bb_to_center(gt_traj) - tracker_center = self.convert_bb_to_center(tracker_traj) - thresholds = np.arange(0, 51, 1) - evaluation_ret[video.name] = success_error(gt_center, tracker_center, thresholds, n_frame) - elif metric == 'norm_precision': - gt_center_norm = self.convert_bb_to_norm_center(gt_traj, gt_traj[:, 2:4]) - tracker_center_norm = self.convert_bb_to_norm_center(tracker_traj, gt_traj[:, 2:4]) - thresholds = np.arange(0, 51, 1) / 100 - evaluation_ret[video.name] = success_error(gt_center_norm, tracker_center_norm, thresholds, n_frame) - return {"tracker": evaluation_ret} - - def show_result(self, success, precision=None, norm_precision=None, show_video_level=False, height_threshold=0.6): - tracker_auc = {tracker_name: np.mean(list(scores.values())) for tracker_name, scores in success.items()} - tracker_auc = sorted(tracker_auc.items(), key=lambda x: x[1], reverse=True)[:20] - tracker_names = [x[0] for x in tracker_auc] - tracker_name_len = max(max(len(x) for x in success.keys()) + 2, 12) - header = ("|{:^" + str(tracker_name_len) + "}|{:^9}|{:^11}|{:^16}|").format( - "Tracker name", "AUC", "Precision", "Norm Precision") - formatter = "|{:^" + str(tracker_name_len) + "}|{:^9.3f}|{:^11.3f}|{:^16.3f}|" - - print('-' * len(header)) - print(header) - print('-' * len(header)) - - for tracker_name in tracker_names: - success_score = np.mean(list(success[tracker_name].values())) - precision_score = np.mean(list(precision[tracker_name].values()), axis=0)[20] if precision else 0 - norm_precision_score = np.mean(list(norm_precision[tracker_name].values()), axis=0)[20] if norm_precision else 0 - print(formatter.format(tracker_name, success_score, precision_score, norm_precision_score)) - - print('-' * len(header)) - - if show_video_level and len(success) < 10 and precision and len(precision) < 10: - print("\n\n") - header1 = "|{:^21}|".format("Tracker name") - header2 = "|{:^21}|".format("Video name") - - for tracker_name in success.keys(): - header1 += ("{:^21}|").format(tracker_name) - header2 += "{:^9}|{:^11}|".format("success", "precision") - - print('-' * len(header1)) - print(header1) - print('-' * len(header1)) - print(header2) - print('-' * len(header1)) - - for video, scores in success.items(): - row = "|{:^21}|".format(video) - - for tracker_name in tracker_names: - success_score = np.mean(success[tracker_name][video]) - precision_score = np.mean(precision[tracker_name][video]) - success_str = f'{success_score:.3f}' if success_score < height_threshold else f'{success_score:.3f}' - precision_str = f'{precision_score:.3f}' if precision_score < height_threshold else f'{precision_score:.3f}' - row += f"{success_str:^9}|{precision_str:^11}|" - - print(row) - - print('-' * len(header1)) - -class Video: - def __init__(self, name, root, video_dir, init_rect, img_names, gt_rect, attr): - self.name = name - self.video_dir = video_dir - self.init_rect = init_rect - self.gt_traj = gt_rect - self.attr = attr - self.pred_trajs = {} - self.img_names = [os.path.join(root, x) for x in img_names] - self.imgs = None - img = cv.imread(self.img_names[0]) - assert img is not None, self.img_names[0] - self.width = img.shape[1] - self.height = img.shape[0] - - def __len__(self): - return len(self.img_names) - - def __getitem__(self, idx): - if self.imgs is None: - return cv.imread(self.img_names[idx]), self.gt_traj[idx] - else: - return self.imgs[idx], self.gt_traj[idx] - - def __iter__(self): - for i in range(len(self.img_names)): - if self.imgs is not None: - yield self.imgs[i], self.gt_traj[i] - else: - yield cv.imread(self.img_names[i]), self.gt_traj[i] - - def load_tracker(self): - """Load tracker results from file.""" - traj_file = os.path.join("OTB_results", self.name+'.txt') - if not os.path.exists(traj_file): - txt_names = { - 'FleetFace': 'fleetface.txt', - 'Jogging-1': 'jogging_1.txt', - 'Jogging-2': 'jogging_2.txt', - 'Skating2-1': 'skating2_1.txt', - 'Skating2-2': 'skating2_2.txt', - 'FaceOcc1': 'faceocc1.txt', - 'FaceOcc2': 'faceocc2.txt', - 'Human4-2': 'human4_2.txt' - } - txt_name = txt_names.get(self.name, self.name[0].lower() + self.name[1:] + '.txt') - traj_file = os.path.join("OTB_results", txt_name) - - if os.path.exists(traj_file): - with open(traj_file, 'r') as f: - pred_traj = [list(map(float, x.strip().split(','))) for x in f.readlines()] - if len(pred_traj) != len(self.gt_traj): - print("tracker", len(pred_traj), len(self.gt_traj), self.name) - else: - return pred_traj - else: - print(traj_file) - -class OTBDATASET: - def __init__(self, root): - with open(os.path.join(root, 'OTB.json'), 'r') as f: - meta_data = json.load(f) - self.root = root - self.videos = {} - pbar = tqdm(meta_data.keys(), desc='Loading OTB', ncols=100) - for video in pbar: - pbar.set_postfix_str(video) - self.videos[video] = Video(video, - self.root, - meta_data[video]['video_dir'], - meta_data[video]['init_rect'], - meta_data[video]['img_names'], - meta_data[video]['gt_rect'], - meta_data[video]['attr']) - self.attr = {'ALL': list(self.videos.keys())} - all_attributes = [x.attr for x in self.videos.values()] - all_attributes = set(sum(all_attributes, [])) - for attr_ in all_attributes: - self.attr[attr_] = [] - for k, v in self.videos.items(): - for attr_ in v.attr: - self.attr[attr_].append(k) - - def __getitem__(self, idx): - if isinstance(idx, str): - return self.videos[idx] - elif isinstance(idx, int): - sorted_keys = sorted(list(self.videos.keys())) - return self.videos[sorted_keys[idx]] - - def __len__(self): - return len(self.videos) - - def __iter__(self): - sorted_keys = sorted(list(self.videos.keys())) - for key in sorted_keys: - yield self.videos[key] - -def get_axis_aligned_bbox(region): - """Converts a region to (cx, cy, w, h) representing an axis-aligned box.""" - nv = region.size - if nv == 8: - cx = np.mean(region[0::2]) - cy = np.mean(region[1::2]) - x1 = min(region[0::2]) - x2 = max(region[0::2]) - y1 = min(region[1::2]) - y2 = max(region[1::2]) - A1 = np.linalg.norm(region[0:2] - region[2:4]) * np.linalg.norm(region[2:4] - region[4:6]) - A2 = (x2 - x1) * (y2 - y1) - s = np.sqrt(A1 / A2) - w = s * (x2 - x1) + 1 - h = s * (y2 - y1) + 1 - else: - x, y, w, h = region - cx = x + w / 2 - cy = y + h / 2 - return cx, cy, w, h - -class OTB2015: - def __init__(self, root): - # Go up one if directory is provided - root = os.path.abspath(root) - if root.endswith("OTB2015"): - root = os.path.dirname(root) - print(root) - - # Unzip the OTB100.zip file - if os.path.exists(f'{root}/OTB100.zip'): - os.system(f'unzip -q "{os.path.join(root, "OTB100.zip")}" -d "{root}"') - os.remove(f'{root}/OTB100.zip') - - # Move the JSON label in if it's outside - if os.path.exists(f'{root}/OTB.json'): - os.rename(f'{root}/OTB.json', f'{root}/OTB100/OTB.json') - - if os.path.exists(f'{root}/OTB100'): - original_directories = ['Jogging', 'Skating2', 'Human4'] - updated_directories = ['Jogging-1', 'Jogging-2', 'Skating2-1', 'Skating2-2', 'Human4-2', 'OTB.json'] - original_exist = all(os.path.exists(f'{root}/OTB100/{dir}') for dir in original_directories) - updated_exist = all(os.path.exists(f'{root}/OTB100/{dir}') for dir in updated_directories) - if original_exist: - os.rename(f'{root}/OTB100/Jogging', f'{root}/OTB100/Jogging-1') - os.rename(f'{root}/OTB100/Skating2', f'{root}/OTB100/Skating2-1') - os.rename(f'{root}/OTB100/Human4', f'{root}/OTB100/Human4-2') - os.system(f'cp -r "{root}/OTB100/Jogging-1" "{root}/OTB100/Jogging-2"') - os.system(f'cp -r "{root}/OTB100/Skating2-1" "{root}/OTB100/Skating2-2"') - elif not updated_exist: - raise RuntimeError("Not all files needed for setup are present.") - - self.root = f'{root}/OTB2015' - self.dataset = OTBDATASET(self.root) - - @property - def name(self): - return self.__class__.__name__ - - def eval(self, model): - for v_idx, video in enumerate(self.dataset): - total_time = 0 - pred_bboxes = [] - scores = [] - track_times = [] - - for idx, (img, gt_bbox) in enumerate(video): - img = cv.cvtColor(img, cv.COLOR_BGR2RGB) - tic = cv.getTickCount() - - if idx == 0: - cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox)) - gt_bbox_ = (int(cx - w / 2), int(cy - h / 2), int(w), int(h)) - model.init(img, gt_bbox_) - pred_bbox = gt_bbox_ - scores.append(None) - else: - isLocated, bbox, score = model.infer(img) - pred_bbox = bbox - scores.append(score) - - pred_bboxes.append(pred_bbox) - toc = (cv.getTickCount() - tic) / cv.getTickFrequency() - total_time += toc - track_times.append(toc) - - model_path = os.path.join('OTB_results') - os.makedirs(model_path, exist_ok=True) - result_path = os.path.join(model_path, '{}.txt'.format(video.name)) - with open(result_path, 'w') as f: - for bbox in pred_bboxes: - f.write(','.join(map(str, bbox)) + '\n') - - avg_fps = len(video) / total_time if total_time > 0 else 0 - print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format( - v_idx + 1, video.name, total_time, avg_fps)) - - def print_result(self): - benchmark = OPEBenchmark(self.dataset) - num_cores = cpu_count() - evaluation_results = {} - metrics = ["success", "precision", "norm_precision"] - for metric in metrics: - with Pool(processes=min(num_cores, max(1, num_cores - 1))) as pool: - for ret in tqdm(pool.imap_unordered(benchmark.evaluate, [metric], 1), desc=f'eval {metric}', total=1, ncols=100): - evaluation_results[metric] = ret - - benchmark.show_result(**evaluation_results, show_video_level=False) diff --git a/tools/eval/eval.py b/tools/eval/eval.py index f5d66e9d..0fd3b553 100644 --- a/tools/eval/eval.py +++ b/tools/eval/eval.py @@ -122,8 +122,8 @@ mini_supervisely=dict( name="MiniSupervisely", topic="human_segmentation"), - otb2015=dict( - name="OTB2015", + otb100=dict( + name="OTB100", topic="object_tracking"), ) From 27694a208a9d82de9ec81a12ddeae1260fb91f1c Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Mon, 1 Apr 2024 03:23:06 -0400 Subject: [PATCH 09/17] Uncomment all sequence infos --- tools/eval/datasets/otb100.py | 396 +++++++++++++++++----------------- 1 file changed, 198 insertions(+), 198 deletions(-) diff --git a/tools/eval/datasets/otb100.py b/tools/eval/datasets/otb100.py index 904587f2..fa4b0e7e 100644 --- a/tools/eval/datasets/otb100.py +++ b/tools/eval/datasets/otb100.py @@ -335,202 +335,202 @@ def print_result(self): sequence_info_list = [ {"name": "Basketball", "path": "Basketball/img", "startFrame": 1, "endFrame": 725, "nz": 4, "ext": "jpg", "anno_path": "Basketball/groundtruth_rect.txt", "object_class": "person"}, - # {"name": "Biker", "path": "Biker/img", "startFrame": 1, "endFrame": 142, "nz": 4, "ext": "jpg", "anno_path": "Biker/groundtruth_rect.txt", - # "object_class": "person head"}, - # {"name": "Bird1", "path": "Bird1/img", "startFrame": 1, "endFrame": 408, "nz": 4, "ext": "jpg", "anno_path": "Bird1/groundtruth_rect.txt", - # "object_class": "bird"}, - # {"name": "Bird2", "path": "Bird2/img", "startFrame": 1, "endFrame": 99, "nz": 4, "ext": "jpg", "anno_path": "Bird2/groundtruth_rect.txt", - # "object_class": "bird"}, - # {"name": "BlurBody", "path": "BlurBody/img", "startFrame": 1, "endFrame": 334, "nz": 4, "ext": "jpg", "anno_path": "BlurBody/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "BlurCar1", "path": "BlurCar1/img", "startFrame": 247, "endFrame": 988, "nz": 4, "ext": "jpg", "anno_path": "BlurCar1/groundtruth_rect.txt", - # "object_class": "car"}, - # {"name": "BlurCar2", "path": "BlurCar2/img", "startFrame": 1, "endFrame": 585, "nz": 4, "ext": "jpg", "anno_path": "BlurCar2/groundtruth_rect.txt", - # "object_class": "car"}, - # {"name": "BlurCar3", "path": "BlurCar3/img", "startFrame": 3, "endFrame": 359, "nz": 4, "ext": "jpg", "anno_path": "BlurCar3/groundtruth_rect.txt", - # "object_class": "car"}, - # {"name": "BlurCar4", "path": "BlurCar4/img", "startFrame": 18, "endFrame": 397, "nz": 4, "ext": "jpg", "anno_path": "BlurCar4/groundtruth_rect.txt", - # "object_class": "car"}, - # {"name": "BlurFace", "path": "BlurFace/img", "startFrame": 1, "endFrame": 493, "nz": 4, "ext": "jpg", "anno_path": "BlurFace/groundtruth_rect.txt", - # "object_class": "face"}, - # {"name": "BlurOwl", "path": "BlurOwl/img", "startFrame": 1, "endFrame": 631, "nz": 4, "ext": "jpg", "anno_path": "BlurOwl/groundtruth_rect.txt", - # "object_class": "other"}, - # {"name": "Board", "path": "Board/img", "startFrame": 1, "endFrame": 698, "nz": 5, "ext": "jpg", "anno_path": "Board/groundtruth_rect.txt", - # "object_class": "other"}, - # {"name": "Bolt", "path": "Bolt/img", "startFrame": 1, "endFrame": 350, "nz": 4, "ext": "jpg", "anno_path": "Bolt/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Bolt2", "path": "Bolt2/img", "startFrame": 1, "endFrame": 293, "nz": 4, "ext": "jpg", "anno_path": "Bolt2/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Box", "path": "Box/img", "startFrame": 1, "endFrame": 1161, "nz": 4, "ext": "jpg", "anno_path": "Box/groundtruth_rect.txt", - # "object_class": "other"}, - # {"name": "Boy", "path": "Boy/img", "startFrame": 1, "endFrame": 602, "nz": 4, "ext": "jpg", "anno_path": "Boy/groundtruth_rect.txt", - # "object_class": "face"}, - # {"name": "Car1", "path": "Car1/img", "startFrame": 1, "endFrame": 1020, "nz": 4, "ext": "jpg", "anno_path": "Car1/groundtruth_rect.txt", - # "object_class": "car"}, - # {"name": "Car2", "path": "Car2/img", "startFrame": 1, "endFrame": 913, "nz": 4, "ext": "jpg", "anno_path": "Car2/groundtruth_rect.txt", - # "object_class": "car"}, - # {"name": "Car24", "path": "Car24/img", "startFrame": 1, "endFrame": 3059, "nz": 4, "ext": "jpg", "anno_path": "Car24/groundtruth_rect.txt", - # "object_class": "car"}, - # {"name": "Car4", "path": "Car4/img", "startFrame": 1, "endFrame": 659, "nz": 4, "ext": "jpg", "anno_path": "Car4/groundtruth_rect.txt", - # "object_class": "car"}, - # {"name": "CarDark", "path": "CarDark/img", "startFrame": 1, "endFrame": 393, "nz": 4, "ext": "jpg", "anno_path": "CarDark/groundtruth_rect.txt", - # "object_class": "car"}, - # {"name": "CarScale", "path": "CarScale/img", "startFrame": 1, "endFrame": 252, "nz": 4, "ext": "jpg", "anno_path": "CarScale/groundtruth_rect.txt", - # "object_class": "car"}, - # {"name": "ClifBar", "path": "ClifBar/img", "startFrame": 1, "endFrame": 472, "nz": 4, "ext": "jpg", "anno_path": "ClifBar/groundtruth_rect.txt", - # "object_class": "other"}, - # {"name": "Coke", "path": "Coke/img", "startFrame": 1, "endFrame": 291, "nz": 4, "ext": "jpg", "anno_path": "Coke/groundtruth_rect.txt", - # "object_class": "other"}, - # {"name": "Couple", "path": "Couple/img", "startFrame": 1, "endFrame": 140, "nz": 4, "ext": "jpg", "anno_path": "Couple/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Coupon", "path": "Coupon/img", "startFrame": 1, "endFrame": 327, "nz": 4, "ext": "jpg", "anno_path": "Coupon/groundtruth_rect.txt", - # "object_class": "other"}, - # {"name": "Crossing", "path": "Crossing/img", "startFrame": 1, "endFrame": 120, "nz": 4, "ext": "jpg", "anno_path": "Crossing/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Crowds", "path": "Crowds/img", "startFrame": 1, "endFrame": 347, "nz": 4, "ext": "jpg", "anno_path": "Crowds/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Dancer", "path": "Dancer/img", "startFrame": 1, "endFrame": 225, "nz": 4, "ext": "jpg", "anno_path": "Dancer/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Dancer2", "path": "Dancer2/img", "startFrame": 1, "endFrame": 150, "nz": 4, "ext": "jpg", "anno_path": "Dancer2/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "David", "path": "David/img", "startFrame": 300, "endFrame": 770, "nz": 4, "ext": "jpg", "anno_path": "David/groundtruth_rect.txt", - # "object_class": "face"}, - # {"name": "David2", "path": "David2/img", "startFrame": 1, "endFrame": 537, "nz": 4, "ext": "jpg", "anno_path": "David2/groundtruth_rect.txt", - # "object_class": "face"}, - # {"name": "David3", "path": "David3/img", "startFrame": 1, "endFrame": 252, "nz": 4, "ext": "jpg", "anno_path": "David3/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Deer", "path": "Deer/img", "startFrame": 1, "endFrame": 71, "nz": 4, "ext": "jpg", "anno_path": "Deer/groundtruth_rect.txt", - # "object_class": "mammal"}, - # {"name": "Diving", "path": "Diving/img", "startFrame": 1, "endFrame": 215, "nz": 4, "ext": "jpg", "anno_path": "Diving/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Dog", "path": "Dog/img", "startFrame": 1, "endFrame": 127, "nz": 4, "ext": "jpg", "anno_path": "Dog/groundtruth_rect.txt", - # "object_class": "dog"}, - # {"name": "Dog1", "path": "Dog1/img", "startFrame": 1, "endFrame": 1350, "nz": 4, "ext": "jpg", "anno_path": "Dog1/groundtruth_rect.txt", - # "object_class": "dog"}, - # {"name": "Doll", "path": "Doll/img", "startFrame": 1, "endFrame": 3872, "nz": 4, "ext": "jpg", "anno_path": "Doll/groundtruth_rect.txt", - # "object_class": "other"}, - # {"name": "DragonBaby", "path": "DragonBaby/img", "startFrame": 1, "endFrame": 113, "nz": 4, "ext": "jpg", "anno_path": "DragonBaby/groundtruth_rect.txt", - # "object_class": "face"}, - # {"name": "Dudek", "path": "Dudek/img", "startFrame": 1, "endFrame": 1145, "nz": 4, "ext": "jpg", "anno_path": "Dudek/groundtruth_rect.txt", - # "object_class": "face"}, - # {"name": "FaceOcc1", "path": "FaceOcc1/img", "startFrame": 1, "endFrame": 892, "nz": 4, "ext": "jpg", "anno_path": "FaceOcc1/groundtruth_rect.txt", - # "object_class": "face"}, - # {"name": "FaceOcc2", "path": "FaceOcc2/img", "startFrame": 1, "endFrame": 812, "nz": 4, "ext": "jpg", "anno_path": "FaceOcc2/groundtruth_rect.txt", - # "object_class": "face"}, - # {"name": "Fish", "path": "Fish/img", "startFrame": 1, "endFrame": 476, "nz": 4, "ext": "jpg", "anno_path": "Fish/groundtruth_rect.txt", - # "object_class": "other"}, - # {"name": "FleetFace", "path": "FleetFace/img", "startFrame": 1, "endFrame": 707, "nz": 4, "ext": "jpg", "anno_path": "FleetFace/groundtruth_rect.txt", - # "object_class": "face"}, - # {"name": "Football", "path": "Football/img", "startFrame": 1, "endFrame": 362, "nz": 4, "ext": "jpg", "anno_path": "Football/groundtruth_rect.txt", - # "object_class": "person head"}, - # {"name": "Football1", "path": "Football1/img", "startFrame": 1, "endFrame": 74, "nz": 4, "ext": "jpg", "anno_path": "Football1/groundtruth_rect.txt", - # "object_class": "face"}, - # {"name": "Freeman1", "path": "Freeman1/img", "startFrame": 1, "endFrame": 326, "nz": 4, "ext": "jpg", "anno_path": "Freeman1/groundtruth_rect.txt", - # "object_class": "face"}, - # {"name": "Freeman3", "path": "Freeman3/img", "startFrame": 1, "endFrame": 460, "nz": 4, "ext": "jpg", "anno_path": "Freeman3/groundtruth_rect.txt", - # "object_class": "face"}, - # {"name": "Freeman4", "path": "Freeman4/img", "startFrame": 1, "endFrame": 283, "nz": 4, "ext": "jpg", "anno_path": "Freeman4/groundtruth_rect.txt", - # "object_class": "face"}, - # {"name": "Girl", "path": "Girl/img", "startFrame": 1, "endFrame": 500, "nz": 4, "ext": "jpg", "anno_path": "Girl/groundtruth_rect.txt", - # "object_class": "face"}, - # {"name": "Girl2", "path": "Girl2/img", "startFrame": 1, "endFrame": 1500, "nz": 4, "ext": "jpg", "anno_path": "Girl2/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Gym", "path": "Gym/img", "startFrame": 1, "endFrame": 767, "nz": 4, "ext": "jpg", "anno_path": "Gym/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Human2", "path": "Human2/img", "startFrame": 1, "endFrame": 1128, "nz": 4, "ext": "jpg", "anno_path": "Human2/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Human3", "path": "Human3/img", "startFrame": 1, "endFrame": 1698, "nz": 4, "ext": "jpg", "anno_path": "Human3/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Human4", "path": "Human4/img", "startFrame": 1, "endFrame": 667, "nz": 4, "ext": "jpg", "anno_path": "Human4/groundtruth_rect.2.txt", - # "object_class": "person"}, - # {"name": "Human5", "path": "Human5/img", "startFrame": 1, "endFrame": 713, "nz": 4, "ext": "jpg", "anno_path": "Human5/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Human6", "path": "Human6/img", "startFrame": 1, "endFrame": 792, "nz": 4, "ext": "jpg", "anno_path": "Human6/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Human7", "path": "Human7/img", "startFrame": 1, "endFrame": 250, "nz": 4, "ext": "jpg", "anno_path": "Human7/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Human8", "path": "Human8/img", "startFrame": 1, "endFrame": 128, "nz": 4, "ext": "jpg", "anno_path": "Human8/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Human9", "path": "Human9/img", "startFrame": 1, "endFrame": 305, "nz": 4, "ext": "jpg", "anno_path": "Human9/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Ironman", "path": "Ironman/img", "startFrame": 1, "endFrame": 166, "nz": 4, "ext": "jpg", "anno_path": "Ironman/groundtruth_rect.txt", - # "object_class": "person head"}, - # {"name": "Jogging_1", "path": "Jogging/img", "startFrame": 1, "endFrame": 307, "nz": 4, "ext": "jpg", "anno_path": "Jogging/groundtruth_rect.1.txt", - # "object_class": "person"}, - # {"name": "Jogging_2", "path": "Jogging/img", "startFrame": 1, "endFrame": 307, "nz": 4, "ext": "jpg", "anno_path": "Jogging/groundtruth_rect.2.txt", - # "object_class": "person"}, - # {"name": "Jump", "path": "Jump/img", "startFrame": 1, "endFrame": 122, "nz": 4, "ext": "jpg", "anno_path": "Jump/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Jumping", "path": "Jumping/img", "startFrame": 1, "endFrame": 313, "nz": 4, "ext": "jpg", "anno_path": "Jumping/groundtruth_rect.txt", - # "object_class": "face"}, - # {"name": "KiteSurf", "path": "KiteSurf/img", "startFrame": 1, "endFrame": 84, "nz": 4, "ext": "jpg", "anno_path": "KiteSurf/groundtruth_rect.txt", - # "object_class": "face"}, - # {"name": "Lemming", "path": "Lemming/img", "startFrame": 1, "endFrame": 1336, "nz": 4, "ext": "jpg", "anno_path": "Lemming/groundtruth_rect.txt", - # "object_class": "other"}, - # {"name": "Liquor", "path": "Liquor/img", "startFrame": 1, "endFrame": 1741, "nz": 4, "ext": "jpg", "anno_path": "Liquor/groundtruth_rect.txt", - # "object_class": "other"}, - # {"name": "Man", "path": "Man/img", "startFrame": 1, "endFrame": 134, "nz": 4, "ext": "jpg", "anno_path": "Man/groundtruth_rect.txt", - # "object_class": "face"}, - # {"name": "Matrix", "path": "Matrix/img", "startFrame": 1, "endFrame": 100, "nz": 4, "ext": "jpg", "anno_path": "Matrix/groundtruth_rect.txt", - # "object_class": "person head"}, - # {"name": "Mhyang", "path": "Mhyang/img", "startFrame": 1, "endFrame": 1490, "nz": 4, "ext": "jpg", "anno_path": "Mhyang/groundtruth_rect.txt", - # "object_class": "face"}, - # {"name": "MotorRolling", "path": "MotorRolling/img", "startFrame": 1, "endFrame": 164, "nz": 4, "ext": "jpg", "anno_path": "MotorRolling/groundtruth_rect.txt", - # "object_class": "vehicle"}, - # {"name": "MountainBike", "path": "MountainBike/img", "startFrame": 1, "endFrame": 228, "nz": 4, "ext": "jpg", "anno_path": "MountainBike/groundtruth_rect.txt", - # "object_class": "bicycle"}, - # {"name": "Panda", "path": "Panda/img", "startFrame": 1, "endFrame": 1000, "nz": 4, "ext": "jpg", "anno_path": "Panda/groundtruth_rect.txt", - # "object_class": "mammal"}, - # {"name": "RedTeam", "path": "RedTeam/img", "startFrame": 1, "endFrame": 1918, "nz": 4, "ext": "jpg", "anno_path": "RedTeam/groundtruth_rect.txt", - # "object_class": "vehicle"}, - # {"name": "Rubik", "path": "Rubik/img", "startFrame": 1, "endFrame": 1997, "nz": 4, "ext": "jpg", "anno_path": "Rubik/groundtruth_rect.txt", - # "object_class": "other"}, - # {"name": "Shaking", "path": "Shaking/img", "startFrame": 1, "endFrame": 365, "nz": 4, "ext": "jpg", "anno_path": "Shaking/groundtruth_rect.txt", - # "object_class": "face"}, - # {"name": "Singer1", "path": "Singer1/img", "startFrame": 1, "endFrame": 351, "nz": 4, "ext": "jpg", "anno_path": "Singer1/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Singer2", "path": "Singer2/img", "startFrame": 1, "endFrame": 366, "nz": 4, "ext": "jpg", "anno_path": "Singer2/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Skater", "path": "Skater/img", "startFrame": 1, "endFrame": 160, "nz": 4, "ext": "jpg", "anno_path": "Skater/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Skater2", "path": "Skater2/img", "startFrame": 1, "endFrame": 435, "nz": 4, "ext": "jpg", "anno_path": "Skater2/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Skating1", "path": "Skating1/img", "startFrame": 1, "endFrame": 400, "nz": 4, "ext": "jpg", "anno_path": "Skating1/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Skating2_1", "path": "Skating2/img", "startFrame": 1, "endFrame": 473, "nz": 4, "ext": "jpg", "anno_path": "Skating2/groundtruth_rect.1.txt", - # "object_class": "person"}, - # {"name": "Skating2_2", "path": "Skating2/img", "startFrame": 1, "endFrame": 473, "nz": 4, "ext": "jpg", "anno_path": "Skating2/groundtruth_rect.2.txt", - # "object_class": "person"}, - # {"name": "Skiing", "path": "Skiing/img", "startFrame": 1, "endFrame": 81, "nz": 4, "ext": "jpg", "anno_path": "Skiing/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Soccer", "path": "Soccer/img", "startFrame": 1, "endFrame": 392, "nz": 4, "ext": "jpg", "anno_path": "Soccer/groundtruth_rect.txt", - # "object_class": "face"}, - # {"name": "Subway", "path": "Subway/img", "startFrame": 1, "endFrame": 175, "nz": 4, "ext": "jpg", "anno_path": "Subway/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Surfer", "path": "Surfer/img", "startFrame": 1, "endFrame": 376, "nz": 4, "ext": "jpg", "anno_path": "Surfer/groundtruth_rect.txt", - # "object_class": "person head"}, - # {"name": "Suv", "path": "Suv/img", "startFrame": 1, "endFrame": 945, "nz": 4, "ext": "jpg", "anno_path": "Suv/groundtruth_rect.txt", - # "object_class": "car"}, - # {"name": "Sylvester", "path": "Sylvester/img", "startFrame": 1, "endFrame": 1345, "nz": 4, "ext": "jpg", "anno_path": "Sylvester/groundtruth_rect.txt", - # "object_class": "other"}, - # {"name": "Tiger1", "path": "Tiger1/img", "startFrame": 1, "endFrame": 354, "nz": 4, "ext": "jpg", "anno_path": "Tiger1/groundtruth_rect.txt", "initOmit": 5, - # "object_class": "other"}, - # {"name": "Tiger2", "path": "Tiger2/img", "startFrame": 1, "endFrame": 365, "nz": 4, "ext": "jpg", "anno_path": "Tiger2/groundtruth_rect.txt", - # "object_class": "other"}, - # {"name": "Toy", "path": "Toy/img", "startFrame": 1, "endFrame": 271, "nz": 4, "ext": "jpg", "anno_path": "Toy/groundtruth_rect.txt", - # "object_class": "other"}, - # {"name": "Trans", "path": "Trans/img", "startFrame": 1, "endFrame": 124, "nz": 4, "ext": "jpg", "anno_path": "Trans/groundtruth_rect.txt", - # "object_class": "other"}, - # {"name": "Trellis", "path": "Trellis/img", "startFrame": 1, "endFrame": 569, "nz": 4, "ext": "jpg", "anno_path": "Trellis/groundtruth_rect.txt", - # "object_class": "face"}, - # {"name": "Twinnings", "path": "Twinnings/img", "startFrame": 1, "endFrame": 472, "nz": 4, "ext": "jpg", "anno_path": "Twinnings/groundtruth_rect.txt", - # "object_class": "other"}, - # {"name": "Vase", "path": "Vase/img", "startFrame": 1, "endFrame": 271, "nz": 4, "ext": "jpg", "anno_path": "Vase/groundtruth_rect.txt", - # "object_class": "other"}, - # {"name": "Walking", "path": "Walking/img", "startFrame": 1, "endFrame": 412, "nz": 4, "ext": "jpg", "anno_path": "Walking/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Walking2", "path": "Walking2/img", "startFrame": 1, "endFrame": 500, "nz": 4, "ext": "jpg", "anno_path": "Walking2/groundtruth_rect.txt", - # "object_class": "person"}, - # {"name": "Woman", "path": "Woman/img", "startFrame": 1, "endFrame": 597, "nz": 4, "ext": "jpg", "anno_path": "Woman/groundtruth_rect.txt", - # "object_class": "person"} + {"name": "Biker", "path": "Biker/img", "startFrame": 1, "endFrame": 142, "nz": 4, "ext": "jpg", "anno_path": "Biker/groundtruth_rect.txt", + "object_class": "person head"}, + {"name": "Bird1", "path": "Bird1/img", "startFrame": 1, "endFrame": 408, "nz": 4, "ext": "jpg", "anno_path": "Bird1/groundtruth_rect.txt", + "object_class": "bird"}, + {"name": "Bird2", "path": "Bird2/img", "startFrame": 1, "endFrame": 99, "nz": 4, "ext": "jpg", "anno_path": "Bird2/groundtruth_rect.txt", + "object_class": "bird"}, + {"name": "BlurBody", "path": "BlurBody/img", "startFrame": 1, "endFrame": 334, "nz": 4, "ext": "jpg", "anno_path": "BlurBody/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "BlurCar1", "path": "BlurCar1/img", "startFrame": 247, "endFrame": 988, "nz": 4, "ext": "jpg", "anno_path": "BlurCar1/groundtruth_rect.txt", + "object_class": "car"}, + {"name": "BlurCar2", "path": "BlurCar2/img", "startFrame": 1, "endFrame": 585, "nz": 4, "ext": "jpg", "anno_path": "BlurCar2/groundtruth_rect.txt", + "object_class": "car"}, + {"name": "BlurCar3", "path": "BlurCar3/img", "startFrame": 3, "endFrame": 359, "nz": 4, "ext": "jpg", "anno_path": "BlurCar3/groundtruth_rect.txt", + "object_class": "car"}, + {"name": "BlurCar4", "path": "BlurCar4/img", "startFrame": 18, "endFrame": 397, "nz": 4, "ext": "jpg", "anno_path": "BlurCar4/groundtruth_rect.txt", + "object_class": "car"}, + {"name": "BlurFace", "path": "BlurFace/img", "startFrame": 1, "endFrame": 493, "nz": 4, "ext": "jpg", "anno_path": "BlurFace/groundtruth_rect.txt", + "object_class": "face"}, + {"name": "BlurOwl", "path": "BlurOwl/img", "startFrame": 1, "endFrame": 631, "nz": 4, "ext": "jpg", "anno_path": "BlurOwl/groundtruth_rect.txt", + "object_class": "other"}, + {"name": "Board", "path": "Board/img", "startFrame": 1, "endFrame": 698, "nz": 5, "ext": "jpg", "anno_path": "Board/groundtruth_rect.txt", + "object_class": "other"}, + {"name": "Bolt", "path": "Bolt/img", "startFrame": 1, "endFrame": 350, "nz": 4, "ext": "jpg", "anno_path": "Bolt/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Bolt2", "path": "Bolt2/img", "startFrame": 1, "endFrame": 293, "nz": 4, "ext": "jpg", "anno_path": "Bolt2/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Box", "path": "Box/img", "startFrame": 1, "endFrame": 1161, "nz": 4, "ext": "jpg", "anno_path": "Box/groundtruth_rect.txt", + "object_class": "other"}, + {"name": "Boy", "path": "Boy/img", "startFrame": 1, "endFrame": 602, "nz": 4, "ext": "jpg", "anno_path": "Boy/groundtruth_rect.txt", + "object_class": "face"}, + {"name": "Car1", "path": "Car1/img", "startFrame": 1, "endFrame": 1020, "nz": 4, "ext": "jpg", "anno_path": "Car1/groundtruth_rect.txt", + "object_class": "car"}, + {"name": "Car2", "path": "Car2/img", "startFrame": 1, "endFrame": 913, "nz": 4, "ext": "jpg", "anno_path": "Car2/groundtruth_rect.txt", + "object_class": "car"}, + {"name": "Car24", "path": "Car24/img", "startFrame": 1, "endFrame": 3059, "nz": 4, "ext": "jpg", "anno_path": "Car24/groundtruth_rect.txt", + "object_class": "car"}, + {"name": "Car4", "path": "Car4/img", "startFrame": 1, "endFrame": 659, "nz": 4, "ext": "jpg", "anno_path": "Car4/groundtruth_rect.txt", + "object_class": "car"}, + {"name": "CarDark", "path": "CarDark/img", "startFrame": 1, "endFrame": 393, "nz": 4, "ext": "jpg", "anno_path": "CarDark/groundtruth_rect.txt", + "object_class": "car"}, + {"name": "CarScale", "path": "CarScale/img", "startFrame": 1, "endFrame": 252, "nz": 4, "ext": "jpg", "anno_path": "CarScale/groundtruth_rect.txt", + "object_class": "car"}, + {"name": "ClifBar", "path": "ClifBar/img", "startFrame": 1, "endFrame": 472, "nz": 4, "ext": "jpg", "anno_path": "ClifBar/groundtruth_rect.txt", + "object_class": "other"}, + {"name": "Coke", "path": "Coke/img", "startFrame": 1, "endFrame": 291, "nz": 4, "ext": "jpg", "anno_path": "Coke/groundtruth_rect.txt", + "object_class": "other"}, + {"name": "Couple", "path": "Couple/img", "startFrame": 1, "endFrame": 140, "nz": 4, "ext": "jpg", "anno_path": "Couple/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Coupon", "path": "Coupon/img", "startFrame": 1, "endFrame": 327, "nz": 4, "ext": "jpg", "anno_path": "Coupon/groundtruth_rect.txt", + "object_class": "other"}, + {"name": "Crossing", "path": "Crossing/img", "startFrame": 1, "endFrame": 120, "nz": 4, "ext": "jpg", "anno_path": "Crossing/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Crowds", "path": "Crowds/img", "startFrame": 1, "endFrame": 347, "nz": 4, "ext": "jpg", "anno_path": "Crowds/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Dancer", "path": "Dancer/img", "startFrame": 1, "endFrame": 225, "nz": 4, "ext": "jpg", "anno_path": "Dancer/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Dancer2", "path": "Dancer2/img", "startFrame": 1, "endFrame": 150, "nz": 4, "ext": "jpg", "anno_path": "Dancer2/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "David", "path": "David/img", "startFrame": 300, "endFrame": 770, "nz": 4, "ext": "jpg", "anno_path": "David/groundtruth_rect.txt", + "object_class": "face"}, + {"name": "David2", "path": "David2/img", "startFrame": 1, "endFrame": 537, "nz": 4, "ext": "jpg", "anno_path": "David2/groundtruth_rect.txt", + "object_class": "face"}, + {"name": "David3", "path": "David3/img", "startFrame": 1, "endFrame": 252, "nz": 4, "ext": "jpg", "anno_path": "David3/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Deer", "path": "Deer/img", "startFrame": 1, "endFrame": 71, "nz": 4, "ext": "jpg", "anno_path": "Deer/groundtruth_rect.txt", + "object_class": "mammal"}, + {"name": "Diving", "path": "Diving/img", "startFrame": 1, "endFrame": 215, "nz": 4, "ext": "jpg", "anno_path": "Diving/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Dog", "path": "Dog/img", "startFrame": 1, "endFrame": 127, "nz": 4, "ext": "jpg", "anno_path": "Dog/groundtruth_rect.txt", + "object_class": "dog"}, + {"name": "Dog1", "path": "Dog1/img", "startFrame": 1, "endFrame": 1350, "nz": 4, "ext": "jpg", "anno_path": "Dog1/groundtruth_rect.txt", + "object_class": "dog"}, + {"name": "Doll", "path": "Doll/img", "startFrame": 1, "endFrame": 3872, "nz": 4, "ext": "jpg", "anno_path": "Doll/groundtruth_rect.txt", + "object_class": "other"}, + {"name": "DragonBaby", "path": "DragonBaby/img", "startFrame": 1, "endFrame": 113, "nz": 4, "ext": "jpg", "anno_path": "DragonBaby/groundtruth_rect.txt", + "object_class": "face"}, + {"name": "Dudek", "path": "Dudek/img", "startFrame": 1, "endFrame": 1145, "nz": 4, "ext": "jpg", "anno_path": "Dudek/groundtruth_rect.txt", + "object_class": "face"}, + {"name": "FaceOcc1", "path": "FaceOcc1/img", "startFrame": 1, "endFrame": 892, "nz": 4, "ext": "jpg", "anno_path": "FaceOcc1/groundtruth_rect.txt", + "object_class": "face"}, + {"name": "FaceOcc2", "path": "FaceOcc2/img", "startFrame": 1, "endFrame": 812, "nz": 4, "ext": "jpg", "anno_path": "FaceOcc2/groundtruth_rect.txt", + "object_class": "face"}, + {"name": "Fish", "path": "Fish/img", "startFrame": 1, "endFrame": 476, "nz": 4, "ext": "jpg", "anno_path": "Fish/groundtruth_rect.txt", + "object_class": "other"}, + {"name": "FleetFace", "path": "FleetFace/img", "startFrame": 1, "endFrame": 707, "nz": 4, "ext": "jpg", "anno_path": "FleetFace/groundtruth_rect.txt", + "object_class": "face"}, + {"name": "Football", "path": "Football/img", "startFrame": 1, "endFrame": 362, "nz": 4, "ext": "jpg", "anno_path": "Football/groundtruth_rect.txt", + "object_class": "person head"}, + {"name": "Football1", "path": "Football1/img", "startFrame": 1, "endFrame": 74, "nz": 4, "ext": "jpg", "anno_path": "Football1/groundtruth_rect.txt", + "object_class": "face"}, + {"name": "Freeman1", "path": "Freeman1/img", "startFrame": 1, "endFrame": 326, "nz": 4, "ext": "jpg", "anno_path": "Freeman1/groundtruth_rect.txt", + "object_class": "face"}, + {"name": "Freeman3", "path": "Freeman3/img", "startFrame": 1, "endFrame": 460, "nz": 4, "ext": "jpg", "anno_path": "Freeman3/groundtruth_rect.txt", + "object_class": "face"}, + {"name": "Freeman4", "path": "Freeman4/img", "startFrame": 1, "endFrame": 283, "nz": 4, "ext": "jpg", "anno_path": "Freeman4/groundtruth_rect.txt", + "object_class": "face"}, + {"name": "Girl", "path": "Girl/img", "startFrame": 1, "endFrame": 500, "nz": 4, "ext": "jpg", "anno_path": "Girl/groundtruth_rect.txt", + "object_class": "face"}, + {"name": "Girl2", "path": "Girl2/img", "startFrame": 1, "endFrame": 1500, "nz": 4, "ext": "jpg", "anno_path": "Girl2/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Gym", "path": "Gym/img", "startFrame": 1, "endFrame": 767, "nz": 4, "ext": "jpg", "anno_path": "Gym/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Human2", "path": "Human2/img", "startFrame": 1, "endFrame": 1128, "nz": 4, "ext": "jpg", "anno_path": "Human2/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Human3", "path": "Human3/img", "startFrame": 1, "endFrame": 1698, "nz": 4, "ext": "jpg", "anno_path": "Human3/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Human4", "path": "Human4/img", "startFrame": 1, "endFrame": 667, "nz": 4, "ext": "jpg", "anno_path": "Human4/groundtruth_rect.2.txt", + "object_class": "person"}, + {"name": "Human5", "path": "Human5/img", "startFrame": 1, "endFrame": 713, "nz": 4, "ext": "jpg", "anno_path": "Human5/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Human6", "path": "Human6/img", "startFrame": 1, "endFrame": 792, "nz": 4, "ext": "jpg", "anno_path": "Human6/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Human7", "path": "Human7/img", "startFrame": 1, "endFrame": 250, "nz": 4, "ext": "jpg", "anno_path": "Human7/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Human8", "path": "Human8/img", "startFrame": 1, "endFrame": 128, "nz": 4, "ext": "jpg", "anno_path": "Human8/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Human9", "path": "Human9/img", "startFrame": 1, "endFrame": 305, "nz": 4, "ext": "jpg", "anno_path": "Human9/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Ironman", "path": "Ironman/img", "startFrame": 1, "endFrame": 166, "nz": 4, "ext": "jpg", "anno_path": "Ironman/groundtruth_rect.txt", + "object_class": "person head"}, + {"name": "Jogging_1", "path": "Jogging/img", "startFrame": 1, "endFrame": 307, "nz": 4, "ext": "jpg", "anno_path": "Jogging/groundtruth_rect.1.txt", + "object_class": "person"}, + {"name": "Jogging_2", "path": "Jogging/img", "startFrame": 1, "endFrame": 307, "nz": 4, "ext": "jpg", "anno_path": "Jogging/groundtruth_rect.2.txt", + "object_class": "person"}, + {"name": "Jump", "path": "Jump/img", "startFrame": 1, "endFrame": 122, "nz": 4, "ext": "jpg", "anno_path": "Jump/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Jumping", "path": "Jumping/img", "startFrame": 1, "endFrame": 313, "nz": 4, "ext": "jpg", "anno_path": "Jumping/groundtruth_rect.txt", + "object_class": "face"}, + {"name": "KiteSurf", "path": "KiteSurf/img", "startFrame": 1, "endFrame": 84, "nz": 4, "ext": "jpg", "anno_path": "KiteSurf/groundtruth_rect.txt", + "object_class": "face"}, + {"name": "Lemming", "path": "Lemming/img", "startFrame": 1, "endFrame": 1336, "nz": 4, "ext": "jpg", "anno_path": "Lemming/groundtruth_rect.txt", + "object_class": "other"}, + {"name": "Liquor", "path": "Liquor/img", "startFrame": 1, "endFrame": 1741, "nz": 4, "ext": "jpg", "anno_path": "Liquor/groundtruth_rect.txt", + "object_class": "other"}, + {"name": "Man", "path": "Man/img", "startFrame": 1, "endFrame": 134, "nz": 4, "ext": "jpg", "anno_path": "Man/groundtruth_rect.txt", + "object_class": "face"}, + {"name": "Matrix", "path": "Matrix/img", "startFrame": 1, "endFrame": 100, "nz": 4, "ext": "jpg", "anno_path": "Matrix/groundtruth_rect.txt", + "object_class": "person head"}, + {"name": "Mhyang", "path": "Mhyang/img", "startFrame": 1, "endFrame": 1490, "nz": 4, "ext": "jpg", "anno_path": "Mhyang/groundtruth_rect.txt", + "object_class": "face"}, + {"name": "MotorRolling", "path": "MotorRolling/img", "startFrame": 1, "endFrame": 164, "nz": 4, "ext": "jpg", "anno_path": "MotorRolling/groundtruth_rect.txt", + "object_class": "vehicle"}, + {"name": "MountainBike", "path": "MountainBike/img", "startFrame": 1, "endFrame": 228, "nz": 4, "ext": "jpg", "anno_path": "MountainBike/groundtruth_rect.txt", + "object_class": "bicycle"}, + {"name": "Panda", "path": "Panda/img", "startFrame": 1, "endFrame": 1000, "nz": 4, "ext": "jpg", "anno_path": "Panda/groundtruth_rect.txt", + "object_class": "mammal"}, + {"name": "RedTeam", "path": "RedTeam/img", "startFrame": 1, "endFrame": 1918, "nz": 4, "ext": "jpg", "anno_path": "RedTeam/groundtruth_rect.txt", + "object_class": "vehicle"}, + {"name": "Rubik", "path": "Rubik/img", "startFrame": 1, "endFrame": 1997, "nz": 4, "ext": "jpg", "anno_path": "Rubik/groundtruth_rect.txt", + "object_class": "other"}, + {"name": "Shaking", "path": "Shaking/img", "startFrame": 1, "endFrame": 365, "nz": 4, "ext": "jpg", "anno_path": "Shaking/groundtruth_rect.txt", + "object_class": "face"}, + {"name": "Singer1", "path": "Singer1/img", "startFrame": 1, "endFrame": 351, "nz": 4, "ext": "jpg", "anno_path": "Singer1/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Singer2", "path": "Singer2/img", "startFrame": 1, "endFrame": 366, "nz": 4, "ext": "jpg", "anno_path": "Singer2/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Skater", "path": "Skater/img", "startFrame": 1, "endFrame": 160, "nz": 4, "ext": "jpg", "anno_path": "Skater/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Skater2", "path": "Skater2/img", "startFrame": 1, "endFrame": 435, "nz": 4, "ext": "jpg", "anno_path": "Skater2/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Skating1", "path": "Skating1/img", "startFrame": 1, "endFrame": 400, "nz": 4, "ext": "jpg", "anno_path": "Skating1/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Skating2_1", "path": "Skating2/img", "startFrame": 1, "endFrame": 473, "nz": 4, "ext": "jpg", "anno_path": "Skating2/groundtruth_rect.1.txt", + "object_class": "person"}, + {"name": "Skating2_2", "path": "Skating2/img", "startFrame": 1, "endFrame": 473, "nz": 4, "ext": "jpg", "anno_path": "Skating2/groundtruth_rect.2.txt", + "object_class": "person"}, + {"name": "Skiing", "path": "Skiing/img", "startFrame": 1, "endFrame": 81, "nz": 4, "ext": "jpg", "anno_path": "Skiing/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Soccer", "path": "Soccer/img", "startFrame": 1, "endFrame": 392, "nz": 4, "ext": "jpg", "anno_path": "Soccer/groundtruth_rect.txt", + "object_class": "face"}, + {"name": "Subway", "path": "Subway/img", "startFrame": 1, "endFrame": 175, "nz": 4, "ext": "jpg", "anno_path": "Subway/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Surfer", "path": "Surfer/img", "startFrame": 1, "endFrame": 376, "nz": 4, "ext": "jpg", "anno_path": "Surfer/groundtruth_rect.txt", + "object_class": "person head"}, + {"name": "Suv", "path": "Suv/img", "startFrame": 1, "endFrame": 945, "nz": 4, "ext": "jpg", "anno_path": "Suv/groundtruth_rect.txt", + "object_class": "car"}, + {"name": "Sylvester", "path": "Sylvester/img", "startFrame": 1, "endFrame": 1345, "nz": 4, "ext": "jpg", "anno_path": "Sylvester/groundtruth_rect.txt", + "object_class": "other"}, + {"name": "Tiger1", "path": "Tiger1/img", "startFrame": 1, "endFrame": 354, "nz": 4, "ext": "jpg", "anno_path": "Tiger1/groundtruth_rect.txt", "initOmit": 5, + "object_class": "other"}, + {"name": "Tiger2", "path": "Tiger2/img", "startFrame": 1, "endFrame": 365, "nz": 4, "ext": "jpg", "anno_path": "Tiger2/groundtruth_rect.txt", + "object_class": "other"}, + {"name": "Toy", "path": "Toy/img", "startFrame": 1, "endFrame": 271, "nz": 4, "ext": "jpg", "anno_path": "Toy/groundtruth_rect.txt", + "object_class": "other"}, + {"name": "Trans", "path": "Trans/img", "startFrame": 1, "endFrame": 124, "nz": 4, "ext": "jpg", "anno_path": "Trans/groundtruth_rect.txt", + "object_class": "other"}, + {"name": "Trellis", "path": "Trellis/img", "startFrame": 1, "endFrame": 569, "nz": 4, "ext": "jpg", "anno_path": "Trellis/groundtruth_rect.txt", + "object_class": "face"}, + {"name": "Twinnings", "path": "Twinnings/img", "startFrame": 1, "endFrame": 472, "nz": 4, "ext": "jpg", "anno_path": "Twinnings/groundtruth_rect.txt", + "object_class": "other"}, + {"name": "Vase", "path": "Vase/img", "startFrame": 1, "endFrame": 271, "nz": 4, "ext": "jpg", "anno_path": "Vase/groundtruth_rect.txt", + "object_class": "other"}, + {"name": "Walking", "path": "Walking/img", "startFrame": 1, "endFrame": 412, "nz": 4, "ext": "jpg", "anno_path": "Walking/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Walking2", "path": "Walking2/img", "startFrame": 1, "endFrame": 500, "nz": 4, "ext": "jpg", "anno_path": "Walking2/groundtruth_rect.txt", + "object_class": "person"}, + {"name": "Woman", "path": "Woman/img", "startFrame": 1, "endFrame": 597, "nz": 4, "ext": "jpg", "anno_path": "Woman/groundtruth_rect.txt", + "object_class": "person"} ] From e3af11ed96fe3b1a1ff25839604c4135ef15d896 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Mon, 1 Apr 2024 03:32:19 -0400 Subject: [PATCH 10/17] Light cleanup of unused variables --- tools/eval/datasets/otb100.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/eval/datasets/otb100.py b/tools/eval/datasets/otb100.py index fa4b0e7e..6754348c 100644 --- a/tools/eval/datasets/otb100.py +++ b/tools/eval/datasets/otb100.py @@ -195,14 +195,12 @@ def __init__(self, root): meta_data[sequence_info['name']]['gt_rect'] = ground_truth_rect meta_data[sequence_info['name']]['attr'] = [sequence_info["object_class"]] - self.data = meta_data - self.root = root self.videos = {} pbar = tqdm(meta_data.keys(), desc='Loading OTB', ncols=100) for video in pbar: pbar.set_postfix_str(video) self.videos[video] = Video(video, - self.root, + root, meta_data[video]['video_dir'], meta_data[video]['init_rect'], meta_data[video]['img_names'], From 8f74f73e1dc15500cab7ea6baaffa6984ce519af Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Mon, 1 Apr 2024 03:34:23 -0400 Subject: [PATCH 11/17] Add citation and slight cleanup --- tools/eval/datasets/otb100.py | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/tools/eval/datasets/otb100.py b/tools/eval/datasets/otb100.py index 6754348c..f48aabc0 100644 --- a/tools/eval/datasets/otb100.py +++ b/tools/eval/datasets/otb100.py @@ -184,10 +184,9 @@ def __init__(self, root): init_omit = sequence_info['initOmit'] frames = [f'{root}/OTB100/{sequence_path}/{frame_num:0{nz}}.{ext}' for \ frame_num in range(start_frame+init_omit, end_frame+1)] - anno_path = f'{root}/OTB100/{sequence_info["anno_path"]}' - ground_truth_rect = load_text_numpy(str(anno_path), (',', None), np.float64)[init_omit:,:] + meta_data[sequence_info['name']] = {} meta_data[sequence_info['name']]['video_dir'] = sequence_info['path'] meta_data[sequence_info['name']]['init_rect'] = ground_truth_rect[0] @@ -252,18 +251,14 @@ def get_axis_aligned_bbox(region): return cx, cy, w, h def load_text_numpy(path, delimiter, dtype): - if isinstance(delimiter, (tuple, list)): - for d in delimiter: - try: - ground_truth_rect = np.loadtxt(path, delimiter=d, dtype=dtype) - return ground_truth_rect - except: - pass - - raise Exception('Could not read file {}'.format(path)) - else: - ground_truth_rect = np.loadtxt(path, delimiter=delimiter, dtype=dtype) - return ground_truth_rect + for d in delimiter: + try: + ground_truth_rect = np.loadtxt(path, delimiter=d, dtype=dtype) + return ground_truth_rect + except: + pass + + raise Exception('Could not read file {}'.format(path)) class OTB100: def __init__(self, root): @@ -329,7 +324,7 @@ def print_result(self): benchmark.show_result(**evaluation_results, show_video_level=False) - +# Sourced from https://github.com/lpylpy0514/VitTracker sequence_info_list = [ {"name": "Basketball", "path": "Basketball/img", "startFrame": 1, "endFrame": 725, "nz": 4, "ext": "jpg", "anno_path": "Basketball/groundtruth_rect.txt", "object_class": "person"}, From 93fb68e3db48c07c02a570bc9fe06b27e2a968bd Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Mon, 1 Apr 2024 03:47:12 -0400 Subject: [PATCH 12/17] Update README --- tools/eval/README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/eval/README.md b/tools/eval/README.md index 72824c88..fc6643e6 100644 --- a/tools/eval/README.md +++ b/tools/eval/README.md @@ -221,7 +221,7 @@ python eval.py -m pphumanseg_q -d mini_supervisely -dr /path/to/pphumanseg 1. The official site is http://cvlab.hanyang.ac.kr/. 2. In case it is down, users can download from the alternative [Google Drive Link](https://drive.google.com/drive/folders/1iTwCQAMgzdWWrlwncOjpshuHvipIWPMN?usp=sharing). -Download both the `OTB100.zip` and `OTB.json`, organize files as follow: +Download `OTB100.zip` and unzip it in a directory, the tree structure should appear as follows: ```shell $ tree -L 2 /path/to/otb100 @@ -230,8 +230,7 @@ $ tree -L 2 /path/to/otb100 │   ├── groundtruth_rect.txt │   └── img ├── ... -├── Woman -└── OTB.json +└── Woman ``` From 8936ff3233fb40fff0c2a08829c44b20df1dcbd4 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Mon, 8 Apr 2024 03:10:29 -0400 Subject: [PATCH 13/17] Update README --- tools/eval/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/eval/README.md b/tools/eval/README.md index fc6643e6..9380a7dd 100644 --- a/tools/eval/README.md +++ b/tools/eval/README.md @@ -22,7 +22,7 @@ Supported datasets: - [ICDAR](#icdar2003) - [IIIT5K](#iiit5k) - [Mini Supervisely](#mini-supervisely) -- [OTB-2015](#otb-2015) +- [OTB-100](#otb-100) ## ImageNet @@ -214,7 +214,7 @@ Run evaluation on quantized model with the following command : python eval.py -m pphumanseg_q -d mini_supervisely -dr /path/to/pphumanseg ``` -## OTB-2015 +## OTB-100 ### Prepare data From b232a9b28b431de8185e67018c636c877e453e5c Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Mon, 8 Apr 2024 03:32:36 -0400 Subject: [PATCH 14/17] Update progress bars to show the necessary outer loops only --- tools/eval/datasets/otb100.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/tools/eval/datasets/otb100.py b/tools/eval/datasets/otb100.py index f48aabc0..1ccea8bc 100644 --- a/tools/eval/datasets/otb100.py +++ b/tools/eval/datasets/otb100.py @@ -195,9 +195,7 @@ def __init__(self, root): meta_data[sequence_info['name']]['attr'] = [sequence_info["object_class"]] self.videos = {} - pbar = tqdm(meta_data.keys(), desc='Loading OTB', ncols=100) - for video in pbar: - pbar.set_postfix_str(video) + for video in meta_data.keys(): self.videos[video] = Video(video, root, meta_data[video]['video_dir'], @@ -274,7 +272,7 @@ def name(self): return self.__class__.__name__ def eval(self, model): - for v_idx, video in enumerate(self.dataset): + for video in tqdm(self.dataset, desc="Evaluating: ", total=100, ncols=100): total_time = 0 pred_bboxes = [] scores = [] @@ -303,15 +301,10 @@ def eval(self, model): model_path = os.path.join('OTB_results') os.makedirs(model_path, exist_ok=True) result_path = os.path.join(model_path, '{}.txt'.format(video.name)) - print(result_path) with open(result_path, 'w') as f: for bbox in pred_bboxes: f.write(','.join(map(str, bbox)) + '\n') - avg_fps = len(video) / total_time if total_time > 0 else 0 - print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format( - v_idx + 1, video.name, total_time, avg_fps)) - def print_result(self): benchmark = OPEBenchmark(self.dataset) num_cores = cpu_count() @@ -319,7 +312,7 @@ def print_result(self): metrics = ["success", "precision", "norm_precision"] for metric in metrics: with Pool(processes=min(num_cores, max(1, num_cores - 1))) as pool: - for ret in tqdm(pool.imap_unordered(benchmark.evaluate, [metric], 1), desc=f'eval {metric}', total=1, ncols=100): + for ret in pool.imap_unordered(benchmark.evaluate, [metric], 1): evaluation_results[metric] = ret benchmark.show_result(**evaluation_results, show_video_level=False) From 4e575578bf9b2f84b218f3464e2604153643cb5f Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Mon, 8 Apr 2024 04:22:24 -0400 Subject: [PATCH 15/17] Remove unnecessary code --- tools/eval/datasets/otb100.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/tools/eval/datasets/otb100.py b/tools/eval/datasets/otb100.py index 1ccea8bc..f780b7a9 100644 --- a/tools/eval/datasets/otb100.py +++ b/tools/eval/datasets/otb100.py @@ -273,30 +273,20 @@ def name(self): def eval(self, model): for video in tqdm(self.dataset, desc="Evaluating: ", total=100, ncols=100): - total_time = 0 pred_bboxes = [] - scores = [] - track_times = [] for idx, (img, gt_bbox) in enumerate(video): img = cv.cvtColor(img, cv.COLOR_BGR2RGB) - tic = cv.getTickCount() - if idx == 0: cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox)) gt_bbox_ = (int(cx - w / 2), int(cy - h / 2), int(w), int(h)) model.init(img, gt_bbox_) pred_bbox = gt_bbox_ - scores.append(None) else: isLocated, bbox, score = model.infer(img) pred_bbox = bbox - scores.append(score) pred_bboxes.append(pred_bbox) - toc = (cv.getTickCount() - tic) / cv.getTickFrequency() - total_time += toc - track_times.append(toc) model_path = os.path.join('OTB_results') os.makedirs(model_path, exist_ok=True) From b73105962360c215efcddfe2c546cb191cf9f3c1 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Mon, 8 Apr 2024 04:40:57 -0400 Subject: [PATCH 16/17] Instead of saving, put it in a global dictionary --- tools/eval/datasets/otb100.py | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/tools/eval/datasets/otb100.py b/tools/eval/datasets/otb100.py index f780b7a9..af3373b2 100644 --- a/tools/eval/datasets/otb100.py +++ b/tools/eval/datasets/otb100.py @@ -1,24 +1,23 @@ import os -import json import numpy as np import cv2 as cv from colorama import Style, Fore from tqdm import tqdm from multiprocessing import Pool, cpu_count +PRED_BBOXES_DICT = {} + def overlap_ratio(rect1, rect2): """Calculate the Intersection over Union (IoU) overlap ratio between two sets of rectangles.""" tl = np.maximum(rect1[:, :2], rect2[:, :2]) br = np.minimum(rect1[:, :2] + rect1[:, 2:] - 1.0, rect2[:, :2] + rect2[:, 2:] - 1.0) sz = np.maximum(br - tl + 1.0, 0) - # Area intersection = np.prod(sz, axis=1) union = np.prod(rect1[:, 2:], axis=1) + np.prod(rect2[:, 2:], axis=1) - intersection iou = np.clip(intersection / union, 0, 1) return iou - def success_overlap(gt_bb, result_bb, n_frame): """Calculate the success rate based on the overlap ratio between ground truth and predicted bounding boxes.""" thresholds_overlap = np.arange(0, 1.05, 0.05) @@ -157,17 +156,11 @@ def __iter__(self): yield cv.imread(self.img_names[i]), self.gt_traj[i] def load_tracker(self): - """Load tracker results from file.""" - traj_file = os.path.join("OTB_results", self.name+'.txt') - if os.path.exists(traj_file): - with open(traj_file, 'r') as f: - pred_traj = [list(map(float, x.strip().split(','))) for x in f.readlines()] - if len(pred_traj) != len(self.gt_traj): - print("tracker", len(pred_traj), len(self.gt_traj), self.name) - else: - return pred_traj + if self.name in PRED_BBOXES_DICT: + return PRED_BBOXES_DICT[self.name] else: - print(traj_file) + print(f"No prediction found for video {self.name}") + return None class OTBDATASET: def __init__(self, root): @@ -274,7 +267,6 @@ def name(self): def eval(self, model): for video in tqdm(self.dataset, desc="Evaluating: ", total=100, ncols=100): pred_bboxes = [] - for idx, (img, gt_bbox) in enumerate(video): img = cv.cvtColor(img, cv.COLOR_BGR2RGB) if idx == 0: @@ -283,17 +275,9 @@ def eval(self, model): model.init(img, gt_bbox_) pred_bbox = gt_bbox_ else: - isLocated, bbox, score = model.infer(img) - pred_bbox = bbox - + pred_bbox = model.infer(img)[1] pred_bboxes.append(pred_bbox) - - model_path = os.path.join('OTB_results') - os.makedirs(model_path, exist_ok=True) - result_path = os.path.join(model_path, '{}.txt'.format(video.name)) - with open(result_path, 'w') as f: - for bbox in pred_bboxes: - f.write(','.join(map(str, bbox)) + '\n') + PRED_BBOXES_DICT[video.name] = pred_bboxes def print_result(self): benchmark = OPEBenchmark(self.dataset) From 66651cf95f8bf4c67d0fe27252b2d19d31be7947 Mon Sep 17 00:00:00 2001 From: Ryan Lee Date: Thu, 6 Jun 2024 03:03:44 -0400 Subject: [PATCH 17/17] Remove PRED_BBOXES_DICT as a global variable --- tools/eval/datasets/otb100.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tools/eval/datasets/otb100.py b/tools/eval/datasets/otb100.py index af3373b2..1684ceed 100644 --- a/tools/eval/datasets/otb100.py +++ b/tools/eval/datasets/otb100.py @@ -5,8 +5,6 @@ from tqdm import tqdm from multiprocessing import Pool, cpu_count -PRED_BBOXES_DICT = {} - def overlap_ratio(rect1, rect2): """Calculate the Intersection over Union (IoU) overlap ratio between two sets of rectangles.""" tl = np.maximum(rect1[:, :2], rect2[:, :2]) @@ -125,7 +123,7 @@ def show_result(self, success, precision=None, norm_precision=None, show_video_l print('-' * len(header1)) class Video: - def __init__(self, name, root, video_dir, init_rect, img_names, gt_rect, attr): + def __init__(self, name, root, video_dir, init_rect, img_names, gt_rect, attr, pred_bboxes_dict): self.name = name self.video_dir = video_dir self.init_rect = init_rect @@ -138,6 +136,7 @@ def __init__(self, name, root, video_dir, init_rect, img_names, gt_rect, attr): assert img is not None, self.img_names[0] self.width = img.shape[1] self.height = img.shape[0] + self.pred_bboxes_dict = pred_bboxes_dict def __len__(self): return len(self.img_names) @@ -156,14 +155,15 @@ def __iter__(self): yield cv.imread(self.img_names[i]), self.gt_traj[i] def load_tracker(self): - if self.name in PRED_BBOXES_DICT: - return PRED_BBOXES_DICT[self.name] + if self.name in self.pred_bboxes_dict: + return self.pred_bboxes_dict[self.name] else: print(f"No prediction found for video {self.name}") return None class OTBDATASET: def __init__(self, root): + self.pred_bboxes_dict = {} meta_data = {} for sequence_info in sequence_info_list: sequence_path = sequence_info['path'] @@ -195,7 +195,8 @@ def __init__(self, root): meta_data[video]['init_rect'], meta_data[video]['img_names'], meta_data[video]['gt_rect'], - meta_data[video]['attr']) + meta_data[video]['attr'], + self.pred_bboxes_dict) self.attr = {'ALL': list(self.videos.keys())} all_attributes = [x.attr for x in self.videos.values()] all_attributes = set(sum(all_attributes, [])) @@ -277,7 +278,7 @@ def eval(self, model): else: pred_bbox = model.infer(img)[1] pred_bboxes.append(pred_bbox) - PRED_BBOXES_DICT[video.name] = pred_bboxes + self.dataset.pred_bboxes_dict[video.name] = pred_bboxes def print_result(self): benchmark = OPEBenchmark(self.dataset)