From cfdedf805b6a5fb251dd528290252b6260b9b487 Mon Sep 17 00:00:00 2001
From: Ryan Lee <ryanl1288@hotmail.com>
Date: Wed, 6 Mar 2024 01:09:51 -0500
Subject: [PATCH 01/17] Starting point from previous PR

---
 tools/eval/README.md            |  29 +++
 tools/eval/datasets/__init__.py |   2 +
 tools/eval/datasets/otb.py      | 378 ++++++++++++++++++++++++++++++++
 tools/eval/eval.py              |   9 +
 4 files changed, 418 insertions(+)
 create mode 100644 tools/eval/datasets/otb.py

diff --git a/tools/eval/README.md b/tools/eval/README.md
index 1453d8e8..1b216cd8 100644
--- a/tools/eval/README.md
+++ b/tools/eval/README.md
@@ -22,6 +22,7 @@ Supported datasets:
 - [ICDAR](#icdar2003)
 - [IIIT5K](#iiit5k)
 - [Mini Supervisely](#mini-supervisely)
+- [OTB](#otb)
 
 ## ImageNet
 
@@ -211,4 +212,32 @@ Run evaluation on quantized model with the following command :
 
 ```shell
 python eval.py -m pphumanseg_q -d mini_supervisely -dr /path/to/pphumanseg
+```
+
+## OTB
+
+### Prepare data
+
+Please visit [here](https://drive.google.com/drive/folders/1DZvtlnG9U94cgLD6Yi3eU7r6QZJkjdl-?usp=sharing) to download the OTB dataset and the json file. Organize files as follow:
+
+```shell
+$ tree -L 2 /path/to/imagenet
+.
+├── Basketball
+│   ├── groundtruth_rect.txt
+│   ├── img
+│   │        └── XXXX.jpg
+├── Biker
+│   ├── groundtruth_rect.txt
+│   ├── img
+│   │        └── XXXX.jpg
+└── OTB.json
+```
+
+### Evaluation
+
+Run evaluation with the following command:
+
+```shell
+python eval.py -m dasiamrpn -d otb -dr /path/to/otb
 ```
\ No newline at end of file
diff --git a/tools/eval/datasets/__init__.py b/tools/eval/datasets/__init__.py
index 5ed59faa..bb1a5375 100644
--- a/tools/eval/datasets/__init__.py
+++ b/tools/eval/datasets/__init__.py
@@ -4,6 +4,7 @@
 from .icdar import ICDAR
 from .iiit5k import IIIT5K
 from .minisupervisely import MiniSupervisely
+from .otb import OTB
 
 class Registery:
     def __init__(self, name):
@@ -23,3 +24,4 @@ def register(self, item):
 DATASETS.register(ICDAR)
 DATASETS.register(IIIT5K)
 DATASETS.register(MiniSupervisely)
+DATASETS.register(OTB)
diff --git a/tools/eval/datasets/otb.py b/tools/eval/datasets/otb.py
new file mode 100644
index 00000000..7ebe6c5f
--- /dev/null
+++ b/tools/eval/datasets/otb.py
@@ -0,0 +1,378 @@
+import os
+import json
+import numpy as np
+import cv2 as cv
+from colorama import Style, Fore
+from tqdm import tqdm
+from multiprocessing import Pool
+
+def overlap_ratio(rect1, rect2):
+    '''Compute overlap ratio between two rects
+    Args
+        rect:2d array of N x [x,y,w,h]
+    Return:
+        iou
+    '''
+    left = np.maximum(rect1[:,0], rect2[:,0])
+    right = np.minimum(rect1[:,0]+rect1[:,2], rect2[:,0]+rect2[:,2])
+    top = np.maximum(rect1[:,1], rect2[:,1])
+    bottom = np.minimum(rect1[:,1]+rect1[:,3], rect2[:,1]+rect2[:,3])
+
+    intersect = np.maximum(0,right - left) * np.maximum(0,bottom - top)
+    union = rect1[:,2]*rect1[:,3] + rect2[:,2]*rect2[:,3] - intersect
+    iou = intersect / union
+    iou = np.maximum(np.minimum(1, iou), 0)
+    return iou
+def success_overlap(gt_bb, result_bb, n_frame):
+    thresholds_overlap = np.arange(0, 1.05, 0.05)
+    success = np.zeros(len(thresholds_overlap))
+    iou = np.ones(len(gt_bb)) * (-1)
+    mask = np.sum(gt_bb[:, 2:] > 0, axis=1) == 2
+    iou[mask] = overlap_ratio(gt_bb[mask], result_bb[mask])
+    for i in range(len(thresholds_overlap)):
+        success[i] = np.sum(iou > thresholds_overlap[i]) / float(n_frame)
+    return success
+
+def success_error(gt_center, result_center, thresholds, n_frame):
+    success = np.zeros(len(thresholds))
+    dist = np.ones(len(gt_center)) * (-1)
+    mask = np.sum(gt_center > 0, axis=1) == 2
+    dist[mask] = np.sqrt(np.sum(
+        np.power(gt_center[mask] - result_center[mask], 2), axis=1))
+    for i in range(len(thresholds)):
+        success[i] = np.sum(dist <= thresholds[i]) / float(n_frame)
+    return success
+
+class OPEBenchmark:
+    def __init__(self, dataset):
+        self.dataset = dataset
+
+    def convert_bb_to_center(self, bboxes):
+        return np.array([(bboxes[:, 0] + (bboxes[:, 2] - 1) / 2),
+                         (bboxes[:, 1] + (bboxes[:, 3] - 1) / 2)]).T
+
+    def convert_bb_to_norm_center(self, bboxes, gt_wh):
+        return self.convert_bb_to_center(bboxes) / (gt_wh+1e-16)
+
+    def eval_success(self,tracker):
+        success_ret = {}
+        success_ret_ = {}
+        for video in self.dataset:
+            gt_traj = np.array(video.gt_traj)
+            tracker_traj = video.load_tracker()
+            tracker_traj = np.array(tracker_traj)
+            n_frame = len(gt_traj)
+            if hasattr(video, 'absent'):
+                gt_traj = gt_traj[video.absent == 1]
+                tracker_traj = tracker_traj[video.absent == 1]
+            success_ret_[video.name] = success_overlap(gt_traj, tracker_traj, n_frame)
+        success_ret["tracker"] = success_ret_
+        return success_ret
+
+    def eval_precision(self,tracker):
+        precision_ret = {}
+        precision_ret_ = {}
+        for video in self.dataset:
+            gt_traj = np.array(video.gt_traj)
+            tracker_traj = video.load_tracker()
+            tracker_traj = np.array(tracker_traj)
+            n_frame = len(gt_traj)
+            if hasattr(video, 'absent'):
+                gt_traj = gt_traj[video.absent == 1]
+                tracker_traj = tracker_traj[video.absent == 1]
+            gt_center = self.convert_bb_to_center(gt_traj)
+            tracker_center = self.convert_bb_to_center(tracker_traj)
+            thresholds = np.arange(0, 51, 1)
+            precision_ret_[video.name] = success_error(gt_center, tracker_center,
+                    thresholds, n_frame)
+        precision_ret["tracker"] = precision_ret_
+        return precision_ret
+
+    def eval_norm_precision(self,tracker):
+        norm_precision_ret = {}
+        norm_precision_ret_ = {}
+        for video in self.dataset:
+            gt_traj = np.array(video.gt_traj)
+            tracker_traj = video.load_tracker()
+            tracker_traj = np.array(tracker_traj)
+            n_frame = len(gt_traj)
+            if hasattr(video, 'absent'):
+                gt_traj = gt_traj[video.absent == 1]
+                tracker_traj = tracker_traj[video.absent == 1]
+            gt_center_norm = self.convert_bb_to_norm_center(gt_traj, gt_traj[:, 2:4])
+            tracker_center_norm = self.convert_bb_to_norm_center(tracker_traj, gt_traj[:, 2:4])
+            thresholds = np.arange(0, 51, 1) / 100
+            norm_precision_ret_[video.name] = success_error(gt_center_norm,
+                    tracker_center_norm, thresholds, n_frame)
+        norm_precision_ret["tracker"] = norm_precision_ret_
+        return norm_precision_ret
+
+    def show_result(self, success_ret, precision_ret=None,
+            norm_precision_ret=None, show_video_level=False, helight_threshold=0.6):
+        """pretty print result
+        Args:
+            result: returned dict from function eval
+        """
+        # sort tracker
+        tracker_auc = {}
+        for tracker_name in success_ret.keys():
+            auc = np.mean(list(success_ret[tracker_name].values()))
+            tracker_auc[tracker_name] = auc
+        tracker_auc_ = sorted(tracker_auc.items(),
+                             key=lambda x:x[1],
+                             reverse=True)[:20]
+        tracker_names = [x[0] for x in tracker_auc_]
+
+
+        tracker_name_len = max((max([len(x) for x in success_ret.keys()])+2), 12)
+        header = ("|{:^"+str(tracker_name_len)+"}|{:^9}|{:^16}|{:^11}|").format(
+                "Tracker name", "Success", "Norm Precision", "Precision")
+        formatter = "|{:^"+str(tracker_name_len)+"}|{:^9.3f}|{:^16.3f}|{:^11.3f}|"
+        print('-'*len(header))
+        print(header)
+        print('-'*len(header))
+        for tracker_name in tracker_names:
+            success = tracker_auc[tracker_name]
+            if precision_ret is not None:
+                precision = np.mean(list(precision_ret[tracker_name].values()), axis=0)[20]
+            else:
+                precision = 0
+            if norm_precision_ret is not None:
+                norm_precision = np.mean(list(norm_precision_ret[tracker_name].values()),
+                        axis=0)[20]
+            else:
+                norm_precision = 0
+            print(formatter.format(tracker_name, success, norm_precision, precision))
+        print('-'*len(header))
+
+        if show_video_level and len(success_ret) < 10 \
+                and precision_ret is not None \
+                and len(precision_ret) < 10:
+            print("\n\n")
+            header1 = "|{:^21}|".format("Tracker name")
+            header2 = "|{:^21}|".format("Video name")
+            for tracker_name in success_ret.keys():
+                header1 += ("{:^21}|").format(tracker_name)
+                header2 += "{:^9}|{:^11}|".format("success", "precision")
+            print('-'*len(header1))
+            print(header1)
+            print('-'*len(header1))
+            print(header2)
+            print('-'*len(header1))
+            videos = list(success_ret[tracker_name].keys())
+            for video in videos:
+                row = "|{:^21}|".format(video)
+                for tracker_name in success_ret.keys():
+                    success = np.mean(success_ret[tracker_name][video])
+                    precision = np.mean(precision_ret[tracker_name][video])
+                    success_str = "{:^9.3f}".format(success)
+                    if success < helight_threshold:
+                        row += f'{Fore.RED}{success_str}{Style.RESET_ALL}|'
+                    else:
+                        row += success_str+'|'
+                    precision_str = "{:^11.3f}".format(precision)
+                    if precision < helight_threshold:
+                        row += f'{Fore.RED}{precision_str}{Style.RESET_ALL}|'
+                    else:
+                        row += precision_str+'|'
+                print(row)
+            print('-'*len(header1))
+
+class Video(object):
+    def __init__(self, name, root, video_dir, init_rect, img_names,
+            gt_rect, attr):
+        self.name = name
+        self.video_dir = video_dir
+        self.init_rect = init_rect
+        self.gt_traj = gt_rect
+        self.attr = attr
+        self.pred_trajs = {}
+        self.img_names = [os.path.join(root, x) for x in img_names]
+        self.imgs = None
+        img = cv.imread(self.img_names[0])
+        assert img is not None, self.img_names[0]
+        self.width = img.shape[1]
+        self.height = img.shape[0]
+
+    def __len__(self):
+        return len(self.img_names)
+
+    def __getitem__(self, idx):
+        if self.imgs is None:
+            return cv.imread(self.img_names[idx]), self.gt_traj[idx]
+        else:
+            return self.imgs[idx], self.gt_traj[idx]
+
+    def __iter__(self):
+        for i in range(len(self.img_names)):
+            if self.imgs is not None:
+                yield self.imgs[i], self.gt_traj[i]
+            else:
+                yield cv.imread(self.img_names[i]), self.gt_traj[i]
+    def load_tracker(self):
+        traj_file = os.path.join("OTB_results", self.name+'.txt')
+        if not os.path.exists(traj_file):
+            if self.name == 'FleetFace':
+                txt_name = 'fleetface.txt'
+            elif self.name == 'Jogging-1':
+                txt_name = 'jogging_1.txt'
+            elif self.name == 'Jogging-2':
+                txt_name = 'jogging_2.txt'
+            elif self.name == 'Skating2-1':
+                txt_name = 'skating2_1.txt'
+            elif self.name == 'Skating2-2':
+                txt_name = 'skating2_2.txt'
+            elif self.name == 'FaceOcc1':
+                txt_name = 'faceocc1.txt'
+            elif self.name == 'FaceOcc2':
+                txt_name = 'faceocc2.txt'
+            elif self.name == 'Human4-2':
+                txt_name = 'human4_2.txt'
+            else:
+                txt_name = self.name[0].lower()+self.name[1:]+'.txt'
+            traj_file = os.path.join("OTB_results", txt_name)
+        if os.path.exists(traj_file):
+            with open(traj_file, 'r') as f :
+                pred_traj = [list(map(float, x.strip().split(',')))
+                        for x in f.readlines()]
+                if len(pred_traj) != len(self.gt_traj):
+                    print("tracker", len(pred_traj), len(self.gt_traj), self.name)
+                else:
+                    return pred_traj
+        else:
+            print(traj_file)
+
+
+class OTBDATASET:
+    def __init__(self, root):
+        with open(os.path.join(root, 'OTB.json'), 'r') as f:
+            meta_data = json.load(f)
+        self.root = root
+        # load videos
+        pbar = tqdm(meta_data.keys(), desc='loading OTB', ncols=100)
+        self.videos = {}
+        for video in pbar:
+            pbar.set_postfix_str(video)
+            self.videos[video] = Video(video,
+                                          self.root,
+                                          meta_data[video]['video_dir'],
+                                          meta_data[video]['init_rect'],
+                                          meta_data[video]['img_names'],
+                                          meta_data[video]['gt_rect'],
+                                          meta_data[video]['attr'])
+        # set attr
+        attr = []
+        for x in self.videos.values():
+            attr += x.attr
+        attr = set(attr)
+        self.attr = {}
+        self.attr['ALL'] = list(self.videos.keys())
+        for x in attr:
+            self.attr[x] = []
+        for k, v in self.videos.items():
+            for attr_ in v.attr:
+                self.attr[attr_].append(k)
+
+    def __getitem__(self, idx):
+        if isinstance(idx, str):
+            return self.videos[idx]
+        elif isinstance(idx, int):
+            return self.videos[sorted(list(self.videos.keys()))[idx]]
+
+    def __len__(self):
+        return len(self.videos)
+
+    def __iter__(self):
+        keys = sorted(list(self.videos.keys()))
+        for key in keys:
+            yield self.videos[key]
+
+
+def get_axis_aligned_bbox(region):
+    """ convert region to (cx, cy, w, h) that represent by axis aligned box
+    """
+    nv = region.size
+    if nv == 8:
+        cx = np.mean(region[0::2])
+        cy = np.mean(region[1::2])
+        x1 = min(region[0::2])
+        x2 = max(region[0::2])
+        y1 = min(region[1::2])
+        y2 = max(region[1::2])
+        A1 = np.linalg.norm(region[0:2] - region[2:4]) * \
+            np.linalg.norm(region[2:4] - region[4:6])
+        A2 = (x2 - x1) * (y2 - y1)
+        s = np.sqrt(A1 / A2)
+        w = s * (x2 - x1) + 1
+        h = s * (y2 - y1) + 1
+    else:
+        x = region[0]
+        y = region[1]
+        w = region[2]
+        h = region[3]
+        cx = x+w/2
+        cy = y+h/2
+    return cx, cy, w, h
+
+class OTB:
+
+    def __init__(self, root):
+        self.root = root
+        self.dataset = OTBDATASET(root)
+    @property
+    def name(self):
+        return self.__class__.__name__
+
+    def eval(self, model):
+        for v_idx, video in enumerate(self.dataset):
+            toc = 0
+            pred_bboxes = []
+            scores = []
+            track_times = []
+            for idx, (img, gt_bbox) in enumerate(video):
+                # convert bgr to rgb
+                img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
+                tic = cv.getTickCount()
+                if idx == 0:
+                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
+                    gt_bbox_ = (int(cx - w / 2), int(cy - h / 2), int(w), int(h))
+                    model.init(img, gt_bbox_)
+                    pred_bbox = gt_bbox_
+                    pred_bboxes.append(pred_bbox)
+                    scores.append(None)
+                else:
+                    isLocated, bbox, score = model.infer(img)
+                    pred_bbox = bbox
+                    pred_bboxes.append(pred_bbox)
+                    scores.append(score)
+                toc += cv.getTickCount() - tic
+                track_times.append((cv.getTickCount() - tic) / cv.getTickFrequency())
+                if idx == 0:
+                    cv.destroyAllWindows()
+            toc /= cv.getTickFrequency()
+            model_path = os.path.join('OTB_results')
+            if not os.path.isdir(model_path):
+                os.makedirs(model_path)
+            result_path = os.path.join(model_path,'{}.txt'.format(video.name))
+            with open(result_path, 'w') as f:
+                for x in pred_bboxes:
+                    f.write(','.join([str(i) for i in x]) + '\n')
+            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
+                v_idx + 1, video.name, toc, idx / toc))
+
+
+    def get_result(self):
+        return self.top1_acc, self.top5_acc
+
+    def print_result(self):
+        benchmark = OPEBenchmark(self.dataset)
+        success_ret = {}
+        with Pool(processes=1) as pool:
+            for ret in tqdm(pool.imap_unordered(benchmark.eval_success,"tracker"), desc='eval success', total=1, ncols=100):
+                success_ret.update(ret)
+        precision_ret = {}
+        with Pool(processes=1) as pool:
+            for ret in tqdm(pool.imap_unordered(benchmark.eval_precision,"tracker"), desc='eval precision', total=1, ncols=100):
+                precision_ret.update(ret)
+        benchmark.show_result(success_ret, precision_ret,
+                              show_video_level=False)
diff --git a/tools/eval/eval.py b/tools/eval/eval.py
index 6c961cc6..1d9dfb4f 100644
--- a/tools/eval/eval.py
+++ b/tools/eval/eval.py
@@ -95,6 +95,12 @@
         name="PPHumanSeg",
         topic="human_segmentation",
         modelPath=os.path.join(root_dir, "models/human_segmentation_pphumanseg/human_segmentation_pphumanseg_2023mar_int8.onnx")),
+    dasiamrpn=dict(
+        name="DaSiamRPN",
+        topic="object_tracking",
+        kernel_cls1_path=os.path.join(root_dir, "models/object_tracking_dasiamrpn/object_tracking_dasiamrpn_kernel_cls1_2021nov.onnx"),
+        kernel_r1_path=os.path.join(root_dir, "models/object_tracking_dasiamrpn/object_tracking_dasiamrpn_kernel_r1_2021nov.onnx"),
+        model_path=os.path.join(root_dir, "models/object_tracking_dasiamrpn/object_tracking_dasiamrpn_model_2021nov.onnx")),
 )
 
 datasets = dict(
@@ -118,6 +124,9 @@
         mini_supervisely=dict(
             name="MiniSupervisely",
             topic="human_segmentation"),
+        otb=dict(
+            name="OTB",
+            topic="object_tracking"),
 )
 
 def main(args):

From aa1b2908ff2b935778e051f0bebec8377d5eadbd Mon Sep 17 00:00:00 2001
From: Ryan Lee <ryanl1288@hotmail.com>
Date: Wed, 13 Mar 2024 03:55:35 -0400
Subject: [PATCH 02/17] Automatic dataset directory setup handling in init

---
 tools/eval/README.md       |  20 +-
 tools/eval/datasets/otb.py | 388 ++++++++++++++++---------------------
 tools/eval/eval.py         |   8 +-
 3 files changed, 177 insertions(+), 239 deletions(-)

diff --git a/tools/eval/README.md b/tools/eval/README.md
index 1b216cd8..1665ffbf 100644
--- a/tools/eval/README.md
+++ b/tools/eval/README.md
@@ -218,26 +218,12 @@ python eval.py -m pphumanseg_q -d mini_supervisely -dr /path/to/pphumanseg
 
 ### Prepare data
 
-Please visit [here](https://drive.google.com/drive/folders/1DZvtlnG9U94cgLD6Yi3eU7r6QZJkjdl-?usp=sharing) to download the OTB dataset and the json file. Organize files as follow:
-
-```shell
-$ tree -L 2 /path/to/imagenet
-.
-├── Basketball
-│   ├── groundtruth_rect.txt
-│   ├── img
-│   │        └── XXXX.jpg
-├── Biker
-│   ├── groundtruth_rect.txt
-│   ├── img
-│   │        └── XXXX.jpg
-└── OTB.json
-```
+Please visit [here](https://drive.google.com/drive/folders/1DZvtlnG9U94cgLD6Yi3eU7r6QZJkjdl-?usp=sharing) to download the OTB dataset and the json file. Please put both files together in a directory.
 
 ### Evaluation
 
 Run evaluation with the following command:
 
 ```shell
-python eval.py -m dasiamrpn -d otb -dr /path/to/otb
-```
\ No newline at end of file
+python eval.py -m vittrack -d otb -dr /path/to/otb
+```
diff --git a/tools/eval/datasets/otb.py b/tools/eval/datasets/otb.py
index 7ebe6c5f..9470003b 100644
--- a/tools/eval/datasets/otb.py
+++ b/tools/eval/datasets/otb.py
@@ -4,43 +4,37 @@
 import cv2 as cv
 from colorama import Style, Fore
 from tqdm import tqdm
-from multiprocessing import Pool
+from multiprocessing import Pool, cpu_count
 
 def overlap_ratio(rect1, rect2):
-    '''Compute overlap ratio between two rects
-    Args
-        rect:2d array of N x [x,y,w,h]
-    Return:
-        iou
-    '''
-    left = np.maximum(rect1[:,0], rect2[:,0])
-    right = np.minimum(rect1[:,0]+rect1[:,2], rect2[:,0]+rect2[:,2])
-    top = np.maximum(rect1[:,1], rect2[:,1])
-    bottom = np.minimum(rect1[:,1]+rect1[:,3], rect2[:,1]+rect2[:,3])
-
-    intersect = np.maximum(0,right - left) * np.maximum(0,bottom - top)
-    union = rect1[:,2]*rect1[:,3] + rect2[:,2]*rect2[:,3] - intersect
+    """Calculate the Intersection over Union (IoU) overlap ratio between two sets of rectangles."""  
+    left = np.maximum(rect1[:, 0], rect2[:, 0])
+    right = np.minimum(rect1[:, 0] + rect1[:, 2], rect2[:, 0] + rect2[:, 2])
+    top = np.maximum(rect1[:, 1], rect2[:, 1])
+    bottom = np.minimum(rect1[:, 1] + rect1[:, 3], rect2[:, 1] + rect2[:, 3])
+    intersect = np.maximum(right - left, 0) * np.maximum(bottom - top, 0)
+    union = rect1[:, 2] * rect1[:, 3] + rect2[:, 2] * rect2[:, 3] - intersect
     iou = intersect / union
-    iou = np.maximum(np.minimum(1, iou), 0)
+    iou = np.clip(iou, 0, 1)
     return iou
+
 def success_overlap(gt_bb, result_bb, n_frame):
+    """Calculate the success rate based on the overlap ratio between ground truth and predicted bounding boxes."""
     thresholds_overlap = np.arange(0, 1.05, 0.05)
     success = np.zeros(len(thresholds_overlap))
-    iou = np.ones(len(gt_bb)) * (-1)
     mask = np.sum(gt_bb[:, 2:] > 0, axis=1) == 2
-    iou[mask] = overlap_ratio(gt_bb[mask], result_bb[mask])
-    for i in range(len(thresholds_overlap)):
-        success[i] = np.sum(iou > thresholds_overlap[i]) / float(n_frame)
+    iou = overlap_ratio(gt_bb[mask], result_bb[mask])
+    for i, threshold in enumerate(thresholds_overlap):
+        success[i] = np.sum(iou > threshold) / n_frame
     return success
 
 def success_error(gt_center, result_center, thresholds, n_frame):
+    """Calculate the success rate based on the error distance between ground truth and predicted bounding box centers."""
     success = np.zeros(len(thresholds))
-    dist = np.ones(len(gt_center)) * (-1)
     mask = np.sum(gt_center > 0, axis=1) == 2
-    dist[mask] = np.sqrt(np.sum(
-        np.power(gt_center[mask] - result_center[mask], 2), axis=1))
-    for i in range(len(thresholds)):
-        success[i] = np.sum(dist <= thresholds[i]) / float(n_frame)
+    dist = np.linalg.norm(gt_center[mask] - result_center[mask], axis=1)
+    for i, threshold in enumerate(thresholds):
+        success[i] = np.sum(dist <= threshold) / n_frame
     return success
 
 class OPEBenchmark:
@@ -48,139 +42,90 @@ def __init__(self, dataset):
         self.dataset = dataset
 
     def convert_bb_to_center(self, bboxes):
+        """Convert bounding box coordinates to centers."""
         return np.array([(bboxes[:, 0] + (bboxes[:, 2] - 1) / 2),
                          (bboxes[:, 1] + (bboxes[:, 3] - 1) / 2)]).T
 
     def convert_bb_to_norm_center(self, bboxes, gt_wh):
-        return self.convert_bb_to_center(bboxes) / (gt_wh+1e-16)
+        """Convert bounding box coordinates to normalized centers."""
+        return self.convert_bb_to_center(bboxes) / (gt_wh + 1e-16)
 
-    def eval_success(self,tracker):
-        success_ret = {}
-        success_ret_ = {}
+    def evaluate(self, metric):
+        """Evaluate the tracking performance based on the specified metric."""
+        evaluation_ret = {}
         for video in self.dataset:
             gt_traj = np.array(video.gt_traj)
-            tracker_traj = video.load_tracker()
-            tracker_traj = np.array(tracker_traj)
+            tracker_traj = np.array(video.load_tracker())
             n_frame = len(gt_traj)
             if hasattr(video, 'absent'):
                 gt_traj = gt_traj[video.absent == 1]
                 tracker_traj = tracker_traj[video.absent == 1]
-            success_ret_[video.name] = success_overlap(gt_traj, tracker_traj, n_frame)
-        success_ret["tracker"] = success_ret_
-        return success_ret
+            if metric == 'success':
+                evaluation_ret[video.name] = success_overlap(gt_traj, tracker_traj, n_frame)
+            elif metric == 'precision':
+                gt_center = self.convert_bb_to_center(gt_traj)
+                tracker_center = self.convert_bb_to_center(tracker_traj)
+                thresholds = np.arange(0, 51, 1)
+                evaluation_ret[video.name] = success_error(gt_center, tracker_center, thresholds, n_frame)
+            elif metric == 'norm_precision':
+                gt_center_norm = self.convert_bb_to_norm_center(gt_traj, gt_traj[:, 2:4])
+                tracker_center_norm = self.convert_bb_to_norm_center(tracker_traj, gt_traj[:, 2:4])
+                thresholds = np.arange(0, 51, 1) / 100
+                evaluation_ret[video.name] = success_error(gt_center_norm, tracker_center_norm, thresholds, n_frame)
+        return {"tracker": evaluation_ret}
 
-    def eval_precision(self,tracker):
-        precision_ret = {}
-        precision_ret_ = {}
-        for video in self.dataset:
-            gt_traj = np.array(video.gt_traj)
-            tracker_traj = video.load_tracker()
-            tracker_traj = np.array(tracker_traj)
-            n_frame = len(gt_traj)
-            if hasattr(video, 'absent'):
-                gt_traj = gt_traj[video.absent == 1]
-                tracker_traj = tracker_traj[video.absent == 1]
-            gt_center = self.convert_bb_to_center(gt_traj)
-            tracker_center = self.convert_bb_to_center(tracker_traj)
-            thresholds = np.arange(0, 51, 1)
-            precision_ret_[video.name] = success_error(gt_center, tracker_center,
-                    thresholds, n_frame)
-        precision_ret["tracker"] = precision_ret_
-        return precision_ret
-
-    def eval_norm_precision(self,tracker):
-        norm_precision_ret = {}
-        norm_precision_ret_ = {}
-        for video in self.dataset:
-            gt_traj = np.array(video.gt_traj)
-            tracker_traj = video.load_tracker()
-            tracker_traj = np.array(tracker_traj)
-            n_frame = len(gt_traj)
-            if hasattr(video, 'absent'):
-                gt_traj = gt_traj[video.absent == 1]
-                tracker_traj = tracker_traj[video.absent == 1]
-            gt_center_norm = self.convert_bb_to_norm_center(gt_traj, gt_traj[:, 2:4])
-            tracker_center_norm = self.convert_bb_to_norm_center(tracker_traj, gt_traj[:, 2:4])
-            thresholds = np.arange(0, 51, 1) / 100
-            norm_precision_ret_[video.name] = success_error(gt_center_norm,
-                    tracker_center_norm, thresholds, n_frame)
-        norm_precision_ret["tracker"] = norm_precision_ret_
-        return norm_precision_ret
-
-    def show_result(self, success_ret, precision_ret=None,
-            norm_precision_ret=None, show_video_level=False, helight_threshold=0.6):
-        """pretty print result
-        Args:
-            result: returned dict from function eval
-        """
-        # sort tracker
-        tracker_auc = {}
-        for tracker_name in success_ret.keys():
-            auc = np.mean(list(success_ret[tracker_name].values()))
-            tracker_auc[tracker_name] = auc
-        tracker_auc_ = sorted(tracker_auc.items(),
-                             key=lambda x:x[1],
-                             reverse=True)[:20]
-        tracker_names = [x[0] for x in tracker_auc_]
-
-
-        tracker_name_len = max((max([len(x) for x in success_ret.keys()])+2), 12)
-        header = ("|{:^"+str(tracker_name_len)+"}|{:^9}|{:^16}|{:^11}|").format(
-                "Tracker name", "Success", "Norm Precision", "Precision")
-        formatter = "|{:^"+str(tracker_name_len)+"}|{:^9.3f}|{:^16.3f}|{:^11.3f}|"
-        print('-'*len(header))
+    def show_result(self, success, precision=None, norm_precision=None, show_video_level=False, height_threshold=0.6):
+        tracker_auc = {tracker_name: np.mean(list(scores.values())) for tracker_name, scores in success.items()}
+        tracker_auc = sorted(tracker_auc.items(), key=lambda x: x[1], reverse=True)[:20]
+        tracker_names = [x[0] for x in tracker_auc]
+        tracker_name_len = max(max(len(x) for x in success.keys()) + 2, 12)
+        header = ("|{:^" + str(tracker_name_len) + "}|{:^9}|{:^11}|{:^16}|").format(
+            "Tracker name", "Success", "Precision", "Norm Precision")
+        formatter = "|{:^" + str(tracker_name_len) + "}|{:^9.3f}|{:^11.3f}|{:^16.3f}|"
+
+        print('-' * len(header))
         print(header)
-        print('-'*len(header))
+        print('-' * len(header))
+
         for tracker_name in tracker_names:
-            success = tracker_auc[tracker_name]
-            if precision_ret is not None:
-                precision = np.mean(list(precision_ret[tracker_name].values()), axis=0)[20]
-            else:
-                precision = 0
-            if norm_precision_ret is not None:
-                norm_precision = np.mean(list(norm_precision_ret[tracker_name].values()),
-                        axis=0)[20]
-            else:
-                norm_precision = 0
-            print(formatter.format(tracker_name, success, norm_precision, precision))
-        print('-'*len(header))
+            success_score = np.mean(list(success[tracker_name].values()))
+            precision_score = np.mean(list(precision[tracker_name].values()), axis=0)[20] if precision else 0
+            norm_precision_score = np.mean(list(norm_precision[tracker_name].values()), axis=0)[20] if norm_precision else 0
+            print(formatter.format(tracker_name, success_score, precision_score, norm_precision_score))
+
+        print('-' * len(header))
 
-        if show_video_level and len(success_ret) < 10 \
-                and precision_ret is not None \
-                and len(precision_ret) < 10:
+        if show_video_level and len(success) < 10 and precision and len(precision) < 10:
             print("\n\n")
             header1 = "|{:^21}|".format("Tracker name")
             header2 = "|{:^21}|".format("Video name")
-            for tracker_name in success_ret.keys():
+
+            for tracker_name in success.keys():
                 header1 += ("{:^21}|").format(tracker_name)
                 header2 += "{:^9}|{:^11}|".format("success", "precision")
-            print('-'*len(header1))
+
+            print('-' * len(header1))
             print(header1)
-            print('-'*len(header1))
+            print('-' * len(header1))
             print(header2)
-            print('-'*len(header1))
-            videos = list(success_ret[tracker_name].keys())
-            for video in videos:
+            print('-' * len(header1))
+
+            for video, scores in success.items():
                 row = "|{:^21}|".format(video)
-                for tracker_name in success_ret.keys():
-                    success = np.mean(success_ret[tracker_name][video])
-                    precision = np.mean(precision_ret[tracker_name][video])
-                    success_str = "{:^9.3f}".format(success)
-                    if success < helight_threshold:
-                        row += f'{Fore.RED}{success_str}{Style.RESET_ALL}|'
-                    else:
-                        row += success_str+'|'
-                    precision_str = "{:^11.3f}".format(precision)
-                    if precision < helight_threshold:
-                        row += f'{Fore.RED}{precision_str}{Style.RESET_ALL}|'
-                    else:
-                        row += precision_str+'|'
+
+                for tracker_name in tracker_names:
+                    success_score = np.mean(success[tracker_name][video])
+                    precision_score = np.mean(precision[tracker_name][video])
+                    success_str = f'{success_score:.3f}' if success_score < height_threshold else f'{success_score:.3f}'
+                    precision_str = f'{precision_score:.3f}' if precision_score < height_threshold else f'{precision_score:.3f}'
+                    row += f"{success_str:^9}|{precision_str:^11}|"
+
                 print(row)
-            print('-'*len(header1))
 
-class Video(object):
-    def __init__(self, name, root, video_dir, init_rect, img_names,
-            gt_rect, attr):
+            print('-' * len(header1))
+
+class Video:
+    def __init__(self, name, root, video_dir, init_rect, img_names, gt_rect, attr):
         self.name = name
         self.video_dir = video_dir
         self.init_rect = init_rect
@@ -209,32 +154,27 @@ def __iter__(self):
                 yield self.imgs[i], self.gt_traj[i]
             else:
                 yield cv.imread(self.img_names[i]), self.gt_traj[i]
+
     def load_tracker(self):
+        """Load tracker results from file."""
         traj_file = os.path.join("OTB_results", self.name+'.txt')
         if not os.path.exists(traj_file):
-            if self.name == 'FleetFace':
-                txt_name = 'fleetface.txt'
-            elif self.name == 'Jogging-1':
-                txt_name = 'jogging_1.txt'
-            elif self.name == 'Jogging-2':
-                txt_name = 'jogging_2.txt'
-            elif self.name == 'Skating2-1':
-                txt_name = 'skating2_1.txt'
-            elif self.name == 'Skating2-2':
-                txt_name = 'skating2_2.txt'
-            elif self.name == 'FaceOcc1':
-                txt_name = 'faceocc1.txt'
-            elif self.name == 'FaceOcc2':
-                txt_name = 'faceocc2.txt'
-            elif self.name == 'Human4-2':
-                txt_name = 'human4_2.txt'
-            else:
-                txt_name = self.name[0].lower()+self.name[1:]+'.txt'
+            txt_names = {
+                'FleetFace': 'fleetface.txt',
+                'Jogging-1': 'jogging_1.txt',
+                'Jogging-2': 'jogging_2.txt',
+                'Skating2-1': 'skating2_1.txt',
+                'Skating2-2': 'skating2_2.txt',
+                'FaceOcc1': 'faceocc1.txt',
+                'FaceOcc2': 'faceocc2.txt',
+                'Human4-2': 'human4_2.txt'
+            }
+            txt_name = txt_names.get(self.name, self.name[0].lower() + self.name[1:] + '.txt')
             traj_file = os.path.join("OTB_results", txt_name)
+
         if os.path.exists(traj_file):
-            with open(traj_file, 'r') as f :
-                pred_traj = [list(map(float, x.strip().split(',')))
-                        for x in f.readlines()]
+            with open(traj_file, 'r') as f:
+                pred_traj = [list(map(float, x.strip().split(','))) for x in f.readlines()]
                 if len(pred_traj) != len(self.gt_traj):
                     print("tracker", len(pred_traj), len(self.gt_traj), self.name)
                 else:
@@ -242,33 +182,27 @@ def load_tracker(self):
         else:
             print(traj_file)
 
-
 class OTBDATASET:
     def __init__(self, root):
         with open(os.path.join(root, 'OTB.json'), 'r') as f:
             meta_data = json.load(f)
         self.root = root
-        # load videos
-        pbar = tqdm(meta_data.keys(), desc='loading OTB', ncols=100)
         self.videos = {}
+        pbar = tqdm(meta_data.keys(), desc='Loading OTB', ncols=100)
         for video in pbar:
             pbar.set_postfix_str(video)
             self.videos[video] = Video(video,
-                                          self.root,
-                                          meta_data[video]['video_dir'],
-                                          meta_data[video]['init_rect'],
-                                          meta_data[video]['img_names'],
-                                          meta_data[video]['gt_rect'],
-                                          meta_data[video]['attr'])
-        # set attr
-        attr = []
-        for x in self.videos.values():
-            attr += x.attr
-        attr = set(attr)
-        self.attr = {}
-        self.attr['ALL'] = list(self.videos.keys())
-        for x in attr:
-            self.attr[x] = []
+                                       self.root,
+                                       meta_data[video]['video_dir'],
+                                       meta_data[video]['init_rect'],
+                                       meta_data[video]['img_names'],
+                                       meta_data[video]['gt_rect'],
+                                       meta_data[video]['attr'])
+        self.attr = {'ALL': list(self.videos.keys())}
+        all_attributes = [x.attr for x in self.videos.values()]
+        all_attributes = set(sum(all_attributes, []))
+        for attr_ in all_attributes:
+            self.attr[attr_] = []
         for k, v in self.videos.items():
             for attr_ in v.attr:
                 self.attr[attr_].append(k)
@@ -277,20 +211,19 @@ def __getitem__(self, idx):
         if isinstance(idx, str):
             return self.videos[idx]
         elif isinstance(idx, int):
-            return self.videos[sorted(list(self.videos.keys()))[idx]]
+            sorted_keys = sorted(list(self.videos.keys()))
+            return self.videos[sorted_keys[idx]]
 
     def __len__(self):
         return len(self.videos)
 
     def __iter__(self):
-        keys = sorted(list(self.videos.keys()))
-        for key in keys:
+        sorted_keys = sorted(list(self.videos.keys()))
+        for key in sorted_keys:
             yield self.videos[key]
 
-
 def get_axis_aligned_bbox(region):
-    """ convert region to (cx, cy, w, h) that represent by axis aligned box
-    """
+    """Converts a region to (cx, cy, w, h) representing an axis-aligned box."""
     nv = region.size
     if nv == 8:
         cx = np.mean(region[0::2])
@@ -299,80 +232,101 @@ def get_axis_aligned_bbox(region):
         x2 = max(region[0::2])
         y1 = min(region[1::2])
         y2 = max(region[1::2])
-        A1 = np.linalg.norm(region[0:2] - region[2:4]) * \
-            np.linalg.norm(region[2:4] - region[4:6])
+        A1 = np.linalg.norm(region[0:2] - region[2:4]) * np.linalg.norm(region[2:4] - region[4:6])
         A2 = (x2 - x1) * (y2 - y1)
         s = np.sqrt(A1 / A2)
         w = s * (x2 - x1) + 1
         h = s * (y2 - y1) + 1
     else:
-        x = region[0]
-        y = region[1]
-        w = region[2]
-        h = region[3]
-        cx = x+w/2
-        cy = y+h/2
+        x, y, w, h = region
+        cx = x + w / 2
+        cy = y + h / 2
     return cx, cy, w, h
 
 class OTB:
-
     def __init__(self, root):
-        self.root = root
-        self.dataset = OTBDATASET(root)
+        # Go up one if directory is provided
+        root = os.path.abspath(root)
+        if root.endswith("OTB100"):
+            root = os.path.dirname(root)
+        print(root)
+
+        # Unzip the OTB100.zip file
+        if os.path.exists(f'{root}/OTB100.zip'):
+            os.system(f'unzip -q "{os.path.join(root, "OTB100.zip")}" -d "{root}"')
+            os.remove(f'{root}/OTB100.zip')
+
+        # Move the JSON label in if it's outside
+        if os.path.exists(f'{root}/OTB.json'):
+            os.rename(f'{root}/OTB.json', f'{root}/OTB100/OTB.json')
+
+        if os.path.exists(f'{root}/OTB100'):
+            original_directories = ['Jogging', 'Skating2', 'Human4']
+            updated_directories = ['Jogging-1', 'Jogging-2', 'Skating2-1', 'Skating2-2', 'Human4-2', 'OTB.json']
+            original_exist = all(os.path.exists(f'{root}/OTB100/{dir}') for dir in original_directories)
+            updated_exist = all(os.path.exists(f'{root}/OTB100/{dir}') for dir in updated_directories)
+            if original_exist:
+                os.rename(f'{root}/OTB100/Jogging', f'{root}/OTB100/Jogging-1')
+                os.rename(f'{root}/OTB100/Skating2', f'{root}/OTB100/Skating2-1')
+                os.rename(f'{root}/OTB100/Human4', f'{root}/OTB100/Human4-2')
+                os.system(f'cp -r "{root}/OTB100/Jogging-1" "{root}/OTB100/Jogging-2"')
+                os.system(f'cp -r "{root}/OTB100/Skating2-1" "{root}/OTB100/Skating2-2"')
+            elif not updated_exist:
+                raise RuntimeError("Not all files needed for setup are present.")
+
+        self.root = f'{root}/OTB100'
+        self.dataset = OTBDATASET(self.root)
+
     @property
     def name(self):
         return self.__class__.__name__
 
     def eval(self, model):
         for v_idx, video in enumerate(self.dataset):
-            toc = 0
+            total_time = 0
             pred_bboxes = []
             scores = []
             track_times = []
+
             for idx, (img, gt_bbox) in enumerate(video):
-                # convert bgr to rgb
                 img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
                 tic = cv.getTickCount()
+
                 if idx == 0:
                     cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                     gt_bbox_ = (int(cx - w / 2), int(cy - h / 2), int(w), int(h))
                     model.init(img, gt_bbox_)
                     pred_bbox = gt_bbox_
-                    pred_bboxes.append(pred_bbox)
                     scores.append(None)
                 else:
                     isLocated, bbox, score = model.infer(img)
                     pred_bbox = bbox
-                    pred_bboxes.append(pred_bbox)
                     scores.append(score)
-                toc += cv.getTickCount() - tic
-                track_times.append((cv.getTickCount() - tic) / cv.getTickFrequency())
-                if idx == 0:
-                    cv.destroyAllWindows()
-            toc /= cv.getTickFrequency()
+
+                pred_bboxes.append(pred_bbox)
+                toc = (cv.getTickCount() - tic) / cv.getTickFrequency()
+                total_time += toc
+                track_times.append(toc)
+
             model_path = os.path.join('OTB_results')
-            if not os.path.isdir(model_path):
-                os.makedirs(model_path)
-            result_path = os.path.join(model_path,'{}.txt'.format(video.name))
+            os.makedirs(model_path, exist_ok=True)
+            result_path = os.path.join(model_path, '{}.txt'.format(video.name))
             with open(result_path, 'w') as f:
-                for x in pred_bboxes:
-                    f.write(','.join([str(i) for i in x]) + '\n')
-            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
-                v_idx + 1, video.name, toc, idx / toc))
-
+                for bbox in pred_bboxes:
+                    f.write(','.join(map(str, bbox)) + '\n')
 
-    def get_result(self):
-        return self.top1_acc, self.top5_acc
+            avg_fps = len(video) / total_time if total_time > 0 else 0
+            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
+                v_idx + 1, video.name, total_time, avg_fps))
 
     def print_result(self):
         benchmark = OPEBenchmark(self.dataset)
-        success_ret = {}
-        with Pool(processes=1) as pool:
-            for ret in tqdm(pool.imap_unordered(benchmark.eval_success,"tracker"), desc='eval success', total=1, ncols=100):
-                success_ret.update(ret)
-        precision_ret = {}
-        with Pool(processes=1) as pool:
-            for ret in tqdm(pool.imap_unordered(benchmark.eval_precision,"tracker"), desc='eval precision', total=1, ncols=100):
-                precision_ret.update(ret)
-        benchmark.show_result(success_ret, precision_ret,
-                              show_video_level=False)
+        num_cores = cpu_count()
+        evaluation_results = {}
+        metrics = ["success", "precision", "norm_precision"]
+        for metric in metrics:
+            with Pool(processes=min(num_cores, max(1, num_cores - 1))) as pool:
+                for ret in tqdm(pool.imap_unordered(benchmark.evaluate, [metric], 1), desc=f'eval {metric}', total=1, ncols=100):
+                    evaluation_results[metric] = ret
+
+        benchmark.show_result(**evaluation_results, show_video_level=False)
diff --git a/tools/eval/eval.py b/tools/eval/eval.py
index 1d9dfb4f..3246cfe1 100644
--- a/tools/eval/eval.py
+++ b/tools/eval/eval.py
@@ -95,12 +95,10 @@
         name="PPHumanSeg",
         topic="human_segmentation",
         modelPath=os.path.join(root_dir, "models/human_segmentation_pphumanseg/human_segmentation_pphumanseg_2023mar_int8.onnx")),
-    dasiamrpn=dict(
-        name="DaSiamRPN",
+    vittrack=dict(
+        name="VitTrack",
         topic="object_tracking",
-        kernel_cls1_path=os.path.join(root_dir, "models/object_tracking_dasiamrpn/object_tracking_dasiamrpn_kernel_cls1_2021nov.onnx"),
-        kernel_r1_path=os.path.join(root_dir, "models/object_tracking_dasiamrpn/object_tracking_dasiamrpn_kernel_r1_2021nov.onnx"),
-        model_path=os.path.join(root_dir, "models/object_tracking_dasiamrpn/object_tracking_dasiamrpn_model_2021nov.onnx")),
+        model_path=os.path.join(root_dir, "models/object_tracking_vittrack/object_tracking_vittrack_2023sep.onnx")),
 )
 
 datasets = dict(

From d1217e297a9179fd2cc1033614411cd972a5b555 Mon Sep 17 00:00:00 2001
From: Ryan Lee <ryanl1288@hotmail.com>
Date: Tue, 19 Mar 2024 03:08:31 -0400
Subject: [PATCH 03/17] Partial update to OTB-2015

---
 tools/eval/datasets/otb.py | 30 +++++++++++++++---------------
 tools/eval/eval.py         |  4 ++--
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/tools/eval/datasets/otb.py b/tools/eval/datasets/otb.py
index 9470003b..dee05701 100644
--- a/tools/eval/datasets/otb.py
+++ b/tools/eval/datasets/otb.py
@@ -247,34 +247,34 @@ class OTB:
     def __init__(self, root):
         # Go up one if directory is provided
         root = os.path.abspath(root)
-        if root.endswith("OTB100"):
+        if root.endswith("OTB2015"):
             root = os.path.dirname(root)
         print(root)
 
-        # Unzip the OTB100.zip file
-        if os.path.exists(f'{root}/OTB100.zip'):
-            os.system(f'unzip -q "{os.path.join(root, "OTB100.zip")}" -d "{root}"')
-            os.remove(f'{root}/OTB100.zip')
+        # Unzip the OTB2015.zip file
+        if os.path.exists(f'{root}/OTB2015.zip'):
+            os.system(f'unzip -q "{os.path.join(root, "OTB2015.zip")}" -d "{root}"')
+            os.remove(f'{root}/OTB2015.zip')
 
         # Move the JSON label in if it's outside
         if os.path.exists(f'{root}/OTB.json'):
-            os.rename(f'{root}/OTB.json', f'{root}/OTB100/OTB.json')
+            os.rename(f'{root}/OTB.json', f'{root}/OTB2015/OTB.json')
 
-        if os.path.exists(f'{root}/OTB100'):
+        if os.path.exists(f'{root}/OTB2015'):
             original_directories = ['Jogging', 'Skating2', 'Human4']
             updated_directories = ['Jogging-1', 'Jogging-2', 'Skating2-1', 'Skating2-2', 'Human4-2', 'OTB.json']
-            original_exist = all(os.path.exists(f'{root}/OTB100/{dir}') for dir in original_directories)
-            updated_exist = all(os.path.exists(f'{root}/OTB100/{dir}') for dir in updated_directories)
+            original_exist = all(os.path.exists(f'{root}/OTB2015/{dir}') for dir in original_directories)
+            updated_exist = all(os.path.exists(f'{root}/OTB2015/{dir}') for dir in updated_directories)
             if original_exist:
-                os.rename(f'{root}/OTB100/Jogging', f'{root}/OTB100/Jogging-1')
-                os.rename(f'{root}/OTB100/Skating2', f'{root}/OTB100/Skating2-1')
-                os.rename(f'{root}/OTB100/Human4', f'{root}/OTB100/Human4-2')
-                os.system(f'cp -r "{root}/OTB100/Jogging-1" "{root}/OTB100/Jogging-2"')
-                os.system(f'cp -r "{root}/OTB100/Skating2-1" "{root}/OTB100/Skating2-2"')
+                os.rename(f'{root}/OTB2015/Jogging', f'{root}/OTB2015/Jogging-1')
+                os.rename(f'{root}/OTB2015/Skating2', f'{root}/OTB2015/Skating2-1')
+                os.rename(f'{root}/OTB2015/Human4', f'{root}/OTB2015/Human4-2')
+                os.system(f'cp -r "{root}/OTB2015/Jogging-1" "{root}/OTB2015/Jogging-2"')
+                os.system(f'cp -r "{root}/OTB2015/Skating2-1" "{root}/OTB2015/Skating2-2"')
             elif not updated_exist:
                 raise RuntimeError("Not all files needed for setup are present.")
 
-        self.root = f'{root}/OTB100'
+        self.root = f'{root}/OTB2015'
         self.dataset = OTBDATASET(self.root)
 
     @property
diff --git a/tools/eval/eval.py b/tools/eval/eval.py
index 3246cfe1..e6ae15dc 100644
--- a/tools/eval/eval.py
+++ b/tools/eval/eval.py
@@ -122,8 +122,8 @@
         mini_supervisely=dict(
             name="MiniSupervisely",
             topic="human_segmentation"),
-        otb=dict(
-            name="OTB",
+        otb2015=dict(
+            name="OTB-2015",
             topic="object_tracking"),
 )
 

From f88eeabd74c8d946a0edd0662651a64c9836fe83 Mon Sep 17 00:00:00 2001
From: Ryan Lee <ryanl1288@hotmail.com>
Date: Tue, 19 Mar 2024 03:10:25 -0400
Subject: [PATCH 04/17] README Updates for OTB-2015

---
 tools/eval/README.md | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tools/eval/README.md b/tools/eval/README.md
index 1665ffbf..ba10f636 100644
--- a/tools/eval/README.md
+++ b/tools/eval/README.md
@@ -22,7 +22,7 @@ Supported datasets:
 - [ICDAR](#icdar2003)
 - [IIIT5K](#iiit5k)
 - [Mini Supervisely](#mini-supervisely)
-- [OTB](#otb)
+- [OTB-2015](#otb-2015)
 
 ## ImageNet
 
@@ -214,16 +214,19 @@ Run evaluation on quantized model with the following command :
 python eval.py -m pphumanseg_q -d mini_supervisely -dr /path/to/pphumanseg
 ```
 
-## OTB
+## OTB-2015
 
 ### Prepare data
 
-Please visit [here](https://drive.google.com/drive/folders/1DZvtlnG9U94cgLD6Yi3eU7r6QZJkjdl-?usp=sharing) to download the OTB dataset and the json file. Please both files together in a directory.
+1. The official site is http://cvlab.hanyang.ac.kr/.
+2. In case it is down, users can download from the alternative link we provide.
+
+Alternative Link: TBA
 
 ### Evaluation
 
 Run evaluation with the following command:
 
 ```shell
-python eval.py -m vittrack -d otb -dr /path/to/otb
+python eval.py -m vittrack -d otb2015 -dr /path/to/otb2015
 ```

From 8c07b2d685c9f72f1fdcd5c73601d9067bd4691c Mon Sep 17 00:00:00 2001
From: Ryan Lee <ryanl1288@hotmail.com>
Date: Wed, 20 Mar 2024 02:09:42 -0400
Subject: [PATCH 05/17] Updated success to AUC and confirmed functionality

---
 tools/eval/datasets/__init__.py            |  4 ++--
 tools/eval/datasets/{otb.py => otb2015.py} | 21 +++++++++++----------
 tools/eval/eval.py                         |  2 +-
 3 files changed, 14 insertions(+), 13 deletions(-)
 rename tools/eval/datasets/{otb.py => otb2015.py} (96%)

diff --git a/tools/eval/datasets/__init__.py b/tools/eval/datasets/__init__.py
index bb1a5375..9acaafdc 100644
--- a/tools/eval/datasets/__init__.py
+++ b/tools/eval/datasets/__init__.py
@@ -4,7 +4,7 @@
 from .icdar import ICDAR
 from .iiit5k import IIIT5K
 from .minisupervisely import MiniSupervisely
-from .otb import OTB
+from .otb2015 import OTB2015
 
 class Registery:
     def __init__(self, name):
@@ -24,4 +24,4 @@ def register(self, item):
 DATASETS.register(ICDAR)
 DATASETS.register(IIIT5K)
 DATASETS.register(MiniSupervisely)
-DATASETS.register(OTB)
+DATASETS.register(OTB2015)
diff --git a/tools/eval/datasets/otb.py b/tools/eval/datasets/otb2015.py
similarity index 96%
rename from tools/eval/datasets/otb.py
rename to tools/eval/datasets/otb2015.py
index dee05701..029e272d 100644
--- a/tools/eval/datasets/otb.py
+++ b/tools/eval/datasets/otb2015.py
@@ -8,16 +8,17 @@
 
 def overlap_ratio(rect1, rect2):
     """Calculate the Intersection over Union (IoU) overlap ratio between two sets of rectangles."""  
-    left = np.maximum(rect1[:, 0], rect2[:, 0])
-    right = np.minimum(rect1[:, 0] + rect1[:, 2], rect2[:, 0] + rect2[:, 2])
-    top = np.maximum(rect1[:, 1], rect2[:, 1])
-    bottom = np.minimum(rect1[:, 1] + rect1[:, 3], rect2[:, 1] + rect2[:, 3])
-    intersect = np.maximum(right - left, 0) * np.maximum(bottom - top, 0)
-    union = rect1[:, 2] * rect1[:, 3] + rect2[:, 2] * rect2[:, 3] - intersect
-    iou = intersect / union
-    iou = np.clip(iou, 0, 1)
+    tl = np.maximum(rect1[:, :2], rect2[:, :2])
+    br = np.minimum(rect1[:, :2] + rect1[:, 2:] - 1.0, rect2[:, :2] + rect2[:, 2:] - 1.0)
+    sz = np.maximum(br - tl + 1.0, 0)
+
+    # Area
+    intersection = np.prod(sz, axis=1)
+    union = np.prod(rect1[:, 2:], axis=1) + np.prod(rect2[:, 2:], axis=1) - intersection
+    iou = np.clip(intersection / union, 0, 1)
     return iou
 
+
 def success_overlap(gt_bb, result_bb, n_frame):
     """Calculate the success rate based on the overlap ratio between ground truth and predicted bounding boxes."""
     thresholds_overlap = np.arange(0, 1.05, 0.05)
@@ -80,7 +81,7 @@ def show_result(self, success, precision=None, norm_precision=None, show_video_l
         tracker_names = [x[0] for x in tracker_auc]
         tracker_name_len = max(max(len(x) for x in success.keys()) + 2, 12)
         header = ("|{:^" + str(tracker_name_len) + "}|{:^9}|{:^11}|{:^16}|").format(
-            "Tracker name", "Success", "Precision", "Norm Precision")
+            "Tracker name", "IOU", "Precision", "Norm Precision")
         formatter = "|{:^" + str(tracker_name_len) + "}|{:^9.3f}|{:^11.3f}|{:^16.3f}|"
 
         print('-' * len(header))
@@ -243,7 +244,7 @@ def get_axis_aligned_bbox(region):
         cy = y + h / 2
     return cx, cy, w, h
 
-class OTB:
+class OTB2015:
     def __init__(self, root):
         # Go up one if directory is provided
         root = os.path.abspath(root)
diff --git a/tools/eval/eval.py b/tools/eval/eval.py
index e6ae15dc..f5d66e9d 100644
--- a/tools/eval/eval.py
+++ b/tools/eval/eval.py
@@ -123,7 +123,7 @@
             name="MiniSupervisely",
             topic="human_segmentation"),
         otb2015=dict(
-            name="OTB-2015",
+            name="OTB2015",
             topic="object_tracking"),
 )
 

From e7aba825afbd0e1504f232dca057fd314d40d8df Mon Sep 17 00:00:00 2001
From: Ryan Lee <ryanl1288@hotmail.com>
Date: Wed, 20 Mar 2024 02:10:40 -0400
Subject: [PATCH 06/17] Updated misnamed IOU to AUC

---
 tools/eval/datasets/otb2015.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/eval/datasets/otb2015.py b/tools/eval/datasets/otb2015.py
index 029e272d..bbcb5674 100644
--- a/tools/eval/datasets/otb2015.py
+++ b/tools/eval/datasets/otb2015.py
@@ -81,7 +81,7 @@ def show_result(self, success, precision=None, norm_precision=None, show_video_l
         tracker_names = [x[0] for x in tracker_auc]
         tracker_name_len = max(max(len(x) for x in success.keys()) + 2, 12)
         header = ("|{:^" + str(tracker_name_len) + "}|{:^9}|{:^11}|{:^16}|").format(
-            "Tracker name", "IOU", "Precision", "Norm Precision")
+            "Tracker name", "AUC", "Precision", "Norm Precision")
         formatter = "|{:^" + str(tracker_name_len) + "}|{:^9.3f}|{:^11.3f}|{:^16.3f}|"
 
         print('-' * len(header))

From 40a3b9b77f2cb54091d36b910806755b1df057ad Mon Sep 17 00:00:00 2001
From: Ryan Lee <ryanl1288@hotmail.com>
Date: Wed, 20 Mar 2024 02:25:39 -0400
Subject: [PATCH 07/17] Updated with OpenCV's GDrive Dataset Link

---
 tools/eval/README.md           |  4 +---
 tools/eval/datasets/otb2015.py | 26 +++++++++++++-------------
 2 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/tools/eval/README.md b/tools/eval/README.md
index ba10f636..ae42e287 100644
--- a/tools/eval/README.md
+++ b/tools/eval/README.md
@@ -219,9 +219,7 @@ python eval.py -m pphumanseg_q -d mini_supervisely -dr /path/to/pphumanseg
 ### Prepare data
 
 1. The official site is http://cvlab.hanyang.ac.kr/.
-2. In case it is down, users can download from the alternative link we provide.
-
-Alternative Link: TBA
+2. In case it is down, users can download from the alternative [Google Drive Link](https://drive.google.com/drive/folders/1iTwCQAMgzdWWrlwncOjpshuHvipIWPMN?usp=sharing).
 
 ### Evaluation
 
diff --git a/tools/eval/datasets/otb2015.py b/tools/eval/datasets/otb2015.py
index bbcb5674..c8c44d07 100644
--- a/tools/eval/datasets/otb2015.py
+++ b/tools/eval/datasets/otb2015.py
@@ -252,26 +252,26 @@ def __init__(self, root):
             root = os.path.dirname(root)
         print(root)
 
-        # Unzip the OTB2015.zip file
-        if os.path.exists(f'{root}/OTB2015.zip'):
-            os.system(f'unzip -q "{os.path.join(root, "OTB2015.zip")}" -d "{root}"')
-            os.remove(f'{root}/OTB2015.zip')
+        # Unzip the OTB100.zip file
+        if os.path.exists(f'{root}/OTB100.zip'):
+            os.system(f'unzip -q "{os.path.join(root, "OTB100.zip")}" -d "{root}"')
+            os.remove(f'{root}/OTB100.zip')
 
         # Move the JSON label in if it's outside
         if os.path.exists(f'{root}/OTB.json'):
-            os.rename(f'{root}/OTB.json', f'{root}/OTB2015/OTB.json')
+            os.rename(f'{root}/OTB.json', f'{root}/OTB100/OTB.json')
 
-        if os.path.exists(f'{root}/OTB2015'):
+        if os.path.exists(f'{root}/OTB100'):
             original_directories = ['Jogging', 'Skating2', 'Human4']
             updated_directories = ['Jogging-1', 'Jogging-2', 'Skating2-1', 'Skating2-2', 'Human4-2', 'OTB.json']
-            original_exist = all(os.path.exists(f'{root}/OTB2015/{dir}') for dir in original_directories)
-            updated_exist = all(os.path.exists(f'{root}/OTB2015/{dir}') for dir in updated_directories)
+            original_exist = all(os.path.exists(f'{root}/OTB100/{dir}') for dir in original_directories)
+            updated_exist = all(os.path.exists(f'{root}/OTB100/{dir}') for dir in updated_directories)
             if original_exist:
-                os.rename(f'{root}/OTB2015/Jogging', f'{root}/OTB2015/Jogging-1')
-                os.rename(f'{root}/OTB2015/Skating2', f'{root}/OTB2015/Skating2-1')
-                os.rename(f'{root}/OTB2015/Human4', f'{root}/OTB2015/Human4-2')
-                os.system(f'cp -r "{root}/OTB2015/Jogging-1" "{root}/OTB2015/Jogging-2"')
-                os.system(f'cp -r "{root}/OTB2015/Skating2-1" "{root}/OTB2015/Skating2-2"')
+                os.rename(f'{root}/OTB100/Jogging', f'{root}/OTB100/Jogging-1')
+                os.rename(f'{root}/OTB100/Skating2', f'{root}/OTB100/Skating2-1')
+                os.rename(f'{root}/OTB100/Human4', f'{root}/OTB100/Human4-2')
+                os.system(f'cp -r "{root}/OTB100/Jogging-1" "{root}/OTB100/Jogging-2"')
+                os.system(f'cp -r "{root}/OTB100/Skating2-1" "{root}/OTB100/Skating2-2"')
             elif not updated_exist:
                 raise RuntimeError("Not all files needed for setup are present.")
 

From 9b9d4028d7210f7887fe2cca590baf2a93b69ea8 Mon Sep 17 00:00:00 2001
From: Ryan Lee <ryanl1288@hotmail.com>
Date: Mon, 1 Apr 2024 03:17:28 -0400
Subject: [PATCH 08/17] Updated to use the GT bbox from the dataset, not a
 separate JSON file. Removed all moving or renaming of files and directories

---
 tools/eval/README.md            |  14 +
 tools/eval/datasets/__init__.py |   4 +-
 tools/eval/datasets/otb100.py   | 536 ++++++++++++++++++++++++++++++++
 tools/eval/datasets/otb2015.py  | 333 --------------------
 tools/eval/eval.py              |   4 +-
 5 files changed, 554 insertions(+), 337 deletions(-)
 create mode 100644 tools/eval/datasets/otb100.py
 delete mode 100644 tools/eval/datasets/otb2015.py

diff --git a/tools/eval/README.md b/tools/eval/README.md
index ae42e287..72824c88 100644
--- a/tools/eval/README.md
+++ b/tools/eval/README.md
@@ -221,6 +221,20 @@ python eval.py -m pphumanseg_q -d mini_supervisely -dr /path/to/pphumanseg
 1. The official site is http://cvlab.hanyang.ac.kr/.
 2. In case it is down, users can download from the alternative [Google Drive Link](https://drive.google.com/drive/folders/1iTwCQAMgzdWWrlwncOjpshuHvipIWPMN?usp=sharing).
 
+Download both the `OTB100.zip` and `OTB.json`, organize files as follow:
+
+```shell
+$ tree -L 2 /path/to/otb100
+.
+├── Basketball
+│   ├── groundtruth_rect.txt
+│   └── img
+├── ...
+├── Woman
+└── OTB.json
+
+```
+
 ### Evaluation
 
 Run evaluation with the following command:
diff --git a/tools/eval/datasets/__init__.py b/tools/eval/datasets/__init__.py
index 9acaafdc..c6f3c915 100644
--- a/tools/eval/datasets/__init__.py
+++ b/tools/eval/datasets/__init__.py
@@ -4,7 +4,7 @@
 from .icdar import ICDAR
 from .iiit5k import IIIT5K
 from .minisupervisely import MiniSupervisely
-from .otb2015 import OTB2015
+from .otb100 import OTB100
 
 class Registery:
     def __init__(self, name):
@@ -24,4 +24,4 @@ def register(self, item):
 DATASETS.register(ICDAR)
 DATASETS.register(IIIT5K)
 DATASETS.register(MiniSupervisely)
-DATASETS.register(OTB2015)
+DATASETS.register(OTB100)
diff --git a/tools/eval/datasets/otb100.py b/tools/eval/datasets/otb100.py
new file mode 100644
index 00000000..904587f2
--- /dev/null
+++ b/tools/eval/datasets/otb100.py
@@ -0,0 +1,536 @@
+import os
+import json
+import numpy as np
+import cv2 as cv
+from colorama import Style, Fore
+from tqdm import tqdm
+from multiprocessing import Pool, cpu_count
+
+def overlap_ratio(rect1, rect2):
+    """Return the per-row IoU of two (N, 4) arrays of x, y, w, h boxes; rows with an empty union score 0."""
+    top_left = np.maximum(rect1[:, :2], rect2[:, :2])
+    bottom_right = np.minimum(rect1[:, :2] + rect1[:, 2:] - 1.0, rect2[:, :2] + rect2[:, 2:] - 1.0)
+    overlap_wh = np.maximum(bottom_right - top_left + 1.0, 0)
+
+    intersection = np.prod(overlap_wh, axis=1)
+    union = np.prod(rect1[:, 2:], axis=1) + np.prod(rect2[:, 2:], axis=1) - intersection
+    # Divide only where the union is non-zero so degenerate boxes give 0 instead of NaN plus a warning.
+    iou = np.divide(intersection, union, out=np.zeros_like(intersection, dtype=float), where=union != 0)
+    return np.clip(iou, 0, 1)
+
+
+def success_overlap(gt_bb, result_bb, n_frame):
+    """Return, for each IoU threshold in 0, 0.05, ..., 1, the fraction of n_frame frames whose IoU exceeds it."""
+    iou_thresholds = np.arange(0, 1.05, 0.05)
+    # Score only frames whose ground-truth width and height are both positive.
+    valid = np.sum(gt_bb[:, 2:] > 0, axis=1) == 2
+    iou = overlap_ratio(gt_bb[valid], result_bb[valid])
+    # Broadcast (thresholds, 1) against (1, frames) to count passing frames per threshold.
+    passing = iou[np.newaxis, :] > iou_thresholds[:, np.newaxis]
+    return passing.sum(axis=1) / n_frame
+
+def success_error(gt_center, result_center, thresholds, n_frame):
+    """Return, for each threshold, the fraction of n_frame frames whose center error is at most that threshold."""
+    # Keep only frames whose ground-truth center has both coordinates positive.
+    valid = np.sum(gt_center > 0, axis=1) == 2
+    center_error = np.linalg.norm(gt_center[valid] - result_center[valid], axis=1)
+    cutoffs = np.asarray(thresholds)
+    within = center_error[np.newaxis, :] <= cutoffs[:, np.newaxis]
+    return within.sum(axis=1) / n_frame
+
+class OPEBenchmark:
+    """Scores saved tracker results against each video's ground-truth boxes and prints summary tables."""
+    def __init__(self, dataset):
+        self.dataset = dataset
+
+    def convert_bb_to_center(self, bboxes):
+        """Return the (N, 2) centers of (N, 4) boxes given as x, y, w, h columns."""
+        return np.array([(bboxes[:, 0] + (bboxes[:, 2] - 1) / 2),
+                         (bboxes[:, 1] + (bboxes[:, 3] - 1) / 2)]).T
+
+    def convert_bb_to_norm_center(self, bboxes, gt_wh):
+        """Return box centers divided element-wise by gt_wh (padded with 1e-16 to avoid division by zero)."""
+        return self.convert_bb_to_center(bboxes) / (gt_wh + 1e-16)
+
+    def evaluate(self, metric):
+        """Return {"tracker": {video name: curve}} for metric 'success', 'precision' or 'norm_precision'."""
+        evaluation_ret = {}
+        for video in self.dataset:
+            gt_traj = np.array(video.gt_traj)
+            tracker_traj = np.array(video.load_tracker())
+            n_frame = len(gt_traj)
+            if hasattr(video, 'absent'):
+                gt_traj = gt_traj[video.absent == 1]
+                tracker_traj = tracker_traj[video.absent == 1]
+            if metric == 'success':
+                evaluation_ret[video.name] = success_overlap(gt_traj, tracker_traj, n_frame)
+            elif metric == 'precision':
+                gt_center = self.convert_bb_to_center(gt_traj)
+                tracker_center = self.convert_bb_to_center(tracker_traj)
+                thresholds = np.arange(0, 51, 1)
+                evaluation_ret[video.name] = success_error(gt_center, tracker_center, thresholds, n_frame)
+            elif metric == 'norm_precision':
+                gt_center_norm = self.convert_bb_to_norm_center(gt_traj, gt_traj[:, 2:4])
+                tracker_center_norm = self.convert_bb_to_norm_center(tracker_traj, gt_traj[:, 2:4])
+                thresholds = np.arange(0, 51, 1) / 100
+                evaluation_ret[video.name] = success_error(gt_center_norm, tracker_center_norm, thresholds, n_frame)
+        return {"tracker": evaluation_ret}
+
+    def show_result(self, success, precision=None, norm_precision=None, show_video_level=False, height_threshold=0.6):
+        """Print a per-tracker summary table and, if requested, per-video scores (height_threshold is unused)."""
+        tracker_auc = {name: np.mean(list(scores.values())) for name, scores in success.items()}
+        # Rank trackers by mean success, best first, and keep at most 20.
+        tracker_names = sorted(tracker_auc, key=tracker_auc.get, reverse=True)[:20]
+        tracker_name_len = max(max(len(x) for x in success.keys()) + 2, 12)
+        header = ("|{:^" + str(tracker_name_len) + "}|{:^9}|{:^11}|{:^16}|").format(
+            "Tracker name", "AUC", "Precision", "Norm Precision")
+        formatter = "|{:^" + str(tracker_name_len) + "}|{:^9.3f}|{:^11.3f}|{:^16.3f}|"
+
+        print('-' * len(header))
+        print(header)
+        print('-' * len(header))
+
+        for tracker_name in tracker_names:
+            success_score = np.mean(list(success[tracker_name].values()))
+            precision_score = np.mean(list(precision[tracker_name].values()), axis=0)[20] if precision else 0
+            norm_precision_score = np.mean(list(norm_precision[tracker_name].values()), axis=0)[20] if norm_precision else 0
+            print(formatter.format(tracker_name, success_score, precision_score, norm_precision_score))
+
+        print('-' * len(header))
+
+        if show_video_level and len(success) < 10 and precision and len(precision) < 10:
+            print("\n\n")
+            header1 = "|{:^21}|".format("Tracker name")
+            header2 = "|{:^21}|".format("Video name")
+            # Header columns follow tracker_names so they line up with the row cells printed below.
+            for tracker_name in tracker_names:
+                header1 += ("{:^21}|").format(tracker_name)
+                header2 += "{:^9}|{:^11}|".format("success", "precision")
+
+            print('-' * len(header1))
+            print(header1)
+            print('-' * len(header1))
+            print(header2)
+            print('-' * len(header1))
+            # One row per video of the top-ranked tracker; every tracker must have scores for those videos.
+            for video in success[tracker_names[0]]:
+                row = "|{:^21}|".format(video)
+
+                for tracker_name in tracker_names:
+                    success_score = np.mean(success[tracker_name][video])
+                    precision_score = np.mean(precision[tracker_name][video])
+                    row += f"{success_score:^9.3f}|{precision_score:^11.3f}|"
+
+                print(row)
+
+            print('-' * len(header1))
+
+class Video:
+    """One video sequence: image paths, ground-truth boxes, frame iteration and saved tracker output."""
+    def __init__(self, name, root, video_dir, init_rect, img_names, gt_rect, attr):
+        self.name = name
+        self.video_dir = video_dir
+        self.init_rect = init_rect
+        self.gt_traj = gt_rect
+        self.attr = attr
+        self.pred_trajs = {}
+        self.img_names = [os.path.join(root, x) for x in img_names]
+        self.imgs = None  # never populated in this class; when set, __getitem__ uses it instead of disk reads
+        # Read the first frame once to record the frame size.
+        img = cv.imread(self.img_names[0])
+        assert img is not None, self.img_names[0]
+        self.height, self.width = img.shape[:2]
+
+    def __len__(self):
+        """Number of frames."""
+        return len(self.img_names)
+
+    def __getitem__(self, idx):
+        """Return (frame, ground-truth box) for frame idx, reading the image from disk when imgs is None."""
+        frame = cv.imread(self.img_names[idx]) if self.imgs is None else self.imgs[idx]
+        return frame, self.gt_traj[idx]
+
+    def __iter__(self):
+        """Yield (frame, ground-truth box) pairs in frame order."""
+        for i in range(len(self.img_names)):
+            yield self[i]
+
+    def load_tracker(self):
+        """Load boxes from OTB_results/<name>.txt; print a diagnostic and return None if missing or mis-sized."""
+        traj_file = os.path.join("OTB_results", self.name+'.txt')
+        if not os.path.exists(traj_file):
+            print(traj_file)
+            return None
+        with open(traj_file, 'r') as f:
+            pred_traj = [list(map(float, x.strip().split(','))) for x in f]
+        # A usable result file has exactly one box per ground-truth frame.
+        if len(pred_traj) != len(self.gt_traj):
+            print("tracker", len(pred_traj), len(self.gt_traj), self.name)
+            return None
+        return pred_traj
+
+class OTBDATASET:
+    """Every sequence listed in the module-level sequence_info_list, loaded as Video objects.
+
+    Indexable by video name or by integer position in name-sorted order; iterates in that same order.
+    """
+    def __init__(self, root):
+        """Build metadata and a Video for each sequence; root is the directory that contains OTB100/."""
+        meta_data = {}
+        for sequence_info in sequence_info_list:
+            # Frames skipped at the start of a sequence (0 unless the entry sets 'initOmit').
+            init_omit = sequence_info.get('initOmit', 0)
+            first_frame = sequence_info['startFrame'] + init_omit
+            nz, ext = sequence_info['nz'], sequence_info['ext']
+            # Frame files are named by frame number, zero-padded to nz digits.
+            frames = [f'{root}/OTB100/{sequence_info["path"]}/{frame_num:0{nz}}.{ext}'
+                      for frame_num in range(first_frame, sequence_info['endFrame'] + 1)]
+
+            anno_path = f'{root}/OTB100/{sequence_info["anno_path"]}'
+            # Try comma-separated first, then whitespace; drop the rows for the omitted leading frames.
+            ground_truth_rect = load_text_numpy(str(anno_path), (',', None), np.float64)[init_omit:, :]
+            meta_data[sequence_info['name']] = {
+                'video_dir': sequence_info['path'],
+                'init_rect': ground_truth_rect[0],
+                'img_names': frames,
+                'gt_rect': ground_truth_rect,
+                'attr': [sequence_info["object_class"]],
+            }
+
+        self.data = meta_data
+        self.root = root
+        self.videos = {}
+        # Instantiate one Video per sequence, showing the current name on the progress bar.
+        pbar = tqdm(meta_data.keys(), desc='Loading OTB', ncols=100)
+        for video in pbar:
+            pbar.set_postfix_str(video)
+            info = meta_data[video]
+            self.videos[video] = Video(video, self.root, info['video_dir'], info['init_rect'],
+                                       info['img_names'], info['gt_rect'], info['attr'])
+
+        # Index video names by attribute tag; 'ALL' lists every video.
+        self.attr = {'ALL': list(self.videos.keys())}
+        self.attr.update({tag: [] for v in self.videos.values() for tag in v.attr})
+        for video_name, v in self.videos.items():
+            for tag in v.attr:
+                self.attr[tag].append(video_name)
+
+    def __getitem__(self, idx):
+        """Look up a video by name (str) or by position in name-sorted order (int); other types give None."""
+        if isinstance(idx, str):
+            return self.videos[idx]
+        if isinstance(idx, int):
+            return self.videos[sorted(self.videos)[idx]]
+        return None
+
+    def __len__(self):
+        """Number of loaded videos."""
+        return len(self.videos)
+
+    def __iter__(self):
+        """Yield videos in name-sorted order."""
+        for key in sorted(self.videos):
+            yield self.videos[key]
+
+def get_axis_aligned_bbox(region):
+    """Convert a region to an axis-aligned box (cx, cy, w, h).
+
+    An 8-value region is read as four x, y corner pairs: the center is their mean and the extent of the
+    enclosing upright rectangle is scaled by sqrt(A1 / A2). Any other region is unpacked as x, y, w, h.
+    """
+    if region.size != 8:
+        x, y, w, h = region
+        return x + w / 2, y + h / 2, w, h
+
+    xs, ys = region[0::2], region[1::2]
+    x_min, x_max = min(xs), max(xs)
+    y_min, y_max = min(ys), max(ys)
+    # A1: product of two adjacent edge lengths; A2: area of the enclosing upright rectangle.
+    polygon_area = np.linalg.norm(region[0:2] - region[2:4]) * np.linalg.norm(region[2:4] - region[4:6])
+    bounding_area = (x_max - x_min) * (y_max - y_min)
+    scale = np.sqrt(polygon_area / bounding_area)
+    width = scale * (x_max - x_min) + 1
+    height = scale * (y_max - y_min) + 1
+    return np.mean(xs), np.mean(ys), width, height
+
+def load_text_numpy(path, delimiter, dtype):
+    """Load a numeric text file with np.loadtxt; a tuple/list of delimiters is tried in order until one parses."""
+    if not isinstance(delimiter, (tuple, list)):
+        return np.loadtxt(path, delimiter=delimiter, dtype=dtype)
+
+    last_error = None
+    for d in delimiter:
+        try:
+            return np.loadtxt(path, delimiter=d, dtype=dtype)
+        except Exception as err:  # not a bare except, so KeyboardInterrupt/SystemExit still propagate
+            last_error = err
+    # Every candidate failed: chain the last failure so the real cause (bad format, missing file) stays visible.
+    raise Exception('Could not read file {}'.format(path)) from last_error
+
+class OTB100:
+    """OTB100 tracking dataset wrapper: runs a tracker over every sequence and prints benchmark tables."""
+    def __init__(self, root):
+        # Accept either the parent directory or the OTB100 directory itself. The loader appends
+        # "OTB100" to root, so step up one level only when the last path component is exactly that.
+        root = os.path.abspath(root)
+        if os.path.basename(root) == "OTB100":
+            root = os.path.dirname(root)
+
+        self.dataset = OTBDATASET(root)
+
+    @property
+    def name(self):
+        """Dataset name, taken from the class name."""
+        return self.__class__.__name__
+
+    def eval(self, model):
+        """Track every video with model, save the boxes to OTB_results/<video>.txt and print per-video speed.
+
+        model must provide init(img, bbox) and infer(img) returning (is_located, bbox, score).
+        """
+        result_dir = 'OTB_results'
+        os.makedirs(result_dir, exist_ok=True)
+        for v_idx, video in enumerate(self.dataset):
+            total_time = 0
+            pred_bboxes = []
+
+            for idx, (img, gt_bbox) in enumerate(video):
+                # NOTE(review): frames are converted BGR -> RGB before reaching the model; confirm it expects RGB.
+                img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
+                tic = cv.getTickCount()
+
+                if idx == 0:
+                    # The first frame only initialises the tracker; its ground-truth box is stored as the result.
+                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
+                    pred_bbox = (int(cx - w / 2), int(cy - h / 2), int(w), int(h))
+                    model.init(img, pred_bbox)
+                else:
+                    _, pred_bbox, _ = model.infer(img)
+
+                pred_bboxes.append(pred_bbox)
+                # Timing starts after the colour conversion, so image loading and conversion are excluded.
+                total_time += (cv.getTickCount() - tic) / cv.getTickFrequency()
+
+            result_path = os.path.join(result_dir, '{}.txt'.format(video.name))
+            print(result_path)
+            with open(result_path, 'w') as f:
+                f.writelines(','.join(map(str, bbox)) + '\n' for bbox in pred_bboxes)
+
+            avg_fps = len(video) / total_time if total_time > 0 else 0
+            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
+                v_idx + 1, video.name, total_time, avg_fps))
+
+    def print_result(self):
+        """Compute success, precision and normalised precision from the saved results and print the table."""
+        benchmark = OPEBenchmark(self.dataset)
+        # Each metric is one evaluate() call, so run it in-process: a worker pool would only add the cost of
+        # pickling the whole dataset, and it needs a __main__ guard on spawn-based platforms.
+        evaluation_results = {}
+        for metric in tqdm(["success", "precision", "norm_precision"], desc='eval', ncols=100):
+            evaluation_results[metric] = benchmark.evaluate(metric)
+
+        benchmark.show_result(**evaluation_results, show_video_level=False)
+
+
+sequence_info_list = [
+    {"name": "Basketball", "path": "Basketball/img", "startFrame": 1, "endFrame": 725, "nz": 4, "ext": "jpg", "anno_path": "Basketball/groundtruth_rect.txt",
+        "object_class": "person"},
+    # {"name": "Biker", "path": "Biker/img", "startFrame": 1, "endFrame": 142, "nz": 4, "ext": "jpg", "anno_path": "Biker/groundtruth_rect.txt",
+    #     "object_class": "person head"},
+    # {"name": "Bird1", "path": "Bird1/img", "startFrame": 1, "endFrame": 408, "nz": 4, "ext": "jpg", "anno_path": "Bird1/groundtruth_rect.txt",
+    #     "object_class": "bird"},
+    # {"name": "Bird2", "path": "Bird2/img", "startFrame": 1, "endFrame": 99, "nz": 4, "ext": "jpg", "anno_path": "Bird2/groundtruth_rect.txt",
+    #     "object_class": "bird"},
+    # {"name": "BlurBody", "path": "BlurBody/img", "startFrame": 1, "endFrame": 334, "nz": 4, "ext": "jpg", "anno_path": "BlurBody/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "BlurCar1", "path": "BlurCar1/img", "startFrame": 247, "endFrame": 988, "nz": 4, "ext": "jpg", "anno_path": "BlurCar1/groundtruth_rect.txt",
+    #     "object_class": "car"},
+    # {"name": "BlurCar2", "path": "BlurCar2/img", "startFrame": 1, "endFrame": 585, "nz": 4, "ext": "jpg", "anno_path": "BlurCar2/groundtruth_rect.txt",
+    #     "object_class": "car"},
+    # {"name": "BlurCar3", "path": "BlurCar3/img", "startFrame": 3, "endFrame": 359, "nz": 4, "ext": "jpg", "anno_path": "BlurCar3/groundtruth_rect.txt",
+    #     "object_class": "car"},
+    # {"name": "BlurCar4", "path": "BlurCar4/img", "startFrame": 18, "endFrame": 397, "nz": 4, "ext": "jpg", "anno_path": "BlurCar4/groundtruth_rect.txt",
+    #     "object_class": "car"},
+    # {"name": "BlurFace", "path": "BlurFace/img", "startFrame": 1, "endFrame": 493, "nz": 4, "ext": "jpg", "anno_path": "BlurFace/groundtruth_rect.txt",
+    #     "object_class": "face"},
+    # {"name": "BlurOwl", "path": "BlurOwl/img", "startFrame": 1, "endFrame": 631, "nz": 4, "ext": "jpg", "anno_path": "BlurOwl/groundtruth_rect.txt",
+    #     "object_class": "other"},
+    # {"name": "Board", "path": "Board/img", "startFrame": 1, "endFrame": 698, "nz": 5, "ext": "jpg", "anno_path": "Board/groundtruth_rect.txt",
+    #     "object_class": "other"},
+    # {"name": "Bolt", "path": "Bolt/img", "startFrame": 1, "endFrame": 350, "nz": 4, "ext": "jpg", "anno_path": "Bolt/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Bolt2", "path": "Bolt2/img", "startFrame": 1, "endFrame": 293, "nz": 4, "ext": "jpg", "anno_path": "Bolt2/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Box", "path": "Box/img", "startFrame": 1, "endFrame": 1161, "nz": 4, "ext": "jpg", "anno_path": "Box/groundtruth_rect.txt",
+    #     "object_class": "other"},
+    # {"name": "Boy", "path": "Boy/img", "startFrame": 1, "endFrame": 602, "nz": 4, "ext": "jpg", "anno_path": "Boy/groundtruth_rect.txt",
+    #     "object_class": "face"},
+    # {"name": "Car1", "path": "Car1/img", "startFrame": 1, "endFrame": 1020, "nz": 4, "ext": "jpg", "anno_path": "Car1/groundtruth_rect.txt",
+    #     "object_class": "car"},
+    # {"name": "Car2", "path": "Car2/img", "startFrame": 1, "endFrame": 913, "nz": 4, "ext": "jpg", "anno_path": "Car2/groundtruth_rect.txt",
+    #     "object_class": "car"},
+    # {"name": "Car24", "path": "Car24/img", "startFrame": 1, "endFrame": 3059, "nz": 4, "ext": "jpg", "anno_path": "Car24/groundtruth_rect.txt",
+    #     "object_class": "car"},
+    # {"name": "Car4", "path": "Car4/img", "startFrame": 1, "endFrame": 659, "nz": 4, "ext": "jpg", "anno_path": "Car4/groundtruth_rect.txt",
+    #     "object_class": "car"},
+    # {"name": "CarDark", "path": "CarDark/img", "startFrame": 1, "endFrame": 393, "nz": 4, "ext": "jpg", "anno_path": "CarDark/groundtruth_rect.txt",
+    #     "object_class": "car"},
+    # {"name": "CarScale", "path": "CarScale/img", "startFrame": 1, "endFrame": 252, "nz": 4, "ext": "jpg", "anno_path": "CarScale/groundtruth_rect.txt",
+    #     "object_class": "car"},
+    # {"name": "ClifBar", "path": "ClifBar/img", "startFrame": 1, "endFrame": 472, "nz": 4, "ext": "jpg", "anno_path": "ClifBar/groundtruth_rect.txt",
+    #     "object_class": "other"},
+    # {"name": "Coke", "path": "Coke/img", "startFrame": 1, "endFrame": 291, "nz": 4, "ext": "jpg", "anno_path": "Coke/groundtruth_rect.txt",
+    #     "object_class": "other"},
+    # {"name": "Couple", "path": "Couple/img", "startFrame": 1, "endFrame": 140, "nz": 4, "ext": "jpg", "anno_path": "Couple/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Coupon", "path": "Coupon/img", "startFrame": 1, "endFrame": 327, "nz": 4, "ext": "jpg", "anno_path": "Coupon/groundtruth_rect.txt",
+    #     "object_class": "other"},
+    # {"name": "Crossing", "path": "Crossing/img", "startFrame": 1, "endFrame": 120, "nz": 4, "ext": "jpg", "anno_path": "Crossing/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Crowds", "path": "Crowds/img", "startFrame": 1, "endFrame": 347, "nz": 4, "ext": "jpg", "anno_path": "Crowds/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Dancer", "path": "Dancer/img", "startFrame": 1, "endFrame": 225, "nz": 4, "ext": "jpg", "anno_path": "Dancer/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Dancer2", "path": "Dancer2/img", "startFrame": 1, "endFrame": 150, "nz": 4, "ext": "jpg", "anno_path": "Dancer2/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "David", "path": "David/img", "startFrame": 300, "endFrame": 770, "nz": 4, "ext": "jpg", "anno_path": "David/groundtruth_rect.txt",
+    #     "object_class": "face"},
+    # {"name": "David2", "path": "David2/img", "startFrame": 1, "endFrame": 537, "nz": 4, "ext": "jpg", "anno_path": "David2/groundtruth_rect.txt",
+    #     "object_class": "face"},
+    # {"name": "David3", "path": "David3/img", "startFrame": 1, "endFrame": 252, "nz": 4, "ext": "jpg", "anno_path": "David3/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Deer", "path": "Deer/img", "startFrame": 1, "endFrame": 71, "nz": 4, "ext": "jpg", "anno_path": "Deer/groundtruth_rect.txt",
+    #     "object_class": "mammal"},
+    # {"name": "Diving", "path": "Diving/img", "startFrame": 1, "endFrame": 215, "nz": 4, "ext": "jpg", "anno_path": "Diving/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Dog", "path": "Dog/img", "startFrame": 1, "endFrame": 127, "nz": 4, "ext": "jpg", "anno_path": "Dog/groundtruth_rect.txt",
+    #     "object_class": "dog"},
+    # {"name": "Dog1", "path": "Dog1/img", "startFrame": 1, "endFrame": 1350, "nz": 4, "ext": "jpg", "anno_path": "Dog1/groundtruth_rect.txt",
+    #     "object_class": "dog"},
+    # {"name": "Doll", "path": "Doll/img", "startFrame": 1, "endFrame": 3872, "nz": 4, "ext": "jpg", "anno_path": "Doll/groundtruth_rect.txt",
+    #     "object_class": "other"},
+    # {"name": "DragonBaby", "path": "DragonBaby/img", "startFrame": 1, "endFrame": 113, "nz": 4, "ext": "jpg", "anno_path": "DragonBaby/groundtruth_rect.txt",
+    #     "object_class": "face"},
+    # {"name": "Dudek", "path": "Dudek/img", "startFrame": 1, "endFrame": 1145, "nz": 4, "ext": "jpg", "anno_path": "Dudek/groundtruth_rect.txt",
+    #     "object_class": "face"},
+    # {"name": "FaceOcc1", "path": "FaceOcc1/img", "startFrame": 1, "endFrame": 892, "nz": 4, "ext": "jpg", "anno_path": "FaceOcc1/groundtruth_rect.txt",
+    #     "object_class": "face"},
+    # {"name": "FaceOcc2", "path": "FaceOcc2/img", "startFrame": 1, "endFrame": 812, "nz": 4, "ext": "jpg", "anno_path": "FaceOcc2/groundtruth_rect.txt",
+    #     "object_class": "face"},
+    # {"name": "Fish", "path": "Fish/img", "startFrame": 1, "endFrame": 476, "nz": 4, "ext": "jpg", "anno_path": "Fish/groundtruth_rect.txt",
+    #     "object_class": "other"},
+    # {"name": "FleetFace", "path": "FleetFace/img", "startFrame": 1, "endFrame": 707, "nz": 4, "ext": "jpg", "anno_path": "FleetFace/groundtruth_rect.txt",
+    #     "object_class": "face"},
+    # {"name": "Football", "path": "Football/img", "startFrame": 1, "endFrame": 362, "nz": 4, "ext": "jpg", "anno_path": "Football/groundtruth_rect.txt",
+    #     "object_class": "person head"},
+    # {"name": "Football1", "path": "Football1/img", "startFrame": 1, "endFrame": 74, "nz": 4, "ext": "jpg", "anno_path": "Football1/groundtruth_rect.txt",
+    #     "object_class": "face"},
+    # {"name": "Freeman1", "path": "Freeman1/img", "startFrame": 1, "endFrame": 326, "nz": 4, "ext": "jpg", "anno_path": "Freeman1/groundtruth_rect.txt",
+    #     "object_class": "face"},
+    # {"name": "Freeman3", "path": "Freeman3/img", "startFrame": 1, "endFrame": 460, "nz": 4, "ext": "jpg", "anno_path": "Freeman3/groundtruth_rect.txt",
+    #     "object_class": "face"},
+    # {"name": "Freeman4", "path": "Freeman4/img", "startFrame": 1, "endFrame": 283, "nz": 4, "ext": "jpg", "anno_path": "Freeman4/groundtruth_rect.txt",
+    #     "object_class": "face"},
+    # {"name": "Girl", "path": "Girl/img", "startFrame": 1, "endFrame": 500, "nz": 4, "ext": "jpg", "anno_path": "Girl/groundtruth_rect.txt",
+    #     "object_class": "face"},
+    # {"name": "Girl2", "path": "Girl2/img", "startFrame": 1, "endFrame": 1500, "nz": 4, "ext": "jpg", "anno_path": "Girl2/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Gym", "path": "Gym/img", "startFrame": 1, "endFrame": 767, "nz": 4, "ext": "jpg", "anno_path": "Gym/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Human2", "path": "Human2/img", "startFrame": 1, "endFrame": 1128, "nz": 4, "ext": "jpg", "anno_path": "Human2/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Human3", "path": "Human3/img", "startFrame": 1, "endFrame": 1698, "nz": 4, "ext": "jpg", "anno_path": "Human3/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Human4", "path": "Human4/img", "startFrame": 1, "endFrame": 667, "nz": 4, "ext": "jpg", "anno_path": "Human4/groundtruth_rect.2.txt",
+    #     "object_class": "person"},
+    # {"name": "Human5", "path": "Human5/img", "startFrame": 1, "endFrame": 713, "nz": 4, "ext": "jpg", "anno_path": "Human5/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Human6", "path": "Human6/img", "startFrame": 1, "endFrame": 792, "nz": 4, "ext": "jpg", "anno_path": "Human6/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Human7", "path": "Human7/img", "startFrame": 1, "endFrame": 250, "nz": 4, "ext": "jpg", "anno_path": "Human7/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Human8", "path": "Human8/img", "startFrame": 1, "endFrame": 128, "nz": 4, "ext": "jpg", "anno_path": "Human8/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Human9", "path": "Human9/img", "startFrame": 1, "endFrame": 305, "nz": 4, "ext": "jpg", "anno_path": "Human9/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Ironman", "path": "Ironman/img", "startFrame": 1, "endFrame": 166, "nz": 4, "ext": "jpg", "anno_path": "Ironman/groundtruth_rect.txt",
+    #     "object_class": "person head"},
+    # {"name": "Jogging_1", "path": "Jogging/img", "startFrame": 1, "endFrame": 307, "nz": 4, "ext": "jpg", "anno_path": "Jogging/groundtruth_rect.1.txt",
+    #     "object_class": "person"},
+    # {"name": "Jogging_2", "path": "Jogging/img", "startFrame": 1, "endFrame": 307, "nz": 4, "ext": "jpg", "anno_path": "Jogging/groundtruth_rect.2.txt",
+    #     "object_class": "person"},
+    # {"name": "Jump", "path": "Jump/img", "startFrame": 1, "endFrame": 122, "nz": 4, "ext": "jpg", "anno_path": "Jump/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Jumping", "path": "Jumping/img", "startFrame": 1, "endFrame": 313, "nz": 4, "ext": "jpg", "anno_path": "Jumping/groundtruth_rect.txt",
+    #     "object_class": "face"},
+    # {"name": "KiteSurf", "path": "KiteSurf/img", "startFrame": 1, "endFrame": 84, "nz": 4, "ext": "jpg", "anno_path": "KiteSurf/groundtruth_rect.txt",
+    #     "object_class": "face"},
+    # {"name": "Lemming", "path": "Lemming/img", "startFrame": 1, "endFrame": 1336, "nz": 4, "ext": "jpg", "anno_path": "Lemming/groundtruth_rect.txt",
+    #     "object_class": "other"},
+    # {"name": "Liquor", "path": "Liquor/img", "startFrame": 1, "endFrame": 1741, "nz": 4, "ext": "jpg", "anno_path": "Liquor/groundtruth_rect.txt",
+    #     "object_class": "other"},
+    # {"name": "Man", "path": "Man/img", "startFrame": 1, "endFrame": 134, "nz": 4, "ext": "jpg", "anno_path": "Man/groundtruth_rect.txt",
+    #     "object_class": "face"},
+    # {"name": "Matrix", "path": "Matrix/img", "startFrame": 1, "endFrame": 100, "nz": 4, "ext": "jpg", "anno_path": "Matrix/groundtruth_rect.txt",
+    #     "object_class": "person head"},
+    # {"name": "Mhyang", "path": "Mhyang/img", "startFrame": 1, "endFrame": 1490, "nz": 4, "ext": "jpg", "anno_path": "Mhyang/groundtruth_rect.txt",
+    #     "object_class": "face"},
+    # {"name": "MotorRolling", "path": "MotorRolling/img", "startFrame": 1, "endFrame": 164, "nz": 4, "ext": "jpg", "anno_path": "MotorRolling/groundtruth_rect.txt",
+    #     "object_class": "vehicle"},
+    # {"name": "MountainBike", "path": "MountainBike/img", "startFrame": 1, "endFrame": 228, "nz": 4, "ext": "jpg", "anno_path": "MountainBike/groundtruth_rect.txt",
+    #     "object_class": "bicycle"},
+    # {"name": "Panda", "path": "Panda/img", "startFrame": 1, "endFrame": 1000, "nz": 4, "ext": "jpg", "anno_path": "Panda/groundtruth_rect.txt",
+    #     "object_class": "mammal"},
+    # {"name": "RedTeam", "path": "RedTeam/img", "startFrame": 1, "endFrame": 1918, "nz": 4, "ext": "jpg", "anno_path": "RedTeam/groundtruth_rect.txt",
+    #     "object_class": "vehicle"},
+    # {"name": "Rubik", "path": "Rubik/img", "startFrame": 1, "endFrame": 1997, "nz": 4, "ext": "jpg", "anno_path": "Rubik/groundtruth_rect.txt",
+    #     "object_class": "other"},
+    # {"name": "Shaking", "path": "Shaking/img", "startFrame": 1, "endFrame": 365, "nz": 4, "ext": "jpg", "anno_path": "Shaking/groundtruth_rect.txt",
+    #     "object_class": "face"},
+    # {"name": "Singer1", "path": "Singer1/img", "startFrame": 1, "endFrame": 351, "nz": 4, "ext": "jpg", "anno_path": "Singer1/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Singer2", "path": "Singer2/img", "startFrame": 1, "endFrame": 366, "nz": 4, "ext": "jpg", "anno_path": "Singer2/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Skater", "path": "Skater/img", "startFrame": 1, "endFrame": 160, "nz": 4, "ext": "jpg", "anno_path": "Skater/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Skater2", "path": "Skater2/img", "startFrame": 1, "endFrame": 435, "nz": 4, "ext": "jpg", "anno_path": "Skater2/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Skating1", "path": "Skating1/img", "startFrame": 1, "endFrame": 400, "nz": 4, "ext": "jpg", "anno_path": "Skating1/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Skating2_1", "path": "Skating2/img", "startFrame": 1, "endFrame": 473, "nz": 4, "ext": "jpg", "anno_path": "Skating2/groundtruth_rect.1.txt",
+    #     "object_class": "person"},
+    # {"name": "Skating2_2", "path": "Skating2/img", "startFrame": 1, "endFrame": 473, "nz": 4, "ext": "jpg", "anno_path": "Skating2/groundtruth_rect.2.txt",
+    #     "object_class": "person"},
+    # {"name": "Skiing", "path": "Skiing/img", "startFrame": 1, "endFrame": 81, "nz": 4, "ext": "jpg", "anno_path": "Skiing/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Soccer", "path": "Soccer/img", "startFrame": 1, "endFrame": 392, "nz": 4, "ext": "jpg", "anno_path": "Soccer/groundtruth_rect.txt",
+    #     "object_class": "face"},
+    # {"name": "Subway", "path": "Subway/img", "startFrame": 1, "endFrame": 175, "nz": 4, "ext": "jpg", "anno_path": "Subway/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Surfer", "path": "Surfer/img", "startFrame": 1, "endFrame": 376, "nz": 4, "ext": "jpg", "anno_path": "Surfer/groundtruth_rect.txt",
+    #     "object_class": "person head"},
+    # {"name": "Suv", "path": "Suv/img", "startFrame": 1, "endFrame": 945, "nz": 4, "ext": "jpg", "anno_path": "Suv/groundtruth_rect.txt",
+    #     "object_class": "car"},
+    # {"name": "Sylvester", "path": "Sylvester/img", "startFrame": 1, "endFrame": 1345, "nz": 4, "ext": "jpg", "anno_path": "Sylvester/groundtruth_rect.txt",
+    #     "object_class": "other"},
+    # {"name": "Tiger1", "path": "Tiger1/img", "startFrame": 1, "endFrame": 354, "nz": 4, "ext": "jpg", "anno_path": "Tiger1/groundtruth_rect.txt", "initOmit": 5,
+    #     "object_class": "other"},
+    # {"name": "Tiger2", "path": "Tiger2/img", "startFrame": 1, "endFrame": 365, "nz": 4, "ext": "jpg", "anno_path": "Tiger2/groundtruth_rect.txt",
+    #     "object_class": "other"},
+    # {"name": "Toy", "path": "Toy/img", "startFrame": 1, "endFrame": 271, "nz": 4, "ext": "jpg", "anno_path": "Toy/groundtruth_rect.txt",
+    #     "object_class": "other"},
+    # {"name": "Trans", "path": "Trans/img", "startFrame": 1, "endFrame": 124, "nz": 4, "ext": "jpg", "anno_path": "Trans/groundtruth_rect.txt",
+    #     "object_class": "other"},
+    # {"name": "Trellis", "path": "Trellis/img", "startFrame": 1, "endFrame": 569, "nz": 4, "ext": "jpg", "anno_path": "Trellis/groundtruth_rect.txt",
+    #     "object_class": "face"},
+    # {"name": "Twinnings", "path": "Twinnings/img", "startFrame": 1, "endFrame": 472, "nz": 4, "ext": "jpg", "anno_path": "Twinnings/groundtruth_rect.txt",
+    #     "object_class": "other"},
+    # {"name": "Vase", "path": "Vase/img", "startFrame": 1, "endFrame": 271, "nz": 4, "ext": "jpg", "anno_path": "Vase/groundtruth_rect.txt",
+    #     "object_class": "other"},
+    # {"name": "Walking", "path": "Walking/img", "startFrame": 1, "endFrame": 412, "nz": 4, "ext": "jpg", "anno_path": "Walking/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Walking2", "path": "Walking2/img", "startFrame": 1, "endFrame": 500, "nz": 4, "ext": "jpg", "anno_path": "Walking2/groundtruth_rect.txt",
+    #     "object_class": "person"},
+    # {"name": "Woman", "path": "Woman/img", "startFrame": 1, "endFrame": 597, "nz": 4, "ext": "jpg", "anno_path": "Woman/groundtruth_rect.txt",
+    #     "object_class": "person"}
+]
diff --git a/tools/eval/datasets/otb2015.py b/tools/eval/datasets/otb2015.py
deleted file mode 100644
index c8c44d07..00000000
--- a/tools/eval/datasets/otb2015.py
+++ /dev/null
@@ -1,333 +0,0 @@
-import os
-import json
-import numpy as np
-import cv2 as cv
-from colorama import Style, Fore
-from tqdm import tqdm
-from multiprocessing import Pool, cpu_count
-
-def overlap_ratio(rect1, rect2):
-    """Calculate the Intersection over Union (IoU) overlap ratio between two sets of rectangles."""  
-    tl = np.maximum(rect1[:, :2], rect2[:, :2])
-    br = np.minimum(rect1[:, :2] + rect1[:, 2:] - 1.0, rect2[:, :2] + rect2[:, 2:] - 1.0)
-    sz = np.maximum(br - tl + 1.0, 0)
-
-    # Area
-    intersection = np.prod(sz, axis=1)
-    union = np.prod(rect1[:, 2:], axis=1) + np.prod(rect2[:, 2:], axis=1) - intersection
-    iou = np.clip(intersection / union, 0, 1)
-    return iou
-
-
-def success_overlap(gt_bb, result_bb, n_frame):
-    """Calculate the success rate based on the overlap ratio between ground truth and predicted bounding boxes."""
-    thresholds_overlap = np.arange(0, 1.05, 0.05)
-    success = np.zeros(len(thresholds_overlap))
-    mask = np.sum(gt_bb[:, 2:] > 0, axis=1) == 2
-    iou = overlap_ratio(gt_bb[mask], result_bb[mask])
-    for i, threshold in enumerate(thresholds_overlap):
-        success[i] = np.sum(iou > threshold) / n_frame
-    return success
-
-def success_error(gt_center, result_center, thresholds, n_frame):
-    """Calculate the success rate based on the error distance between ground truth and predicted bounding box centers."""
-    success = np.zeros(len(thresholds))
-    mask = np.sum(gt_center > 0, axis=1) == 2
-    dist = np.linalg.norm(gt_center[mask] - result_center[mask], axis=1)
-    for i, threshold in enumerate(thresholds):
-        success[i] = np.sum(dist <= threshold) / n_frame
-    return success
-
-class OPEBenchmark:
-    def __init__(self, dataset):
-        self.dataset = dataset
-
-    def convert_bb_to_center(self, bboxes):
-        """Convert bounding box coordinates to centers."""
-        return np.array([(bboxes[:, 0] + (bboxes[:, 2] - 1) / 2),
-                         (bboxes[:, 1] + (bboxes[:, 3] - 1) / 2)]).T
-
-    def convert_bb_to_norm_center(self, bboxes, gt_wh):
-        """Convert bounding box coordinates to normalized centers."""
-        return self.convert_bb_to_center(bboxes) / (gt_wh + 1e-16)
-
-    def evaluate(self, metric):
-        """Evaluate the tracking performance based on the specified metric."""
-        evaluation_ret = {}
-        for video in self.dataset:
-            gt_traj = np.array(video.gt_traj)
-            tracker_traj = np.array(video.load_tracker())
-            n_frame = len(gt_traj)
-            if hasattr(video, 'absent'):
-                gt_traj = gt_traj[video.absent == 1]
-                tracker_traj = tracker_traj[video.absent == 1]
-            if metric == 'success':
-                evaluation_ret[video.name] = success_overlap(gt_traj, tracker_traj, n_frame)
-            elif metric == 'precision':
-                gt_center = self.convert_bb_to_center(gt_traj)
-                tracker_center = self.convert_bb_to_center(tracker_traj)
-                thresholds = np.arange(0, 51, 1)
-                evaluation_ret[video.name] = success_error(gt_center, tracker_center, thresholds, n_frame)
-            elif metric == 'norm_precision':
-                gt_center_norm = self.convert_bb_to_norm_center(gt_traj, gt_traj[:, 2:4])
-                tracker_center_norm = self.convert_bb_to_norm_center(tracker_traj, gt_traj[:, 2:4])
-                thresholds = np.arange(0, 51, 1) / 100
-                evaluation_ret[video.name] = success_error(gt_center_norm, tracker_center_norm, thresholds, n_frame)
-        return {"tracker": evaluation_ret}
-
-    def show_result(self, success, precision=None, norm_precision=None, show_video_level=False, height_threshold=0.6):
-        tracker_auc = {tracker_name: np.mean(list(scores.values())) for tracker_name, scores in success.items()}
-        tracker_auc = sorted(tracker_auc.items(), key=lambda x: x[1], reverse=True)[:20]
-        tracker_names = [x[0] for x in tracker_auc]
-        tracker_name_len = max(max(len(x) for x in success.keys()) + 2, 12)
-        header = ("|{:^" + str(tracker_name_len) + "}|{:^9}|{:^11}|{:^16}|").format(
-            "Tracker name", "AUC", "Precision", "Norm Precision")
-        formatter = "|{:^" + str(tracker_name_len) + "}|{:^9.3f}|{:^11.3f}|{:^16.3f}|"
-
-        print('-' * len(header))
-        print(header)
-        print('-' * len(header))
-
-        for tracker_name in tracker_names:
-            success_score = np.mean(list(success[tracker_name].values()))
-            precision_score = np.mean(list(precision[tracker_name].values()), axis=0)[20] if precision else 0
-            norm_precision_score = np.mean(list(norm_precision[tracker_name].values()), axis=0)[20] if norm_precision else 0
-            print(formatter.format(tracker_name, success_score, precision_score, norm_precision_score))
-
-        print('-' * len(header))
-
-        if show_video_level and len(success) < 10 and precision and len(precision) < 10:
-            print("\n\n")
-            header1 = "|{:^21}|".format("Tracker name")
-            header2 = "|{:^21}|".format("Video name")
-
-            for tracker_name in success.keys():
-                header1 += ("{:^21}|").format(tracker_name)
-                header2 += "{:^9}|{:^11}|".format("success", "precision")
-
-            print('-' * len(header1))
-            print(header1)
-            print('-' * len(header1))
-            print(header2)
-            print('-' * len(header1))
-
-            for video, scores in success.items():
-                row = "|{:^21}|".format(video)
-
-                for tracker_name in tracker_names:
-                    success_score = np.mean(success[tracker_name][video])
-                    precision_score = np.mean(precision[tracker_name][video])
-                    success_str = f'{success_score:.3f}' if success_score < height_threshold else f'{success_score:.3f}'
-                    precision_str = f'{precision_score:.3f}' if precision_score < height_threshold else f'{precision_score:.3f}'
-                    row += f"{success_str:^9}|{precision_str:^11}|"
-
-                print(row)
-
-            print('-' * len(header1))
-
-class Video:
-    def __init__(self, name, root, video_dir, init_rect, img_names, gt_rect, attr):
-        self.name = name
-        self.video_dir = video_dir
-        self.init_rect = init_rect
-        self.gt_traj = gt_rect
-        self.attr = attr
-        self.pred_trajs = {}
-        self.img_names = [os.path.join(root, x) for x in img_names]
-        self.imgs = None
-        img = cv.imread(self.img_names[0])
-        assert img is not None, self.img_names[0]
-        self.width = img.shape[1]
-        self.height = img.shape[0]
-
-    def __len__(self):
-        return len(self.img_names)
-
-    def __getitem__(self, idx):
-        if self.imgs is None:
-            return cv.imread(self.img_names[idx]), self.gt_traj[idx]
-        else:
-            return self.imgs[idx], self.gt_traj[idx]
-
-    def __iter__(self):
-        for i in range(len(self.img_names)):
-            if self.imgs is not None:
-                yield self.imgs[i], self.gt_traj[i]
-            else:
-                yield cv.imread(self.img_names[i]), self.gt_traj[i]
-
-    def load_tracker(self):
-        """Load tracker results from file."""
-        traj_file = os.path.join("OTB_results", self.name+'.txt')
-        if not os.path.exists(traj_file):
-            txt_names = {
-                'FleetFace': 'fleetface.txt',
-                'Jogging-1': 'jogging_1.txt',
-                'Jogging-2': 'jogging_2.txt',
-                'Skating2-1': 'skating2_1.txt',
-                'Skating2-2': 'skating2_2.txt',
-                'FaceOcc1': 'faceocc1.txt',
-                'FaceOcc2': 'faceocc2.txt',
-                'Human4-2': 'human4_2.txt'
-            }
-            txt_name = txt_names.get(self.name, self.name[0].lower() + self.name[1:] + '.txt')
-            traj_file = os.path.join("OTB_results", txt_name)
-
-        if os.path.exists(traj_file):
-            with open(traj_file, 'r') as f:
-                pred_traj = [list(map(float, x.strip().split(','))) for x in f.readlines()]
-                if len(pred_traj) != len(self.gt_traj):
-                    print("tracker", len(pred_traj), len(self.gt_traj), self.name)
-                else:
-                    return pred_traj
-        else:
-            print(traj_file)
-
-class OTBDATASET:
-    def __init__(self, root):
-        with open(os.path.join(root, 'OTB.json'), 'r') as f:
-            meta_data = json.load(f)
-        self.root = root
-        self.videos = {}
-        pbar = tqdm(meta_data.keys(), desc='Loading OTB', ncols=100)
-        for video in pbar:
-            pbar.set_postfix_str(video)
-            self.videos[video] = Video(video,
-                                       self.root,
-                                       meta_data[video]['video_dir'],
-                                       meta_data[video]['init_rect'],
-                                       meta_data[video]['img_names'],
-                                       meta_data[video]['gt_rect'],
-                                       meta_data[video]['attr'])
-        self.attr = {'ALL': list(self.videos.keys())}
-        all_attributes = [x.attr for x in self.videos.values()]
-        all_attributes = set(sum(all_attributes, []))
-        for attr_ in all_attributes:
-            self.attr[attr_] = []
-        for k, v in self.videos.items():
-            for attr_ in v.attr:
-                self.attr[attr_].append(k)
-
-    def __getitem__(self, idx):
-        if isinstance(idx, str):
-            return self.videos[idx]
-        elif isinstance(idx, int):
-            sorted_keys = sorted(list(self.videos.keys()))
-            return self.videos[sorted_keys[idx]]
-
-    def __len__(self):
-        return len(self.videos)
-
-    def __iter__(self):
-        sorted_keys = sorted(list(self.videos.keys()))
-        for key in sorted_keys:
-            yield self.videos[key]
-
-def get_axis_aligned_bbox(region):
-    """Converts a region to (cx, cy, w, h) representing an axis-aligned box."""
-    nv = region.size
-    if nv == 8:
-        cx = np.mean(region[0::2])
-        cy = np.mean(region[1::2])
-        x1 = min(region[0::2])
-        x2 = max(region[0::2])
-        y1 = min(region[1::2])
-        y2 = max(region[1::2])
-        A1 = np.linalg.norm(region[0:2] - region[2:4]) * np.linalg.norm(region[2:4] - region[4:6])
-        A2 = (x2 - x1) * (y2 - y1)
-        s = np.sqrt(A1 / A2)
-        w = s * (x2 - x1) + 1
-        h = s * (y2 - y1) + 1
-    else:
-        x, y, w, h = region
-        cx = x + w / 2
-        cy = y + h / 2
-    return cx, cy, w, h
-
-class OTB2015:
-    def __init__(self, root):
-        # Go up one if directory is provided
-        root = os.path.abspath(root)
-        if root.endswith("OTB2015"):
-            root = os.path.dirname(root)
-        print(root)
-
-        # Unzip the OTB100.zip file
-        if os.path.exists(f'{root}/OTB100.zip'):
-            os.system(f'unzip -q "{os.path.join(root, "OTB100.zip")}" -d "{root}"')
-            os.remove(f'{root}/OTB100.zip')
-
-        # Move the JSON label in if it's outside
-        if os.path.exists(f'{root}/OTB.json'):
-            os.rename(f'{root}/OTB.json', f'{root}/OTB100/OTB.json')
-
-        if os.path.exists(f'{root}/OTB100'):
-            original_directories = ['Jogging', 'Skating2', 'Human4']
-            updated_directories = ['Jogging-1', 'Jogging-2', 'Skating2-1', 'Skating2-2', 'Human4-2', 'OTB.json']
-            original_exist = all(os.path.exists(f'{root}/OTB100/{dir}') for dir in original_directories)
-            updated_exist = all(os.path.exists(f'{root}/OTB100/{dir}') for dir in updated_directories)
-            if original_exist:
-                os.rename(f'{root}/OTB100/Jogging', f'{root}/OTB100/Jogging-1')
-                os.rename(f'{root}/OTB100/Skating2', f'{root}/OTB100/Skating2-1')
-                os.rename(f'{root}/OTB100/Human4', f'{root}/OTB100/Human4-2')
-                os.system(f'cp -r "{root}/OTB100/Jogging-1" "{root}/OTB100/Jogging-2"')
-                os.system(f'cp -r "{root}/OTB100/Skating2-1" "{root}/OTB100/Skating2-2"')
-            elif not updated_exist:
-                raise RuntimeError("Not all files needed for setup are present.")
-
-        self.root = f'{root}/OTB2015'
-        self.dataset = OTBDATASET(self.root)
-
-    @property
-    def name(self):
-        return self.__class__.__name__
-
-    def eval(self, model):
-        for v_idx, video in enumerate(self.dataset):
-            total_time = 0
-            pred_bboxes = []
-            scores = []
-            track_times = []
-
-            for idx, (img, gt_bbox) in enumerate(video):
-                img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
-                tic = cv.getTickCount()
-
-                if idx == 0:
-                    cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
-                    gt_bbox_ = (int(cx - w / 2), int(cy - h / 2), int(w), int(h))
-                    model.init(img, gt_bbox_)
-                    pred_bbox = gt_bbox_
-                    scores.append(None)
-                else:
-                    isLocated, bbox, score = model.infer(img)
-                    pred_bbox = bbox
-                    scores.append(score)
-
-                pred_bboxes.append(pred_bbox)
-                toc = (cv.getTickCount() - tic) / cv.getTickFrequency()
-                total_time += toc
-                track_times.append(toc)
-
-            model_path = os.path.join('OTB_results')
-            os.makedirs(model_path, exist_ok=True)
-            result_path = os.path.join(model_path, '{}.txt'.format(video.name))
-            with open(result_path, 'w') as f:
-                for bbox in pred_bboxes:
-                    f.write(','.join(map(str, bbox)) + '\n')
-
-            avg_fps = len(video) / total_time if total_time > 0 else 0
-            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
-                v_idx + 1, video.name, total_time, avg_fps))
-
-    def print_result(self):
-        benchmark = OPEBenchmark(self.dataset)
-        num_cores = cpu_count()
-        evaluation_results = {}
-        metrics = ["success", "precision", "norm_precision"]
-        for metric in metrics:
-            with Pool(processes=min(num_cores, max(1, num_cores - 1))) as pool:
-                for ret in tqdm(pool.imap_unordered(benchmark.evaluate, [metric], 1), desc=f'eval {metric}', total=1, ncols=100):
-                    evaluation_results[metric] = ret
-
-        benchmark.show_result(**evaluation_results, show_video_level=False)
diff --git a/tools/eval/eval.py b/tools/eval/eval.py
index f5d66e9d..0fd3b553 100644
--- a/tools/eval/eval.py
+++ b/tools/eval/eval.py
@@ -122,8 +122,8 @@
         mini_supervisely=dict(
             name="MiniSupervisely",
             topic="human_segmentation"),
-        otb2015=dict(
-            name="OTB2015",
+        otb100=dict(
+            name="OTB100",
             topic="object_tracking"),
 )
 

From 27694a208a9d82de9ec81a12ddeae1260fb91f1c Mon Sep 17 00:00:00 2001
From: Ryan Lee <ryanl1288@hotmail.com>
Date: Mon, 1 Apr 2024 03:23:06 -0400
Subject: [PATCH 09/17] Uncomment all sequence infos

---
 tools/eval/datasets/otb100.py | 396 +++++++++++++++++-----------------
 1 file changed, 198 insertions(+), 198 deletions(-)

diff --git a/tools/eval/datasets/otb100.py b/tools/eval/datasets/otb100.py
index 904587f2..fa4b0e7e 100644
--- a/tools/eval/datasets/otb100.py
+++ b/tools/eval/datasets/otb100.py
@@ -335,202 +335,202 @@ def print_result(self):
 sequence_info_list = [
     {"name": "Basketball", "path": "Basketball/img", "startFrame": 1, "endFrame": 725, "nz": 4, "ext": "jpg", "anno_path": "Basketball/groundtruth_rect.txt",
         "object_class": "person"},
-    # {"name": "Biker", "path": "Biker/img", "startFrame": 1, "endFrame": 142, "nz": 4, "ext": "jpg", "anno_path": "Biker/groundtruth_rect.txt",
-    #     "object_class": "person head"},
-    # {"name": "Bird1", "path": "Bird1/img", "startFrame": 1, "endFrame": 408, "nz": 4, "ext": "jpg", "anno_path": "Bird1/groundtruth_rect.txt",
-    #     "object_class": "bird"},
-    # {"name": "Bird2", "path": "Bird2/img", "startFrame": 1, "endFrame": 99, "nz": 4, "ext": "jpg", "anno_path": "Bird2/groundtruth_rect.txt",
-    #     "object_class": "bird"},
-    # {"name": "BlurBody", "path": "BlurBody/img", "startFrame": 1, "endFrame": 334, "nz": 4, "ext": "jpg", "anno_path": "BlurBody/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "BlurCar1", "path": "BlurCar1/img", "startFrame": 247, "endFrame": 988, "nz": 4, "ext": "jpg", "anno_path": "BlurCar1/groundtruth_rect.txt",
-    #     "object_class": "car"},
-    # {"name": "BlurCar2", "path": "BlurCar2/img", "startFrame": 1, "endFrame": 585, "nz": 4, "ext": "jpg", "anno_path": "BlurCar2/groundtruth_rect.txt",
-    #     "object_class": "car"},
-    # {"name": "BlurCar3", "path": "BlurCar3/img", "startFrame": 3, "endFrame": 359, "nz": 4, "ext": "jpg", "anno_path": "BlurCar3/groundtruth_rect.txt",
-    #     "object_class": "car"},
-    # {"name": "BlurCar4", "path": "BlurCar4/img", "startFrame": 18, "endFrame": 397, "nz": 4, "ext": "jpg", "anno_path": "BlurCar4/groundtruth_rect.txt",
-    #     "object_class": "car"},
-    # {"name": "BlurFace", "path": "BlurFace/img", "startFrame": 1, "endFrame": 493, "nz": 4, "ext": "jpg", "anno_path": "BlurFace/groundtruth_rect.txt",
-    #     "object_class": "face"},
-    # {"name": "BlurOwl", "path": "BlurOwl/img", "startFrame": 1, "endFrame": 631, "nz": 4, "ext": "jpg", "anno_path": "BlurOwl/groundtruth_rect.txt",
-    #     "object_class": "other"},
-    # {"name": "Board", "path": "Board/img", "startFrame": 1, "endFrame": 698, "nz": 5, "ext": "jpg", "anno_path": "Board/groundtruth_rect.txt",
-    #     "object_class": "other"},
-    # {"name": "Bolt", "path": "Bolt/img", "startFrame": 1, "endFrame": 350, "nz": 4, "ext": "jpg", "anno_path": "Bolt/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Bolt2", "path": "Bolt2/img", "startFrame": 1, "endFrame": 293, "nz": 4, "ext": "jpg", "anno_path": "Bolt2/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Box", "path": "Box/img", "startFrame": 1, "endFrame": 1161, "nz": 4, "ext": "jpg", "anno_path": "Box/groundtruth_rect.txt",
-    #     "object_class": "other"},
-    # {"name": "Boy", "path": "Boy/img", "startFrame": 1, "endFrame": 602, "nz": 4, "ext": "jpg", "anno_path": "Boy/groundtruth_rect.txt",
-    #     "object_class": "face"},
-    # {"name": "Car1", "path": "Car1/img", "startFrame": 1, "endFrame": 1020, "nz": 4, "ext": "jpg", "anno_path": "Car1/groundtruth_rect.txt",
-    #     "object_class": "car"},
-    # {"name": "Car2", "path": "Car2/img", "startFrame": 1, "endFrame": 913, "nz": 4, "ext": "jpg", "anno_path": "Car2/groundtruth_rect.txt",
-    #     "object_class": "car"},
-    # {"name": "Car24", "path": "Car24/img", "startFrame": 1, "endFrame": 3059, "nz": 4, "ext": "jpg", "anno_path": "Car24/groundtruth_rect.txt",
-    #     "object_class": "car"},
-    # {"name": "Car4", "path": "Car4/img", "startFrame": 1, "endFrame": 659, "nz": 4, "ext": "jpg", "anno_path": "Car4/groundtruth_rect.txt",
-    #     "object_class": "car"},
-    # {"name": "CarDark", "path": "CarDark/img", "startFrame": 1, "endFrame": 393, "nz": 4, "ext": "jpg", "anno_path": "CarDark/groundtruth_rect.txt",
-    #     "object_class": "car"},
-    # {"name": "CarScale", "path": "CarScale/img", "startFrame": 1, "endFrame": 252, "nz": 4, "ext": "jpg", "anno_path": "CarScale/groundtruth_rect.txt",
-    #     "object_class": "car"},
-    # {"name": "ClifBar", "path": "ClifBar/img", "startFrame": 1, "endFrame": 472, "nz": 4, "ext": "jpg", "anno_path": "ClifBar/groundtruth_rect.txt",
-    #     "object_class": "other"},
-    # {"name": "Coke", "path": "Coke/img", "startFrame": 1, "endFrame": 291, "nz": 4, "ext": "jpg", "anno_path": "Coke/groundtruth_rect.txt",
-    #     "object_class": "other"},
-    # {"name": "Couple", "path": "Couple/img", "startFrame": 1, "endFrame": 140, "nz": 4, "ext": "jpg", "anno_path": "Couple/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Coupon", "path": "Coupon/img", "startFrame": 1, "endFrame": 327, "nz": 4, "ext": "jpg", "anno_path": "Coupon/groundtruth_rect.txt",
-    #     "object_class": "other"},
-    # {"name": "Crossing", "path": "Crossing/img", "startFrame": 1, "endFrame": 120, "nz": 4, "ext": "jpg", "anno_path": "Crossing/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Crowds", "path": "Crowds/img", "startFrame": 1, "endFrame": 347, "nz": 4, "ext": "jpg", "anno_path": "Crowds/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Dancer", "path": "Dancer/img", "startFrame": 1, "endFrame": 225, "nz": 4, "ext": "jpg", "anno_path": "Dancer/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Dancer2", "path": "Dancer2/img", "startFrame": 1, "endFrame": 150, "nz": 4, "ext": "jpg", "anno_path": "Dancer2/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "David", "path": "David/img", "startFrame": 300, "endFrame": 770, "nz": 4, "ext": "jpg", "anno_path": "David/groundtruth_rect.txt",
-    #     "object_class": "face"},
-    # {"name": "David2", "path": "David2/img", "startFrame": 1, "endFrame": 537, "nz": 4, "ext": "jpg", "anno_path": "David2/groundtruth_rect.txt",
-    #     "object_class": "face"},
-    # {"name": "David3", "path": "David3/img", "startFrame": 1, "endFrame": 252, "nz": 4, "ext": "jpg", "anno_path": "David3/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Deer", "path": "Deer/img", "startFrame": 1, "endFrame": 71, "nz": 4, "ext": "jpg", "anno_path": "Deer/groundtruth_rect.txt",
-    #     "object_class": "mammal"},
-    # {"name": "Diving", "path": "Diving/img", "startFrame": 1, "endFrame": 215, "nz": 4, "ext": "jpg", "anno_path": "Diving/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Dog", "path": "Dog/img", "startFrame": 1, "endFrame": 127, "nz": 4, "ext": "jpg", "anno_path": "Dog/groundtruth_rect.txt",
-    #     "object_class": "dog"},
-    # {"name": "Dog1", "path": "Dog1/img", "startFrame": 1, "endFrame": 1350, "nz": 4, "ext": "jpg", "anno_path": "Dog1/groundtruth_rect.txt",
-    #     "object_class": "dog"},
-    # {"name": "Doll", "path": "Doll/img", "startFrame": 1, "endFrame": 3872, "nz": 4, "ext": "jpg", "anno_path": "Doll/groundtruth_rect.txt",
-    #     "object_class": "other"},
-    # {"name": "DragonBaby", "path": "DragonBaby/img", "startFrame": 1, "endFrame": 113, "nz": 4, "ext": "jpg", "anno_path": "DragonBaby/groundtruth_rect.txt",
-    #     "object_class": "face"},
-    # {"name": "Dudek", "path": "Dudek/img", "startFrame": 1, "endFrame": 1145, "nz": 4, "ext": "jpg", "anno_path": "Dudek/groundtruth_rect.txt",
-    #     "object_class": "face"},
-    # {"name": "FaceOcc1", "path": "FaceOcc1/img", "startFrame": 1, "endFrame": 892, "nz": 4, "ext": "jpg", "anno_path": "FaceOcc1/groundtruth_rect.txt",
-    #     "object_class": "face"},
-    # {"name": "FaceOcc2", "path": "FaceOcc2/img", "startFrame": 1, "endFrame": 812, "nz": 4, "ext": "jpg", "anno_path": "FaceOcc2/groundtruth_rect.txt",
-    #     "object_class": "face"},
-    # {"name": "Fish", "path": "Fish/img", "startFrame": 1, "endFrame": 476, "nz": 4, "ext": "jpg", "anno_path": "Fish/groundtruth_rect.txt",
-    #     "object_class": "other"},
-    # {"name": "FleetFace", "path": "FleetFace/img", "startFrame": 1, "endFrame": 707, "nz": 4, "ext": "jpg", "anno_path": "FleetFace/groundtruth_rect.txt",
-    #     "object_class": "face"},
-    # {"name": "Football", "path": "Football/img", "startFrame": 1, "endFrame": 362, "nz": 4, "ext": "jpg", "anno_path": "Football/groundtruth_rect.txt",
-    #     "object_class": "person head"},
-    # {"name": "Football1", "path": "Football1/img", "startFrame": 1, "endFrame": 74, "nz": 4, "ext": "jpg", "anno_path": "Football1/groundtruth_rect.txt",
-    #     "object_class": "face"},
-    # {"name": "Freeman1", "path": "Freeman1/img", "startFrame": 1, "endFrame": 326, "nz": 4, "ext": "jpg", "anno_path": "Freeman1/groundtruth_rect.txt",
-    #     "object_class": "face"},
-    # {"name": "Freeman3", "path": "Freeman3/img", "startFrame": 1, "endFrame": 460, "nz": 4, "ext": "jpg", "anno_path": "Freeman3/groundtruth_rect.txt",
-    #     "object_class": "face"},
-    # {"name": "Freeman4", "path": "Freeman4/img", "startFrame": 1, "endFrame": 283, "nz": 4, "ext": "jpg", "anno_path": "Freeman4/groundtruth_rect.txt",
-    #     "object_class": "face"},
-    # {"name": "Girl", "path": "Girl/img", "startFrame": 1, "endFrame": 500, "nz": 4, "ext": "jpg", "anno_path": "Girl/groundtruth_rect.txt",
-    #     "object_class": "face"},
-    # {"name": "Girl2", "path": "Girl2/img", "startFrame": 1, "endFrame": 1500, "nz": 4, "ext": "jpg", "anno_path": "Girl2/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Gym", "path": "Gym/img", "startFrame": 1, "endFrame": 767, "nz": 4, "ext": "jpg", "anno_path": "Gym/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Human2", "path": "Human2/img", "startFrame": 1, "endFrame": 1128, "nz": 4, "ext": "jpg", "anno_path": "Human2/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Human3", "path": "Human3/img", "startFrame": 1, "endFrame": 1698, "nz": 4, "ext": "jpg", "anno_path": "Human3/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Human4", "path": "Human4/img", "startFrame": 1, "endFrame": 667, "nz": 4, "ext": "jpg", "anno_path": "Human4/groundtruth_rect.2.txt",
-    #     "object_class": "person"},
-    # {"name": "Human5", "path": "Human5/img", "startFrame": 1, "endFrame": 713, "nz": 4, "ext": "jpg", "anno_path": "Human5/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Human6", "path": "Human6/img", "startFrame": 1, "endFrame": 792, "nz": 4, "ext": "jpg", "anno_path": "Human6/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Human7", "path": "Human7/img", "startFrame": 1, "endFrame": 250, "nz": 4, "ext": "jpg", "anno_path": "Human7/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Human8", "path": "Human8/img", "startFrame": 1, "endFrame": 128, "nz": 4, "ext": "jpg", "anno_path": "Human8/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Human9", "path": "Human9/img", "startFrame": 1, "endFrame": 305, "nz": 4, "ext": "jpg", "anno_path": "Human9/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Ironman", "path": "Ironman/img", "startFrame": 1, "endFrame": 166, "nz": 4, "ext": "jpg", "anno_path": "Ironman/groundtruth_rect.txt",
-    #     "object_class": "person head"},
-    # {"name": "Jogging_1", "path": "Jogging/img", "startFrame": 1, "endFrame": 307, "nz": 4, "ext": "jpg", "anno_path": "Jogging/groundtruth_rect.1.txt",
-    #     "object_class": "person"},
-    # {"name": "Jogging_2", "path": "Jogging/img", "startFrame": 1, "endFrame": 307, "nz": 4, "ext": "jpg", "anno_path": "Jogging/groundtruth_rect.2.txt",
-    #     "object_class": "person"},
-    # {"name": "Jump", "path": "Jump/img", "startFrame": 1, "endFrame": 122, "nz": 4, "ext": "jpg", "anno_path": "Jump/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Jumping", "path": "Jumping/img", "startFrame": 1, "endFrame": 313, "nz": 4, "ext": "jpg", "anno_path": "Jumping/groundtruth_rect.txt",
-    #     "object_class": "face"},
-    # {"name": "KiteSurf", "path": "KiteSurf/img", "startFrame": 1, "endFrame": 84, "nz": 4, "ext": "jpg", "anno_path": "KiteSurf/groundtruth_rect.txt",
-    #     "object_class": "face"},
-    # {"name": "Lemming", "path": "Lemming/img", "startFrame": 1, "endFrame": 1336, "nz": 4, "ext": "jpg", "anno_path": "Lemming/groundtruth_rect.txt",
-    #     "object_class": "other"},
-    # {"name": "Liquor", "path": "Liquor/img", "startFrame": 1, "endFrame": 1741, "nz": 4, "ext": "jpg", "anno_path": "Liquor/groundtruth_rect.txt",
-    #     "object_class": "other"},
-    # {"name": "Man", "path": "Man/img", "startFrame": 1, "endFrame": 134, "nz": 4, "ext": "jpg", "anno_path": "Man/groundtruth_rect.txt",
-    #     "object_class": "face"},
-    # {"name": "Matrix", "path": "Matrix/img", "startFrame": 1, "endFrame": 100, "nz": 4, "ext": "jpg", "anno_path": "Matrix/groundtruth_rect.txt",
-    #     "object_class": "person head"},
-    # {"name": "Mhyang", "path": "Mhyang/img", "startFrame": 1, "endFrame": 1490, "nz": 4, "ext": "jpg", "anno_path": "Mhyang/groundtruth_rect.txt",
-    #     "object_class": "face"},
-    # {"name": "MotorRolling", "path": "MotorRolling/img", "startFrame": 1, "endFrame": 164, "nz": 4, "ext": "jpg", "anno_path": "MotorRolling/groundtruth_rect.txt",
-    #     "object_class": "vehicle"},
-    # {"name": "MountainBike", "path": "MountainBike/img", "startFrame": 1, "endFrame": 228, "nz": 4, "ext": "jpg", "anno_path": "MountainBike/groundtruth_rect.txt",
-    #     "object_class": "bicycle"},
-    # {"name": "Panda", "path": "Panda/img", "startFrame": 1, "endFrame": 1000, "nz": 4, "ext": "jpg", "anno_path": "Panda/groundtruth_rect.txt",
-    #     "object_class": "mammal"},
-    # {"name": "RedTeam", "path": "RedTeam/img", "startFrame": 1, "endFrame": 1918, "nz": 4, "ext": "jpg", "anno_path": "RedTeam/groundtruth_rect.txt",
-    #     "object_class": "vehicle"},
-    # {"name": "Rubik", "path": "Rubik/img", "startFrame": 1, "endFrame": 1997, "nz": 4, "ext": "jpg", "anno_path": "Rubik/groundtruth_rect.txt",
-    #     "object_class": "other"},
-    # {"name": "Shaking", "path": "Shaking/img", "startFrame": 1, "endFrame": 365, "nz": 4, "ext": "jpg", "anno_path": "Shaking/groundtruth_rect.txt",
-    #     "object_class": "face"},
-    # {"name": "Singer1", "path": "Singer1/img", "startFrame": 1, "endFrame": 351, "nz": 4, "ext": "jpg", "anno_path": "Singer1/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Singer2", "path": "Singer2/img", "startFrame": 1, "endFrame": 366, "nz": 4, "ext": "jpg", "anno_path": "Singer2/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Skater", "path": "Skater/img", "startFrame": 1, "endFrame": 160, "nz": 4, "ext": "jpg", "anno_path": "Skater/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Skater2", "path": "Skater2/img", "startFrame": 1, "endFrame": 435, "nz": 4, "ext": "jpg", "anno_path": "Skater2/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Skating1", "path": "Skating1/img", "startFrame": 1, "endFrame": 400, "nz": 4, "ext": "jpg", "anno_path": "Skating1/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Skating2_1", "path": "Skating2/img", "startFrame": 1, "endFrame": 473, "nz": 4, "ext": "jpg", "anno_path": "Skating2/groundtruth_rect.1.txt",
-    #     "object_class": "person"},
-    # {"name": "Skating2_2", "path": "Skating2/img", "startFrame": 1, "endFrame": 473, "nz": 4, "ext": "jpg", "anno_path": "Skating2/groundtruth_rect.2.txt",
-    #     "object_class": "person"},
-    # {"name": "Skiing", "path": "Skiing/img", "startFrame": 1, "endFrame": 81, "nz": 4, "ext": "jpg", "anno_path": "Skiing/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Soccer", "path": "Soccer/img", "startFrame": 1, "endFrame": 392, "nz": 4, "ext": "jpg", "anno_path": "Soccer/groundtruth_rect.txt",
-    #     "object_class": "face"},
-    # {"name": "Subway", "path": "Subway/img", "startFrame": 1, "endFrame": 175, "nz": 4, "ext": "jpg", "anno_path": "Subway/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Surfer", "path": "Surfer/img", "startFrame": 1, "endFrame": 376, "nz": 4, "ext": "jpg", "anno_path": "Surfer/groundtruth_rect.txt",
-    #     "object_class": "person head"},
-    # {"name": "Suv", "path": "Suv/img", "startFrame": 1, "endFrame": 945, "nz": 4, "ext": "jpg", "anno_path": "Suv/groundtruth_rect.txt",
-    #     "object_class": "car"},
-    # {"name": "Sylvester", "path": "Sylvester/img", "startFrame": 1, "endFrame": 1345, "nz": 4, "ext": "jpg", "anno_path": "Sylvester/groundtruth_rect.txt",
-    #     "object_class": "other"},
-    # {"name": "Tiger1", "path": "Tiger1/img", "startFrame": 1, "endFrame": 354, "nz": 4, "ext": "jpg", "anno_path": "Tiger1/groundtruth_rect.txt", "initOmit": 5,
-    #     "object_class": "other"},
-    # {"name": "Tiger2", "path": "Tiger2/img", "startFrame": 1, "endFrame": 365, "nz": 4, "ext": "jpg", "anno_path": "Tiger2/groundtruth_rect.txt",
-    #     "object_class": "other"},
-    # {"name": "Toy", "path": "Toy/img", "startFrame": 1, "endFrame": 271, "nz": 4, "ext": "jpg", "anno_path": "Toy/groundtruth_rect.txt",
-    #     "object_class": "other"},
-    # {"name": "Trans", "path": "Trans/img", "startFrame": 1, "endFrame": 124, "nz": 4, "ext": "jpg", "anno_path": "Trans/groundtruth_rect.txt",
-    #     "object_class": "other"},
-    # {"name": "Trellis", "path": "Trellis/img", "startFrame": 1, "endFrame": 569, "nz": 4, "ext": "jpg", "anno_path": "Trellis/groundtruth_rect.txt",
-    #     "object_class": "face"},
-    # {"name": "Twinnings", "path": "Twinnings/img", "startFrame": 1, "endFrame": 472, "nz": 4, "ext": "jpg", "anno_path": "Twinnings/groundtruth_rect.txt",
-    #     "object_class": "other"},
-    # {"name": "Vase", "path": "Vase/img", "startFrame": 1, "endFrame": 271, "nz": 4, "ext": "jpg", "anno_path": "Vase/groundtruth_rect.txt",
-    #     "object_class": "other"},
-    # {"name": "Walking", "path": "Walking/img", "startFrame": 1, "endFrame": 412, "nz": 4, "ext": "jpg", "anno_path": "Walking/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Walking2", "path": "Walking2/img", "startFrame": 1, "endFrame": 500, "nz": 4, "ext": "jpg", "anno_path": "Walking2/groundtruth_rect.txt",
-    #     "object_class": "person"},
-    # {"name": "Woman", "path": "Woman/img", "startFrame": 1, "endFrame": 597, "nz": 4, "ext": "jpg", "anno_path": "Woman/groundtruth_rect.txt",
-    #     "object_class": "person"}
+    {"name": "Biker", "path": "Biker/img", "startFrame": 1, "endFrame": 142, "nz": 4, "ext": "jpg", "anno_path": "Biker/groundtruth_rect.txt",
+        "object_class": "person head"},
+    {"name": "Bird1", "path": "Bird1/img", "startFrame": 1, "endFrame": 408, "nz": 4, "ext": "jpg", "anno_path": "Bird1/groundtruth_rect.txt",
+        "object_class": "bird"},
+    {"name": "Bird2", "path": "Bird2/img", "startFrame": 1, "endFrame": 99, "nz": 4, "ext": "jpg", "anno_path": "Bird2/groundtruth_rect.txt",
+        "object_class": "bird"},
+    {"name": "BlurBody", "path": "BlurBody/img", "startFrame": 1, "endFrame": 334, "nz": 4, "ext": "jpg", "anno_path": "BlurBody/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "BlurCar1", "path": "BlurCar1/img", "startFrame": 247, "endFrame": 988, "nz": 4, "ext": "jpg", "anno_path": "BlurCar1/groundtruth_rect.txt",
+        "object_class": "car"},
+    {"name": "BlurCar2", "path": "BlurCar2/img", "startFrame": 1, "endFrame": 585, "nz": 4, "ext": "jpg", "anno_path": "BlurCar2/groundtruth_rect.txt",
+        "object_class": "car"},
+    {"name": "BlurCar3", "path": "BlurCar3/img", "startFrame": 3, "endFrame": 359, "nz": 4, "ext": "jpg", "anno_path": "BlurCar3/groundtruth_rect.txt",
+        "object_class": "car"},
+    {"name": "BlurCar4", "path": "BlurCar4/img", "startFrame": 18, "endFrame": 397, "nz": 4, "ext": "jpg", "anno_path": "BlurCar4/groundtruth_rect.txt",
+        "object_class": "car"},
+    {"name": "BlurFace", "path": "BlurFace/img", "startFrame": 1, "endFrame": 493, "nz": 4, "ext": "jpg", "anno_path": "BlurFace/groundtruth_rect.txt",
+        "object_class": "face"},
+    {"name": "BlurOwl", "path": "BlurOwl/img", "startFrame": 1, "endFrame": 631, "nz": 4, "ext": "jpg", "anno_path": "BlurOwl/groundtruth_rect.txt",
+        "object_class": "other"},
+    {"name": "Board", "path": "Board/img", "startFrame": 1, "endFrame": 698, "nz": 5, "ext": "jpg", "anno_path": "Board/groundtruth_rect.txt",
+        "object_class": "other"},
+    {"name": "Bolt", "path": "Bolt/img", "startFrame": 1, "endFrame": 350, "nz": 4, "ext": "jpg", "anno_path": "Bolt/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Bolt2", "path": "Bolt2/img", "startFrame": 1, "endFrame": 293, "nz": 4, "ext": "jpg", "anno_path": "Bolt2/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Box", "path": "Box/img", "startFrame": 1, "endFrame": 1161, "nz": 4, "ext": "jpg", "anno_path": "Box/groundtruth_rect.txt",
+        "object_class": "other"},
+    {"name": "Boy", "path": "Boy/img", "startFrame": 1, "endFrame": 602, "nz": 4, "ext": "jpg", "anno_path": "Boy/groundtruth_rect.txt",
+        "object_class": "face"},
+    {"name": "Car1", "path": "Car1/img", "startFrame": 1, "endFrame": 1020, "nz": 4, "ext": "jpg", "anno_path": "Car1/groundtruth_rect.txt",
+        "object_class": "car"},
+    {"name": "Car2", "path": "Car2/img", "startFrame": 1, "endFrame": 913, "nz": 4, "ext": "jpg", "anno_path": "Car2/groundtruth_rect.txt",
+        "object_class": "car"},
+    {"name": "Car24", "path": "Car24/img", "startFrame": 1, "endFrame": 3059, "nz": 4, "ext": "jpg", "anno_path": "Car24/groundtruth_rect.txt",
+        "object_class": "car"},
+    {"name": "Car4", "path": "Car4/img", "startFrame": 1, "endFrame": 659, "nz": 4, "ext": "jpg", "anno_path": "Car4/groundtruth_rect.txt",
+        "object_class": "car"},
+    {"name": "CarDark", "path": "CarDark/img", "startFrame": 1, "endFrame": 393, "nz": 4, "ext": "jpg", "anno_path": "CarDark/groundtruth_rect.txt",
+        "object_class": "car"},
+    {"name": "CarScale", "path": "CarScale/img", "startFrame": 1, "endFrame": 252, "nz": 4, "ext": "jpg", "anno_path": "CarScale/groundtruth_rect.txt",
+        "object_class": "car"},
+    {"name": "ClifBar", "path": "ClifBar/img", "startFrame": 1, "endFrame": 472, "nz": 4, "ext": "jpg", "anno_path": "ClifBar/groundtruth_rect.txt",
+        "object_class": "other"},
+    {"name": "Coke", "path": "Coke/img", "startFrame": 1, "endFrame": 291, "nz": 4, "ext": "jpg", "anno_path": "Coke/groundtruth_rect.txt",
+        "object_class": "other"},
+    {"name": "Couple", "path": "Couple/img", "startFrame": 1, "endFrame": 140, "nz": 4, "ext": "jpg", "anno_path": "Couple/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Coupon", "path": "Coupon/img", "startFrame": 1, "endFrame": 327, "nz": 4, "ext": "jpg", "anno_path": "Coupon/groundtruth_rect.txt",
+        "object_class": "other"},
+    {"name": "Crossing", "path": "Crossing/img", "startFrame": 1, "endFrame": 120, "nz": 4, "ext": "jpg", "anno_path": "Crossing/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Crowds", "path": "Crowds/img", "startFrame": 1, "endFrame": 347, "nz": 4, "ext": "jpg", "anno_path": "Crowds/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Dancer", "path": "Dancer/img", "startFrame": 1, "endFrame": 225, "nz": 4, "ext": "jpg", "anno_path": "Dancer/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Dancer2", "path": "Dancer2/img", "startFrame": 1, "endFrame": 150, "nz": 4, "ext": "jpg", "anno_path": "Dancer2/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "David", "path": "David/img", "startFrame": 300, "endFrame": 770, "nz": 4, "ext": "jpg", "anno_path": "David/groundtruth_rect.txt",
+        "object_class": "face"},
+    {"name": "David2", "path": "David2/img", "startFrame": 1, "endFrame": 537, "nz": 4, "ext": "jpg", "anno_path": "David2/groundtruth_rect.txt",
+        "object_class": "face"},
+    {"name": "David3", "path": "David3/img", "startFrame": 1, "endFrame": 252, "nz": 4, "ext": "jpg", "anno_path": "David3/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Deer", "path": "Deer/img", "startFrame": 1, "endFrame": 71, "nz": 4, "ext": "jpg", "anno_path": "Deer/groundtruth_rect.txt",
+        "object_class": "mammal"},
+    {"name": "Diving", "path": "Diving/img", "startFrame": 1, "endFrame": 215, "nz": 4, "ext": "jpg", "anno_path": "Diving/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Dog", "path": "Dog/img", "startFrame": 1, "endFrame": 127, "nz": 4, "ext": "jpg", "anno_path": "Dog/groundtruth_rect.txt",
+        "object_class": "dog"},
+    {"name": "Dog1", "path": "Dog1/img", "startFrame": 1, "endFrame": 1350, "nz": 4, "ext": "jpg", "anno_path": "Dog1/groundtruth_rect.txt",
+        "object_class": "dog"},
+    {"name": "Doll", "path": "Doll/img", "startFrame": 1, "endFrame": 3872, "nz": 4, "ext": "jpg", "anno_path": "Doll/groundtruth_rect.txt",
+        "object_class": "other"},
+    {"name": "DragonBaby", "path": "DragonBaby/img", "startFrame": 1, "endFrame": 113, "nz": 4, "ext": "jpg", "anno_path": "DragonBaby/groundtruth_rect.txt",
+        "object_class": "face"},
+    {"name": "Dudek", "path": "Dudek/img", "startFrame": 1, "endFrame": 1145, "nz": 4, "ext": "jpg", "anno_path": "Dudek/groundtruth_rect.txt",
+        "object_class": "face"},
+    {"name": "FaceOcc1", "path": "FaceOcc1/img", "startFrame": 1, "endFrame": 892, "nz": 4, "ext": "jpg", "anno_path": "FaceOcc1/groundtruth_rect.txt",
+        "object_class": "face"},
+    {"name": "FaceOcc2", "path": "FaceOcc2/img", "startFrame": 1, "endFrame": 812, "nz": 4, "ext": "jpg", "anno_path": "FaceOcc2/groundtruth_rect.txt",
+        "object_class": "face"},
+    {"name": "Fish", "path": "Fish/img", "startFrame": 1, "endFrame": 476, "nz": 4, "ext": "jpg", "anno_path": "Fish/groundtruth_rect.txt",
+        "object_class": "other"},
+    {"name": "FleetFace", "path": "FleetFace/img", "startFrame": 1, "endFrame": 707, "nz": 4, "ext": "jpg", "anno_path": "FleetFace/groundtruth_rect.txt",
+        "object_class": "face"},
+    {"name": "Football", "path": "Football/img", "startFrame": 1, "endFrame": 362, "nz": 4, "ext": "jpg", "anno_path": "Football/groundtruth_rect.txt",
+        "object_class": "person head"},
+    {"name": "Football1", "path": "Football1/img", "startFrame": 1, "endFrame": 74, "nz": 4, "ext": "jpg", "anno_path": "Football1/groundtruth_rect.txt",
+        "object_class": "face"},
+    {"name": "Freeman1", "path": "Freeman1/img", "startFrame": 1, "endFrame": 326, "nz": 4, "ext": "jpg", "anno_path": "Freeman1/groundtruth_rect.txt",
+        "object_class": "face"},
+    {"name": "Freeman3", "path": "Freeman3/img", "startFrame": 1, "endFrame": 460, "nz": 4, "ext": "jpg", "anno_path": "Freeman3/groundtruth_rect.txt",
+        "object_class": "face"},
+    {"name": "Freeman4", "path": "Freeman4/img", "startFrame": 1, "endFrame": 283, "nz": 4, "ext": "jpg", "anno_path": "Freeman4/groundtruth_rect.txt",
+        "object_class": "face"},
+    {"name": "Girl", "path": "Girl/img", "startFrame": 1, "endFrame": 500, "nz": 4, "ext": "jpg", "anno_path": "Girl/groundtruth_rect.txt",
+        "object_class": "face"},
+    {"name": "Girl2", "path": "Girl2/img", "startFrame": 1, "endFrame": 1500, "nz": 4, "ext": "jpg", "anno_path": "Girl2/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Gym", "path": "Gym/img", "startFrame": 1, "endFrame": 767, "nz": 4, "ext": "jpg", "anno_path": "Gym/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Human2", "path": "Human2/img", "startFrame": 1, "endFrame": 1128, "nz": 4, "ext": "jpg", "anno_path": "Human2/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Human3", "path": "Human3/img", "startFrame": 1, "endFrame": 1698, "nz": 4, "ext": "jpg", "anno_path": "Human3/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Human4", "path": "Human4/img", "startFrame": 1, "endFrame": 667, "nz": 4, "ext": "jpg", "anno_path": "Human4/groundtruth_rect.2.txt",
+        "object_class": "person"},
+    {"name": "Human5", "path": "Human5/img", "startFrame": 1, "endFrame": 713, "nz": 4, "ext": "jpg", "anno_path": "Human5/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Human6", "path": "Human6/img", "startFrame": 1, "endFrame": 792, "nz": 4, "ext": "jpg", "anno_path": "Human6/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Human7", "path": "Human7/img", "startFrame": 1, "endFrame": 250, "nz": 4, "ext": "jpg", "anno_path": "Human7/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Human8", "path": "Human8/img", "startFrame": 1, "endFrame": 128, "nz": 4, "ext": "jpg", "anno_path": "Human8/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Human9", "path": "Human9/img", "startFrame": 1, "endFrame": 305, "nz": 4, "ext": "jpg", "anno_path": "Human9/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Ironman", "path": "Ironman/img", "startFrame": 1, "endFrame": 166, "nz": 4, "ext": "jpg", "anno_path": "Ironman/groundtruth_rect.txt",
+        "object_class": "person head"},
+    {"name": "Jogging_1", "path": "Jogging/img", "startFrame": 1, "endFrame": 307, "nz": 4, "ext": "jpg", "anno_path": "Jogging/groundtruth_rect.1.txt",
+        "object_class": "person"},
+    {"name": "Jogging_2", "path": "Jogging/img", "startFrame": 1, "endFrame": 307, "nz": 4, "ext": "jpg", "anno_path": "Jogging/groundtruth_rect.2.txt",
+        "object_class": "person"},
+    {"name": "Jump", "path": "Jump/img", "startFrame": 1, "endFrame": 122, "nz": 4, "ext": "jpg", "anno_path": "Jump/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Jumping", "path": "Jumping/img", "startFrame": 1, "endFrame": 313, "nz": 4, "ext": "jpg", "anno_path": "Jumping/groundtruth_rect.txt",
+        "object_class": "face"},
+    {"name": "KiteSurf", "path": "KiteSurf/img", "startFrame": 1, "endFrame": 84, "nz": 4, "ext": "jpg", "anno_path": "KiteSurf/groundtruth_rect.txt",
+        "object_class": "face"},
+    {"name": "Lemming", "path": "Lemming/img", "startFrame": 1, "endFrame": 1336, "nz": 4, "ext": "jpg", "anno_path": "Lemming/groundtruth_rect.txt",
+        "object_class": "other"},
+    {"name": "Liquor", "path": "Liquor/img", "startFrame": 1, "endFrame": 1741, "nz": 4, "ext": "jpg", "anno_path": "Liquor/groundtruth_rect.txt",
+        "object_class": "other"},
+    {"name": "Man", "path": "Man/img", "startFrame": 1, "endFrame": 134, "nz": 4, "ext": "jpg", "anno_path": "Man/groundtruth_rect.txt",
+        "object_class": "face"},
+    {"name": "Matrix", "path": "Matrix/img", "startFrame": 1, "endFrame": 100, "nz": 4, "ext": "jpg", "anno_path": "Matrix/groundtruth_rect.txt",
+        "object_class": "person head"},
+    {"name": "Mhyang", "path": "Mhyang/img", "startFrame": 1, "endFrame": 1490, "nz": 4, "ext": "jpg", "anno_path": "Mhyang/groundtruth_rect.txt",
+        "object_class": "face"},
+    {"name": "MotorRolling", "path": "MotorRolling/img", "startFrame": 1, "endFrame": 164, "nz": 4, "ext": "jpg", "anno_path": "MotorRolling/groundtruth_rect.txt",
+        "object_class": "vehicle"},
+    {"name": "MountainBike", "path": "MountainBike/img", "startFrame": 1, "endFrame": 228, "nz": 4, "ext": "jpg", "anno_path": "MountainBike/groundtruth_rect.txt",
+        "object_class": "bicycle"},
+    {"name": "Panda", "path": "Panda/img", "startFrame": 1, "endFrame": 1000, "nz": 4, "ext": "jpg", "anno_path": "Panda/groundtruth_rect.txt",
+        "object_class": "mammal"},
+    {"name": "RedTeam", "path": "RedTeam/img", "startFrame": 1, "endFrame": 1918, "nz": 4, "ext": "jpg", "anno_path": "RedTeam/groundtruth_rect.txt",
+        "object_class": "vehicle"},
+    {"name": "Rubik", "path": "Rubik/img", "startFrame": 1, "endFrame": 1997, "nz": 4, "ext": "jpg", "anno_path": "Rubik/groundtruth_rect.txt",
+        "object_class": "other"},
+    {"name": "Shaking", "path": "Shaking/img", "startFrame": 1, "endFrame": 365, "nz": 4, "ext": "jpg", "anno_path": "Shaking/groundtruth_rect.txt",
+        "object_class": "face"},
+    {"name": "Singer1", "path": "Singer1/img", "startFrame": 1, "endFrame": 351, "nz": 4, "ext": "jpg", "anno_path": "Singer1/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Singer2", "path": "Singer2/img", "startFrame": 1, "endFrame": 366, "nz": 4, "ext": "jpg", "anno_path": "Singer2/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Skater", "path": "Skater/img", "startFrame": 1, "endFrame": 160, "nz": 4, "ext": "jpg", "anno_path": "Skater/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Skater2", "path": "Skater2/img", "startFrame": 1, "endFrame": 435, "nz": 4, "ext": "jpg", "anno_path": "Skater2/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Skating1", "path": "Skating1/img", "startFrame": 1, "endFrame": 400, "nz": 4, "ext": "jpg", "anno_path": "Skating1/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Skating2_1", "path": "Skating2/img", "startFrame": 1, "endFrame": 473, "nz": 4, "ext": "jpg", "anno_path": "Skating2/groundtruth_rect.1.txt",
+        "object_class": "person"},
+    {"name": "Skating2_2", "path": "Skating2/img", "startFrame": 1, "endFrame": 473, "nz": 4, "ext": "jpg", "anno_path": "Skating2/groundtruth_rect.2.txt",
+        "object_class": "person"},
+    {"name": "Skiing", "path": "Skiing/img", "startFrame": 1, "endFrame": 81, "nz": 4, "ext": "jpg", "anno_path": "Skiing/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Soccer", "path": "Soccer/img", "startFrame": 1, "endFrame": 392, "nz": 4, "ext": "jpg", "anno_path": "Soccer/groundtruth_rect.txt",
+        "object_class": "face"},
+    {"name": "Subway", "path": "Subway/img", "startFrame": 1, "endFrame": 175, "nz": 4, "ext": "jpg", "anno_path": "Subway/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Surfer", "path": "Surfer/img", "startFrame": 1, "endFrame": 376, "nz": 4, "ext": "jpg", "anno_path": "Surfer/groundtruth_rect.txt",
+        "object_class": "person head"},
+    {"name": "Suv", "path": "Suv/img", "startFrame": 1, "endFrame": 945, "nz": 4, "ext": "jpg", "anno_path": "Suv/groundtruth_rect.txt",
+        "object_class": "car"},
+    {"name": "Sylvester", "path": "Sylvester/img", "startFrame": 1, "endFrame": 1345, "nz": 4, "ext": "jpg", "anno_path": "Sylvester/groundtruth_rect.txt",
+        "object_class": "other"},
+    {"name": "Tiger1", "path": "Tiger1/img", "startFrame": 1, "endFrame": 354, "nz": 4, "ext": "jpg", "anno_path": "Tiger1/groundtruth_rect.txt", "initOmit": 5,
+        "object_class": "other"},
+    {"name": "Tiger2", "path": "Tiger2/img", "startFrame": 1, "endFrame": 365, "nz": 4, "ext": "jpg", "anno_path": "Tiger2/groundtruth_rect.txt",
+        "object_class": "other"},
+    {"name": "Toy", "path": "Toy/img", "startFrame": 1, "endFrame": 271, "nz": 4, "ext": "jpg", "anno_path": "Toy/groundtruth_rect.txt",
+        "object_class": "other"},
+    {"name": "Trans", "path": "Trans/img", "startFrame": 1, "endFrame": 124, "nz": 4, "ext": "jpg", "anno_path": "Trans/groundtruth_rect.txt",
+        "object_class": "other"},
+    {"name": "Trellis", "path": "Trellis/img", "startFrame": 1, "endFrame": 569, "nz": 4, "ext": "jpg", "anno_path": "Trellis/groundtruth_rect.txt",
+        "object_class": "face"},
+    {"name": "Twinnings", "path": "Twinnings/img", "startFrame": 1, "endFrame": 472, "nz": 4, "ext": "jpg", "anno_path": "Twinnings/groundtruth_rect.txt",
+        "object_class": "other"},
+    {"name": "Vase", "path": "Vase/img", "startFrame": 1, "endFrame": 271, "nz": 4, "ext": "jpg", "anno_path": "Vase/groundtruth_rect.txt",
+        "object_class": "other"},
+    {"name": "Walking", "path": "Walking/img", "startFrame": 1, "endFrame": 412, "nz": 4, "ext": "jpg", "anno_path": "Walking/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Walking2", "path": "Walking2/img", "startFrame": 1, "endFrame": 500, "nz": 4, "ext": "jpg", "anno_path": "Walking2/groundtruth_rect.txt",
+        "object_class": "person"},
+    {"name": "Woman", "path": "Woman/img", "startFrame": 1, "endFrame": 597, "nz": 4, "ext": "jpg", "anno_path": "Woman/groundtruth_rect.txt",
+        "object_class": "person"}
 ]

From e3af11ed96fe3b1a1ff25839604c4135ef15d896 Mon Sep 17 00:00:00 2001
From: Ryan Lee <ryanl1288@hotmail.com>
Date: Mon, 1 Apr 2024 03:32:19 -0400
Subject: [PATCH 10/17] Light cleanup of unused variables

---
 tools/eval/datasets/otb100.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tools/eval/datasets/otb100.py b/tools/eval/datasets/otb100.py
index fa4b0e7e..6754348c 100644
--- a/tools/eval/datasets/otb100.py
+++ b/tools/eval/datasets/otb100.py
@@ -195,14 +195,12 @@ def __init__(self, root):
             meta_data[sequence_info['name']]['gt_rect'] = ground_truth_rect
             meta_data[sequence_info['name']]['attr'] = [sequence_info["object_class"]]
 
-        self.data = meta_data
-        self.root = root
         self.videos = {}
         pbar = tqdm(meta_data.keys(), desc='Loading OTB', ncols=100)
         for video in pbar:
             pbar.set_postfix_str(video)
             self.videos[video] = Video(video,
-                                       self.root,
+                                       root,
                                        meta_data[video]['video_dir'],
                                        meta_data[video]['init_rect'],
                                        meta_data[video]['img_names'],

From 8f74f73e1dc15500cab7ea6baaffa6984ce519af Mon Sep 17 00:00:00 2001
From: Ryan Lee <ryanl1288@hotmail.com>
Date: Mon, 1 Apr 2024 03:34:23 -0400
Subject: [PATCH 11/17] Add citation and slight cleanup

---
 tools/eval/datasets/otb100.py | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/tools/eval/datasets/otb100.py b/tools/eval/datasets/otb100.py
index 6754348c..f48aabc0 100644
--- a/tools/eval/datasets/otb100.py
+++ b/tools/eval/datasets/otb100.py
@@ -184,10 +184,9 @@ def __init__(self, root):
                 init_omit = sequence_info['initOmit']
             frames = [f'{root}/OTB100/{sequence_path}/{frame_num:0{nz}}.{ext}' for \
                       frame_num in range(start_frame+init_omit, end_frame+1)]
-
             anno_path = f'{root}/OTB100/{sequence_info["anno_path"]}'
-
             ground_truth_rect = load_text_numpy(str(anno_path), (',', None), np.float64)[init_omit:,:]
+
             meta_data[sequence_info['name']] = {}
             meta_data[sequence_info['name']]['video_dir'] = sequence_info['path']
             meta_data[sequence_info['name']]['init_rect'] = ground_truth_rect[0]
@@ -252,18 +251,14 @@ def get_axis_aligned_bbox(region):
     return cx, cy, w, h
 
 def load_text_numpy(path, delimiter, dtype):
-    if isinstance(delimiter, (tuple, list)):
-        for d in delimiter:
-            try:
-                ground_truth_rect = np.loadtxt(path, delimiter=d, dtype=dtype)
-                return ground_truth_rect
-            except:
-                pass
-
-        raise Exception('Could not read file {}'.format(path))
-    else:
-        ground_truth_rect = np.loadtxt(path, delimiter=delimiter, dtype=dtype)
-        return ground_truth_rect
+    for d in delimiter:
+        try:
+            ground_truth_rect = np.loadtxt(path, delimiter=d, dtype=dtype)
+            return ground_truth_rect
+        except:
+            pass
+
+    raise Exception('Could not read file {}'.format(path))
 
 class OTB100:
     def __init__(self, root):
@@ -329,7 +324,7 @@ def print_result(self):
 
         benchmark.show_result(**evaluation_results, show_video_level=False)
 
-
+# Sourced from https://github.com/lpylpy0514/VitTracker
 sequence_info_list = [
     {"name": "Basketball", "path": "Basketball/img", "startFrame": 1, "endFrame": 725, "nz": 4, "ext": "jpg", "anno_path": "Basketball/groundtruth_rect.txt",
         "object_class": "person"},

From 93fb68e3db48c07c02a570bc9fe06b27e2a968bd Mon Sep 17 00:00:00 2001
From: Ryan Lee <ryanl1288@hotmail.com>
Date: Mon, 1 Apr 2024 03:47:12 -0400
Subject: [PATCH 12/17] Update README

---
 tools/eval/README.md | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tools/eval/README.md b/tools/eval/README.md
index 72824c88..fc6643e6 100644
--- a/tools/eval/README.md
+++ b/tools/eval/README.md
@@ -221,7 +221,7 @@ python eval.py -m pphumanseg_q -d mini_supervisely -dr /path/to/pphumanseg
 1. The official site is http://cvlab.hanyang.ac.kr/.
 2. In case it is down, users can download from the alternative [Google Drive Link](https://drive.google.com/drive/folders/1iTwCQAMgzdWWrlwncOjpshuHvipIWPMN?usp=sharing).
 
-Download both the `OTB100.zip` and `OTB.json`, organize files as follow:
+Download `OTB100.zip` and unzip it into a directory. The tree structure should appear as follows:
 
 ```shell
 $ tree -L 2 /path/to/otb100
@@ -230,8 +230,7 @@ $ tree -L 2 /path/to/otb100
 │   ├── groundtruth_rect.txt
 │   └── img
 ├── ...
-├── Woman
-└── OTB.json
+└── Woman
 
 ```
 

From 8936ff3233fb40fff0c2a08829c44b20df1dcbd4 Mon Sep 17 00:00:00 2001
From: Ryan Lee <ryanl1288@hotmail.com>
Date: Mon, 8 Apr 2024 03:10:29 -0400
Subject: [PATCH 13/17] Update README

---
 tools/eval/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/eval/README.md b/tools/eval/README.md
index fc6643e6..9380a7dd 100644
--- a/tools/eval/README.md
+++ b/tools/eval/README.md
@@ -22,7 +22,7 @@ Supported datasets:
 - [ICDAR](#icdar2003)
 - [IIIT5K](#iiit5k)
 - [Mini Supervisely](#mini-supervisely)
-- [OTB-2015](#otb-2015)
+- [OTB-100](#otb-100)
 
 ## ImageNet
 
@@ -214,7 +214,7 @@ Run evaluation on quantized model with the following command :
 python eval.py -m pphumanseg_q -d mini_supervisely -dr /path/to/pphumanseg
 ```
 
-## OTB-2015
+## OTB-100
 
 ### Prepare data
 

From b232a9b28b431de8185e67018c636c877e453e5c Mon Sep 17 00:00:00 2001
From: Ryan Lee <ryanl1288@hotmail.com>
Date: Mon, 8 Apr 2024 03:32:36 -0400
Subject: [PATCH 14/17] Update progress bars to show the necessary outer loops
 only

---
 tools/eval/datasets/otb100.py | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/tools/eval/datasets/otb100.py b/tools/eval/datasets/otb100.py
index f48aabc0..1ccea8bc 100644
--- a/tools/eval/datasets/otb100.py
+++ b/tools/eval/datasets/otb100.py
@@ -195,9 +195,7 @@ def __init__(self, root):
             meta_data[sequence_info['name']]['attr'] = [sequence_info["object_class"]]
 
         self.videos = {}
-        pbar = tqdm(meta_data.keys(), desc='Loading OTB', ncols=100)
-        for video in pbar:
-            pbar.set_postfix_str(video)
+        for video in meta_data.keys():
             self.videos[video] = Video(video,
                                        root,
                                        meta_data[video]['video_dir'],
@@ -274,7 +272,7 @@ def name(self):
         return self.__class__.__name__
 
     def eval(self, model):
-        for v_idx, video in enumerate(self.dataset):
+        for video in tqdm(self.dataset, desc="Evaluating: ", total=100, ncols=100):
             total_time = 0
             pred_bboxes = []
             scores = []
@@ -303,15 +301,10 @@ def eval(self, model):
             model_path = os.path.join('OTB_results')
             os.makedirs(model_path, exist_ok=True)
             result_path = os.path.join(model_path, '{}.txt'.format(video.name))
-            print(result_path)
             with open(result_path, 'w') as f:
                 for bbox in pred_bboxes:
                     f.write(','.join(map(str, bbox)) + '\n')
 
-            avg_fps = len(video) / total_time if total_time > 0 else 0
-            print('({:3d}) Video: {:12s} Time: {:5.1f}s Speed: {:3.1f}fps'.format(
-                v_idx + 1, video.name, total_time, avg_fps))
-
     def print_result(self):
         benchmark = OPEBenchmark(self.dataset)
         num_cores = cpu_count()
@@ -319,7 +312,7 @@ def print_result(self):
         metrics = ["success", "precision", "norm_precision"]
         for metric in metrics:
             with Pool(processes=min(num_cores, max(1, num_cores - 1))) as pool:
-                for ret in tqdm(pool.imap_unordered(benchmark.evaluate, [metric], 1), desc=f'eval {metric}', total=1, ncols=100):
+                for ret in pool.imap_unordered(benchmark.evaluate, [metric], 1):
                     evaluation_results[metric] = ret
 
         benchmark.show_result(**evaluation_results, show_video_level=False)

From 4e575578bf9b2f84b218f3464e2604153643cb5f Mon Sep 17 00:00:00 2001
From: Ryan Lee <ryanl1288@hotmail.com>
Date: Mon, 8 Apr 2024 04:22:24 -0400
Subject: [PATCH 15/17] Remove unnecessary code

---
 tools/eval/datasets/otb100.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/tools/eval/datasets/otb100.py b/tools/eval/datasets/otb100.py
index 1ccea8bc..f780b7a9 100644
--- a/tools/eval/datasets/otb100.py
+++ b/tools/eval/datasets/otb100.py
@@ -273,30 +273,20 @@ def name(self):
 
     def eval(self, model):
         for video in tqdm(self.dataset, desc="Evaluating: ", total=100, ncols=100):
-            total_time = 0
             pred_bboxes = []
-            scores = []
-            track_times = []
 
             for idx, (img, gt_bbox) in enumerate(video):
                 img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
-                tic = cv.getTickCount()
-
                 if idx == 0:
                     cx, cy, w, h = get_axis_aligned_bbox(np.array(gt_bbox))
                     gt_bbox_ = (int(cx - w / 2), int(cy - h / 2), int(w), int(h))
                     model.init(img, gt_bbox_)
                     pred_bbox = gt_bbox_
-                    scores.append(None)
                 else:
                     isLocated, bbox, score = model.infer(img)
                     pred_bbox = bbox
-                    scores.append(score)
 
                 pred_bboxes.append(pred_bbox)
-                toc = (cv.getTickCount() - tic) / cv.getTickFrequency()
-                total_time += toc
-                track_times.append(toc)
 
             model_path = os.path.join('OTB_results')
             os.makedirs(model_path, exist_ok=True)

From b73105962360c215efcddfe2c546cb191cf9f3c1 Mon Sep 17 00:00:00 2001
From: Ryan Lee <ryanl1288@hotmail.com>
Date: Mon, 8 Apr 2024 04:40:57 -0400
Subject: [PATCH 16/17] Instead of saving, put it in a global dictionary

---
 tools/eval/datasets/otb100.py | 32 ++++++++------------------------
 1 file changed, 8 insertions(+), 24 deletions(-)

diff --git a/tools/eval/datasets/otb100.py b/tools/eval/datasets/otb100.py
index f780b7a9..af3373b2 100644
--- a/tools/eval/datasets/otb100.py
+++ b/tools/eval/datasets/otb100.py
@@ -1,24 +1,23 @@
 import os
-import json
 import numpy as np
 import cv2 as cv
 from colorama import Style, Fore
 from tqdm import tqdm
 from multiprocessing import Pool, cpu_count
 
+PRED_BBOXES_DICT = {}
+
 def overlap_ratio(rect1, rect2):
     """Calculate the Intersection over Union (IoU) overlap ratio between two sets of rectangles."""  
     tl = np.maximum(rect1[:, :2], rect2[:, :2])
     br = np.minimum(rect1[:, :2] + rect1[:, 2:] - 1.0, rect2[:, :2] + rect2[:, 2:] - 1.0)
     sz = np.maximum(br - tl + 1.0, 0)
 
-    # Area
     intersection = np.prod(sz, axis=1)
     union = np.prod(rect1[:, 2:], axis=1) + np.prod(rect2[:, 2:], axis=1) - intersection
     iou = np.clip(intersection / union, 0, 1)
     return iou
 
-
 def success_overlap(gt_bb, result_bb, n_frame):
     """Calculate the success rate based on the overlap ratio between ground truth and predicted bounding boxes."""
     thresholds_overlap = np.arange(0, 1.05, 0.05)
@@ -157,17 +156,11 @@ def __iter__(self):
                 yield cv.imread(self.img_names[i]), self.gt_traj[i]
 
     def load_tracker(self):
-        """Load tracker results from file."""
-        traj_file = os.path.join("OTB_results", self.name+'.txt')
-        if os.path.exists(traj_file):
-            with open(traj_file, 'r') as f:
-                pred_traj = [list(map(float, x.strip().split(','))) for x in f.readlines()]
-                if len(pred_traj) != len(self.gt_traj):
-                    print("tracker", len(pred_traj), len(self.gt_traj), self.name)
-                else:
-                    return pred_traj
+        if self.name in PRED_BBOXES_DICT:
+            return PRED_BBOXES_DICT[self.name]
         else:
-            print(traj_file)
+            print(f"No prediction found for video {self.name}")
+            return None
 
 class OTBDATASET:
     def __init__(self, root):
@@ -274,7 +267,6 @@ def name(self):
     def eval(self, model):
         for video in tqdm(self.dataset, desc="Evaluating: ", total=100, ncols=100):
             pred_bboxes = []
-
             for idx, (img, gt_bbox) in enumerate(video):
                 img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
                 if idx == 0:
@@ -283,17 +275,9 @@ def eval(self, model):
                     model.init(img, gt_bbox_)
                     pred_bbox = gt_bbox_
                 else:
-                    isLocated, bbox, score = model.infer(img)
-                    pred_bbox = bbox
-
+                    pred_bbox = model.infer(img)[1]
                 pred_bboxes.append(pred_bbox)
-
-            model_path = os.path.join('OTB_results')
-            os.makedirs(model_path, exist_ok=True)
-            result_path = os.path.join(model_path, '{}.txt'.format(video.name))
-            with open(result_path, 'w') as f:
-                for bbox in pred_bboxes:
-                    f.write(','.join(map(str, bbox)) + '\n')
+            PRED_BBOXES_DICT[video.name] = pred_bboxes
 
     def print_result(self):
         benchmark = OPEBenchmark(self.dataset)

From 66651cf95f8bf4c67d0fe27252b2d19d31be7947 Mon Sep 17 00:00:00 2001
From: Ryan Lee <ryanl1288@hotmail.com>
Date: Thu, 6 Jun 2024 03:03:44 -0400
Subject: [PATCH 17/17] Remove PRED_BBOXES_DICT as a global variable

---
 tools/eval/datasets/otb100.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/tools/eval/datasets/otb100.py b/tools/eval/datasets/otb100.py
index af3373b2..1684ceed 100644
--- a/tools/eval/datasets/otb100.py
+++ b/tools/eval/datasets/otb100.py
@@ -5,8 +5,6 @@
 from tqdm import tqdm
 from multiprocessing import Pool, cpu_count
 
-PRED_BBOXES_DICT = {}
-
 def overlap_ratio(rect1, rect2):
     """Calculate the Intersection over Union (IoU) overlap ratio between two sets of rectangles."""  
     tl = np.maximum(rect1[:, :2], rect2[:, :2])
@@ -125,7 +123,7 @@ def show_result(self, success, precision=None, norm_precision=None, show_video_l
             print('-' * len(header1))
 
 class Video:
-    def __init__(self, name, root, video_dir, init_rect, img_names, gt_rect, attr):
+    def __init__(self, name, root, video_dir, init_rect, img_names, gt_rect, attr, pred_bboxes_dict):
         self.name = name
         self.video_dir = video_dir
         self.init_rect = init_rect
@@ -138,6 +136,7 @@ def __init__(self, name, root, video_dir, init_rect, img_names, gt_rect, attr):
         assert img is not None, self.img_names[0]
         self.width = img.shape[1]
         self.height = img.shape[0]
+        self.pred_bboxes_dict = pred_bboxes_dict
 
     def __len__(self):
         return len(self.img_names)
@@ -156,14 +155,15 @@ def __iter__(self):
                 yield cv.imread(self.img_names[i]), self.gt_traj[i]
 
     def load_tracker(self):
-        if self.name in PRED_BBOXES_DICT:
-            return PRED_BBOXES_DICT[self.name]
+        if self.name in self.pred_bboxes_dict:
+            return self.pred_bboxes_dict[self.name]
         else:
             print(f"No prediction found for video {self.name}")
             return None
 
 class OTBDATASET:
     def __init__(self, root):
+        self.pred_bboxes_dict = {}
         meta_data = {}
         for sequence_info in sequence_info_list:
             sequence_path = sequence_info['path']
@@ -195,7 +195,8 @@ def __init__(self, root):
                                        meta_data[video]['init_rect'],
                                        meta_data[video]['img_names'],
                                        meta_data[video]['gt_rect'],
-                                       meta_data[video]['attr'])
+                                       meta_data[video]['attr'],
+                                       self.pred_bboxes_dict)
         self.attr = {'ALL': list(self.videos.keys())}
         all_attributes = [x.attr for x in self.videos.values()]
         all_attributes = set(sum(all_attributes, []))
@@ -277,7 +278,7 @@ def eval(self, model):
                 else:
                     pred_bbox = model.infer(img)[1]
                 pred_bboxes.append(pred_bbox)
-            PRED_BBOXES_DICT[video.name] = pred_bboxes
+            self.dataset.pred_bboxes_dict[video.name] = pred_bboxes
 
     def print_result(self):
         benchmark = OPEBenchmark(self.dataset)