From aa34aa0ec14dc31cece99563571a57f6483ca81c Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 28 Feb 2023 15:23:29 +0000 Subject: [PATCH 01/43] created class BlobLoader and moved all related function to sep file --- .../implicitron/dataset/json_index_dataset.py | 461 ++------------- pytorch3d/implicitron/dataset/load_blob.py | 542 ++++++++++++++++++ 2 files changed, 576 insertions(+), 427 deletions(-) create mode 100644 pytorch3d/implicitron/dataset/load_blob.py diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py index 669f4e9b6..ac9daf02a 100644 --- a/pytorch3d/implicitron/dataset/json_index_dataset.py +++ b/pytorch3d/implicitron/dataset/json_index_dataset.py @@ -32,17 +32,16 @@ import numpy as np import torch -from PIL import Image +from tqdm import tqdm + from pytorch3d.implicitron.tools.config import registry, ReplaceableBase -from pytorch3d.io import IO +from pytorch3d.implicitron.dataset import types +from pytorch3d.implicitron.dataset.dataset_base import DatasetBase, FrameData +from pytorch3d.implicitron.dataset.load_blob import BlobLoader +from pytorch3d.implicitron.dataset.utils import is_known_frame_scalar from pytorch3d.renderer.camera_utils import join_cameras_as_batch from pytorch3d.renderer.cameras import CamerasBase, PerspectiveCameras from pytorch3d.structures.pointclouds import Pointclouds -from tqdm import tqdm - -from . import types -from .dataset_base import DatasetBase, FrameData -from .utils import is_known_frame_scalar logger = logging.getLogger(__name__) @@ -53,6 +52,7 @@ class FrameAnnotsEntry(TypedDict): subset: Optional[str] + # pyre-ignore frame_annotation: types.FrameAnnotation else: @@ -60,6 +60,7 @@ class FrameAnnotsEntry(TypedDict): @registry.register +# pyre-ignore class JsonIndexDataset(DatasetBase, ReplaceableBase): """ A dataset with annotations in json files like the Common Objects in 3D @@ -130,6 +131,7 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase): frame_annotations_type: ClassVar[ Type[types.FrameAnnotation] + # pyre-ignore ] = types.FrameAnnotation path_manager: Any = None @@ -162,6 +164,7 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase): sort_frames: bool = False eval_batches: Any = None eval_batch_index: Any = None + loader: BlobLoader # frame_annots: List[FrameAnnotsEntry] = field(init=False) # seq_annots: Dict[str, types.SequenceAnnotation] = field(init=False) @@ -175,6 +178,23 @@ def __post_init__(self) -> None: self._load_subset_lists() self._filter_db() # also computes sequence indices self._extract_and_set_eval_batches() + + self.loader = BlobLoader( + self.dataset_root, + self.load_images, + self.load_depths, + self.load_depth_masks, + self.load_masks, + self.load_point_clouds, + self.max_points, + self.mask_images, + self.mask_depths, + self.image_height, + self.image_width, + self.box_crop, + self.box_crop_mask_thr, + self.box_crop_context, + ) logger.info(str(self)) def _extract_and_set_eval_batches(self): @@ -207,12 +227,11 @@ def join(self, other_datasets: Iterable[DatasetBase]) -> None: # https://gist.github.com/treyhunner/f35292e676efa0be1728 functools.reduce( lambda a, b: {**a, **b}, - [d.seq_annots for d in other_datasets], # pyre-ignore[16] + [d.seq_annots for d in other_datasets], ) ) all_eval_batches = [ self.eval_batches, - # pyre-ignore *[d.eval_batches for d in other_datasets], ] if not ( @@ -396,6 +415,7 @@ def __len__(self) -> int: def _get_frame_type(self, entry: FrameAnnotsEntry) -> Optional[str]: return entry["subset"] + # 
pyre-ignore def get_all_train_cameras(self) -> CamerasBase: """ Returns the cameras corresponding to all the known frames. @@ -411,6 +431,7 @@ def get_all_train_cameras(self) -> CamerasBase: cameras.append(self[frame_idx].camera) return join_cameras_as_batch(cameras) + # pyre-ignore def __getitem__(self, index) -> FrameData: # pyre-ignore[16] if index >= len(self.frame_annots): @@ -438,238 +459,14 @@ def __getitem__(self, index) -> FrameData: # The rest of the fields are optional frame_data.frame_type = self._get_frame_type(self.frame_annots[index]) - ( - frame_data.fg_probability, - frame_data.mask_path, - frame_data.bbox_xywh, - clamp_bbox_xyxy, - frame_data.crop_bbox_xywh, - ) = self._load_crop_fg_probability(entry) - - scale = 1.0 - if self.load_images and entry.image is not None: - # original image size - frame_data.image_size_hw = _safe_as_tensor(entry.image.size, torch.long) - - ( - frame_data.image_rgb, - frame_data.image_path, - frame_data.mask_crop, - scale, - ) = self._load_crop_images( - entry, frame_data.fg_probability, clamp_bbox_xyxy - ) - - if self.load_depths and entry.depth is not None: - ( - frame_data.depth_map, - frame_data.depth_path, - frame_data.depth_mask, - ) = self._load_mask_depth(entry, clamp_bbox_xyxy, frame_data.fg_probability) - - if entry.viewpoint is not None: - frame_data.camera = self._get_pytorch3d_camera( - entry, - scale, - clamp_bbox_xyxy, - ) - - if self.load_point_clouds and point_cloud is not None: - pcl_path = self._fix_point_cloud_path(point_cloud.path) - frame_data.sequence_point_cloud = _load_pointcloud( - self._local_path(pcl_path), max_points=self.max_points - ) - frame_data.sequence_point_cloud_path = pcl_path - + frame_data = self.loader.load(frame_data, entry, point_cloud) return frame_data - def _fix_point_cloud_path(self, path: str) -> str: - """ - Fix up a point cloud path from the dataset. - Some files in Co3Dv2 have an accidental absolute path stored. - """ - unwanted_prefix = ( - "/large_experiments/p3/replay/datasets/co3d/co3d45k_220512/export_v23/" - ) - if path.startswith(unwanted_prefix): - path = path[len(unwanted_prefix) :] - return os.path.join(self.dataset_root, path) - - def _load_crop_fg_probability( - self, entry: types.FrameAnnotation - ) -> Tuple[ - Optional[torch.Tensor], - Optional[str], - Optional[torch.Tensor], - Optional[torch.Tensor], - Optional[torch.Tensor], - ]: - fg_probability = None - full_path = None - bbox_xywh = None - clamp_bbox_xyxy = None - crop_box_xywh = None - - if (self.load_masks or self.box_crop) and entry.mask is not None: - full_path = os.path.join(self.dataset_root, entry.mask.path) - mask = _load_mask(self._local_path(full_path)) - - if mask.shape[-2:] != entry.image.size: - raise ValueError( - f"bad mask size: {mask.shape[-2:]} vs {entry.image.size}!" 
- ) - - bbox_xywh = torch.tensor(_get_bbox_from_mask(mask, self.box_crop_mask_thr)) - - if self.box_crop: - clamp_bbox_xyxy = _clamp_box_to_image_bounds_and_round( - _get_clamp_bbox( - bbox_xywh, - image_path=entry.image.path, - box_crop_context=self.box_crop_context, - ), - image_size_hw=tuple(mask.shape[-2:]), - ) - crop_box_xywh = _bbox_xyxy_to_xywh(clamp_bbox_xyxy) - - mask = _crop_around_box(mask, clamp_bbox_xyxy, full_path) - - fg_probability, _, _ = self._resize_image(mask, mode="nearest") - - return fg_probability, full_path, bbox_xywh, clamp_bbox_xyxy, crop_box_xywh - - def _load_crop_images( - self, - entry: types.FrameAnnotation, - fg_probability: Optional[torch.Tensor], - clamp_bbox_xyxy: Optional[torch.Tensor], - ) -> Tuple[torch.Tensor, str, torch.Tensor, float]: - assert self.dataset_root is not None and entry.image is not None - path = os.path.join(self.dataset_root, entry.image.path) - image_rgb = _load_image(self._local_path(path)) - - if image_rgb.shape[-2:] != entry.image.size: - raise ValueError( - f"bad image size: {image_rgb.shape[-2:]} vs {entry.image.size}!" - ) - - if self.box_crop: - assert clamp_bbox_xyxy is not None - image_rgb = _crop_around_box(image_rgb, clamp_bbox_xyxy, path) - - image_rgb, scale, mask_crop = self._resize_image(image_rgb) - - if self.mask_images: - assert fg_probability is not None - image_rgb *= fg_probability - - return image_rgb, path, mask_crop, scale - - def _load_mask_depth( - self, - entry: types.FrameAnnotation, - clamp_bbox_xyxy: Optional[torch.Tensor], - fg_probability: Optional[torch.Tensor], - ) -> Tuple[torch.Tensor, str, torch.Tensor]: - entry_depth = entry.depth - assert entry_depth is not None - path = os.path.join(self.dataset_root, entry_depth.path) - depth_map = _load_depth(self._local_path(path), entry_depth.scale_adjustment) - - if self.box_crop: - assert clamp_bbox_xyxy is not None - depth_bbox_xyxy = _rescale_bbox( - clamp_bbox_xyxy, entry.image.size, depth_map.shape[-2:] - ) - depth_map = _crop_around_box(depth_map, depth_bbox_xyxy, path) - - depth_map, _, _ = self._resize_image(depth_map, mode="nearest") - - if self.mask_depths: - assert fg_probability is not None - depth_map *= fg_probability - - if self.load_depth_masks: - assert entry_depth.mask_path is not None - mask_path = os.path.join(self.dataset_root, entry_depth.mask_path) - depth_mask = _load_depth_mask(self._local_path(mask_path)) - - if self.box_crop: - assert clamp_bbox_xyxy is not None - depth_mask_bbox_xyxy = _rescale_bbox( - clamp_bbox_xyxy, entry.image.size, depth_mask.shape[-2:] - ) - depth_mask = _crop_around_box( - depth_mask, depth_mask_bbox_xyxy, mask_path - ) - - depth_mask, _, _ = self._resize_image(depth_mask, mode="nearest") - else: - depth_mask = torch.ones_like(depth_map) - - return depth_map, path, depth_mask - - def _get_pytorch3d_camera( - self, - entry: types.FrameAnnotation, - scale: float, - clamp_bbox_xyxy: Optional[torch.Tensor], - ) -> PerspectiveCameras: - entry_viewpoint = entry.viewpoint - assert entry_viewpoint is not None - # principal point and focal length - principal_point = torch.tensor( - entry_viewpoint.principal_point, dtype=torch.float - ) - focal_length = torch.tensor(entry_viewpoint.focal_length, dtype=torch.float) - - half_image_size_wh_orig = ( - torch.tensor(list(reversed(entry.image.size)), dtype=torch.float) / 2.0 - ) - - # first, we convert from the dataset's NDC convention to pixels - format = entry_viewpoint.intrinsics_format - if format.lower() == "ndc_norm_image_bounds": - # this is e.g. 
currently used in CO3D for storing intrinsics - rescale = half_image_size_wh_orig - elif format.lower() == "ndc_isotropic": - rescale = half_image_size_wh_orig.min() - else: - raise ValueError(f"Unknown intrinsics format: {format}") - - # principal point and focal length in pixels - principal_point_px = half_image_size_wh_orig - principal_point * rescale - focal_length_px = focal_length * rescale - if self.box_crop: - assert clamp_bbox_xyxy is not None - principal_point_px -= clamp_bbox_xyxy[:2] - - # now, convert from pixels to PyTorch3D v0.5+ NDC convention - if self.image_height is None or self.image_width is None: - out_size = list(reversed(entry.image.size)) - else: - out_size = [self.image_width, self.image_height] - - half_image_size_output = torch.tensor(out_size, dtype=torch.float) / 2.0 - half_min_image_size_output = half_image_size_output.min() - - # rescaled principal point and focal length in ndc - principal_point = ( - half_image_size_output - principal_point_px * scale - ) / half_min_image_size_output - focal_length = focal_length_px * scale / half_min_image_size_output - - return PerspectiveCameras( - focal_length=focal_length[None], - principal_point=principal_point[None], - R=torch.tensor(entry_viewpoint.R, dtype=torch.float)[None], - T=torch.tensor(entry_viewpoint.T, dtype=torch.float)[None], - ) - def _load_frames(self) -> None: logger.info(f"Loading Co3D frames from {self.frame_annotations_file}.") local_file = self._local_path(self.frame_annotations_file) with gzip.open(local_file, "rt", encoding="utf8") as zipfile: + # pyre-ignore frame_annots_list = types.load_dataclass( zipfile, List[self.frame_annotations_type] ) @@ -684,6 +481,7 @@ def _load_sequences(self) -> None: logger.info(f"Loading Co3D sequences from {self.sequence_annotations_file}.") local_file = self._local_path(self.sequence_annotations_file) with gzip.open(local_file, "rt", encoding="utf8") as zipfile: + # pyre-ignore seq_annots = types.load_dataclass(zipfile, List[types.SequenceAnnotation]) if not seq_annots: raise ValueError("Empty sequences file!") @@ -853,35 +651,6 @@ def _invalidate_seq_to_idx(self) -> None: # pyre-ignore[16] self._seq_to_idx = seq_to_idx - def _resize_image( - self, image, mode="bilinear" - ) -> Tuple[torch.Tensor, float, torch.Tensor]: - image_height, image_width = self.image_height, self.image_width - if image_height is None or image_width is None: - # skip the resizing - imre_ = torch.from_numpy(image) - return imre_, 1.0, torch.ones_like(imre_[:1]) - # takes numpy array, returns pytorch tensor - minscale = min( - image_height / image.shape[-2], - image_width / image.shape[-1], - ) - imre = torch.nn.functional.interpolate( - torch.from_numpy(image)[None], - scale_factor=minscale, - mode=mode, - align_corners=False if mode == "bilinear" else None, - recompute_scale_factor=True, - )[0] - # pyre-fixme[19]: Expected 1 positional argument. - imre_ = torch.zeros(image.shape[0], self.image_height, self.image_width) - imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre - # pyre-fixme[6]: For 2nd param expected `int` but got `Optional[int]`. - # pyre-fixme[6]: For 3rd param expected `int` but got `Optional[int]`. 
- mask = torch.zeros(1, self.image_height, self.image_width) - mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0 - return imre_, minscale, mask - def _local_path(self, path: str) -> str: if self.path_manager is None: return path @@ -920,167 +689,5 @@ def _seq_name_to_seed(seq_name) -> int: return int(hashlib.sha1(seq_name.encode("utf-8")).hexdigest(), 16) -def _load_image(path) -> np.ndarray: - with Image.open(path) as pil_im: - im = np.array(pil_im.convert("RGB")) - im = im.transpose((2, 0, 1)) - im = im.astype(np.float32) / 255.0 - return im - - -def _load_16big_png_depth(depth_png) -> np.ndarray: - with Image.open(depth_png) as depth_pil: - # the image is stored with 16-bit depth but PIL reads it as I (32 bit). - # we cast it to uint16, then reinterpret as float16, then cast to float32 - depth = ( - np.frombuffer(np.array(depth_pil, dtype=np.uint16), dtype=np.float16) - .astype(np.float32) - .reshape((depth_pil.size[1], depth_pil.size[0])) - ) - return depth - - -def _load_1bit_png_mask(file: str) -> np.ndarray: - with Image.open(file) as pil_im: - mask = (np.array(pil_im.convert("L")) > 0.0).astype(np.float32) - return mask - - -def _load_depth_mask(path: str) -> np.ndarray: - if not path.lower().endswith(".png"): - raise ValueError('unsupported depth mask file name "%s"' % path) - m = _load_1bit_png_mask(path) - return m[None] # fake feature channel - - -def _load_depth(path, scale_adjustment) -> np.ndarray: - if not path.lower().endswith(".png"): - raise ValueError('unsupported depth file name "%s"' % path) - - d = _load_16big_png_depth(path) * scale_adjustment - d[~np.isfinite(d)] = 0.0 - return d[None] # fake feature channel - - -def _load_mask(path) -> np.ndarray: - with Image.open(path) as pil_im: - mask = np.array(pil_im) - mask = mask.astype(np.float32) / 255.0 - return mask[None] # fake feature channel - - -def _get_1d_bounds(arr) -> Tuple[int, int]: - nz = np.flatnonzero(arr) - return nz[0], nz[-1] + 1 - - -def _get_bbox_from_mask( - mask, thr, decrease_quant: float = 0.05 -) -> Tuple[int, int, int, int]: - # bbox in xywh - masks_for_box = np.zeros_like(mask) - while masks_for_box.sum() <= 1.0: - masks_for_box = (mask > thr).astype(np.float32) - thr -= decrease_quant - if thr <= 0.0: - warnings.warn(f"Empty masks_for_bbox (thr={thr}) => using full image.") - - x0, x1 = _get_1d_bounds(masks_for_box.sum(axis=-2)) - y0, y1 = _get_1d_bounds(masks_for_box.sum(axis=-1)) - - return x0, y0, x1 - x0, y1 - y0 - - -def _get_clamp_bbox( - bbox: torch.Tensor, - box_crop_context: float = 0.0, - image_path: str = "", -) -> torch.Tensor: - # box_crop_context: rate of expansion for bbox - # returns possibly expanded bbox xyxy as float - - bbox = bbox.clone() # do not edit bbox in place - - # increase box size - if box_crop_context > 0.0: - c = box_crop_context - bbox = bbox.float() - bbox[0] -= bbox[2] * c / 2 - bbox[1] -= bbox[3] * c / 2 - bbox[2] += bbox[2] * c - bbox[3] += bbox[3] * c - - if (bbox[2:] <= 1.0).any(): - raise ValueError( - f"squashed image {image_path}!! The bounding box contains no pixels." 
- ) - - bbox[2:] = torch.clamp(bbox[2:], 2) # set min height, width to 2 along both axes - bbox_xyxy = _bbox_xywh_to_xyxy(bbox, clamp_size=2) - - return bbox_xyxy - - -def _crop_around_box(tensor, bbox, impath: str = ""): - # bbox is xyxy, where the upper bound is corrected with +1 - bbox = _clamp_box_to_image_bounds_and_round( - bbox, - image_size_hw=tensor.shape[-2:], - ) - tensor = tensor[..., bbox[1] : bbox[3], bbox[0] : bbox[2]] - assert all(c > 0 for c in tensor.shape), f"squashed image {impath}" - return tensor - - -def _clamp_box_to_image_bounds_and_round( - bbox_xyxy: torch.Tensor, - image_size_hw: Tuple[int, int], -) -> torch.LongTensor: - bbox_xyxy = bbox_xyxy.clone() - bbox_xyxy[[0, 2]] = torch.clamp(bbox_xyxy[[0, 2]], 0, image_size_hw[-1]) - bbox_xyxy[[1, 3]] = torch.clamp(bbox_xyxy[[1, 3]], 0, image_size_hw[-2]) - if not isinstance(bbox_xyxy, torch.LongTensor): - bbox_xyxy = bbox_xyxy.round().long() - return bbox_xyxy # pyre-ignore [7] - - -def _rescale_bbox(bbox: torch.Tensor, orig_res, new_res) -> torch.Tensor: - assert bbox is not None - assert np.prod(orig_res) > 1e-8 - # average ratio of dimensions - rel_size = (new_res[0] / orig_res[0] + new_res[1] / orig_res[1]) / 2.0 - return bbox * rel_size - - -def _bbox_xyxy_to_xywh(xyxy: torch.Tensor) -> torch.Tensor: - wh = xyxy[2:] - xyxy[:2] - xywh = torch.cat([xyxy[:2], wh]) - return xywh - - -def _bbox_xywh_to_xyxy( - xywh: torch.Tensor, clamp_size: Optional[int] = None -) -> torch.Tensor: - xyxy = xywh.clone() - if clamp_size is not None: - xyxy[2:] = torch.clamp(xyxy[2:], clamp_size) - xyxy[2:] += xyxy[:2] - return xyxy - - def _safe_as_tensor(data, dtype): - if data is None: - return None - return torch.tensor(data, dtype=dtype) - - -# NOTE this cache is per-worker; they are implemented as processes. -# each batch is loaded and collated by a single worker; -# since sequences tend to co-occur within batches, this is useful. -@functools.lru_cache(maxsize=256) -def _load_pointcloud(pcl_path: Union[str, Path], max_points: int = 0) -> Pointclouds: - pcl = IO().load_pointcloud(pcl_path) - if max_points > 0: - pcl = pcl.subsample(max_points) - - return pcl + return torch.tensor(data, dtype=dtype) if data is not None else None diff --git a/pytorch3d/implicitron/dataset/load_blob.py b/pytorch3d/implicitron/dataset/load_blob.py new file mode 100644 index 000000000..b10fb1267 --- /dev/null +++ b/pytorch3d/implicitron/dataset/load_blob.py @@ -0,0 +1,542 @@ +import functools +import os +import warnings + +import numpy as np +from PIL import Image +import torch +from typing import Any, Optional, Tuple + +from pytorch3d.implicitron.dataset import types +from pytorch3d.implicitron.dataset.dataset_base import FrameData +from pytorch3d.io import IO + + +class BlobLoader: + """ + A loader for correctly (according to setup) loading blobs for FrameData + + Args: + dataset_root: The root folder of the dataset; all the paths in jsons are + specified relative to this root (but not json paths themselves). + load_images: Enable loading the frame RGB data. + load_depths: Enable loading the frame depth maps. + load_depth_masks: Enable loading the frame depth map masks denoting the + depth values used for evaluation (the points consistent across views). + load_masks: Enable loading frame foreground masks. + load_point_clouds: Enable loading sequence-level point clouds. + max_points: Cap on the number of loaded points in the point cloud; + if reached, they are randomly sampled without replacement. 
+ mask_images: Whether to mask the images with the loaded foreground masks; + 0 value is used for background. + mask_depths: Whether to mask the depth maps with the loaded foreground + masks; 0 value is used for background. + image_height: The height of the returned images, masks, and depth maps; + aspect ratio is preserved during cropping/resizing. + image_width: The width of the returned images, masks, and depth maps; + aspect ratio is preserved during cropping/resizing. + box_crop: Enable cropping of the image around the bounding box inferred + from the foreground region of the loaded segmentation mask; masks + and depth maps are cropped accordingly; cameras are corrected. + box_crop_mask_thr: The threshold used to separate pixels into foreground + and background based on the foreground_probability mask; if no value + is greater than this threshold, the loader lowers it and repeats. + box_crop_context: The amount of additional padding added to each + dimension of the cropping bounding box, relative to box size. + """ + + path_manager: Any = None + + def __init__( + self, + dataset_root, + load_images, + load_depths, + load_depth_masks, + load_masks, + load_point_clouds, + max_points, + mask_images, + mask_depths, + image_height, + image_width, + box_crop, + box_crop_mask_thr, + box_crop_context, + ): + self.dataset_root = dataset_root + self.load_images = load_images + self.load_depths = load_depths + self.load_depth_masks = load_depth_masks + self.load_masks = load_masks + self.load_point_clouds = load_point_clouds + self.max_points = max_points + self.mask_images = mask_images + self.mask_depths = mask_depths + self.image_height = image_height + self.image_width = image_width + self.box_crop = box_crop + self.box_crop_mask_thr = box_crop_mask_thr + self.box_crop_context = box_crop_context + + def load( + self, + # pyre-ignore + frame_data: FrameData, + # pyre-ignore + entry: types.FrameAnnotation, + # pyre-ignore + point_cloud: types.PointCloudAnnotation, + ) -> FrameData: + """Main method for loader.""" + ( + frame_data.fg_probability, + frame_data.mask_path, + frame_data.bbox_xywh, + clamp_bbox_xyxy, + frame_data.crop_bbox_xywh, + ) = self._load_crop_fg_probability(entry) + + scale = 1.0 + if self.load_images and entry.image is not None: + # original image size + frame_data.image_size_hw = _safe_as_tensor(entry.image.size, torch.long) + + ( + frame_data.image_rgb, + frame_data.image_path, + frame_data.mask_crop, + scale, + ) = self._load_crop_images( + entry, frame_data.fg_probability, clamp_bbox_xyxy + ) + + if self.load_depths and entry.depth is not None: + ( + frame_data.depth_map, + frame_data.depth_path, + frame_data.depth_mask, + ) = self._load_mask_depth(entry, clamp_bbox_xyxy, frame_data.fg_probability) + + if entry.viewpoint is not None: + frame_data.camera = self._get_pytorch3d_camera( + entry, + scale, + clamp_bbox_xyxy, + ) + + if self.load_point_clouds and point_cloud is not None: + pcl_path = self._fix_point_cloud_path(point_cloud.path) + frame_data.sequence_point_cloud = _load_pointcloud( + self._local_path(pcl_path), max_points=self.max_points + ) + frame_data.sequence_point_cloud_path = pcl_path + return frame_data + + def _load_crop_fg_probability( + self, entry: types.FrameAnnotation + ) -> Tuple[ + Optional[torch.Tensor], + Optional[str], + Optional[torch.Tensor], + Optional[torch.Tensor], + Optional[torch.Tensor], + ]: + fg_probability = None + full_path = None + bbox_xywh = None + clamp_bbox_xyxy = None + crop_box_xywh = None + + if (self.load_masks or 
self.box_crop) and entry.mask is not None: + full_path = os.path.join(self.dataset_root, entry.mask.path) + mask = _load_mask(self._local_path(full_path)) + + if mask.shape[-2:] != entry.image.size: + raise ValueError( + f"bad mask size: {mask.shape[-2:]} vs {entry.image.size}!" + ) + + bbox_xywh = torch.tensor(_get_bbox_from_mask(mask, self.box_crop_mask_thr)) + + if self.box_crop: + clamp_bbox_xyxy = _clamp_box_to_image_bounds_and_round( + _get_clamp_bbox( + bbox_xywh, + image_path=entry.image.path, + box_crop_context=self.box_crop_context, + ), + image_size_hw=tuple(mask.shape[-2:]), + ) + crop_box_xywh = _bbox_xyxy_to_xywh(clamp_bbox_xyxy) + + mask = _crop_around_box(mask, clamp_bbox_xyxy, full_path) + + fg_probability, _, _ = self._resize_image(mask, mode="nearest") + + return fg_probability, full_path, bbox_xywh, clamp_bbox_xyxy, crop_box_xywh + + def _load_crop_images( + self, + entry: types.FrameAnnotation, + fg_probability: Optional[torch.Tensor], + clamp_bbox_xyxy: Optional[torch.Tensor], + ) -> Tuple[torch.Tensor, str, torch.Tensor, float]: + assert self.dataset_root is not None and entry.image is not None + path = os.path.join(self.dataset_root, entry.image.path) + image_rgb = _load_image(self._local_path(path)) + + if image_rgb.shape[-2:] != entry.image.size: + raise ValueError( + f"bad image size: {image_rgb.shape[-2:]} vs {entry.image.size}!" + ) + + if self.box_crop: + assert clamp_bbox_xyxy is not None + image_rgb = _crop_around_box(image_rgb, clamp_bbox_xyxy, path) + + image_rgb, scale, mask_crop = self._resize_image(image_rgb) + + if self.mask_images: + assert fg_probability is not None + image_rgb *= fg_probability + + return image_rgb, path, mask_crop, scale + + def _load_mask_depth( + self, + entry: types.FrameAnnotation, + clamp_bbox_xyxy: Optional[torch.Tensor], + fg_probability: Optional[torch.Tensor], + ) -> Tuple[torch.Tensor, str, torch.Tensor]: + entry_depth = entry.depth + assert entry_depth is not None + path = os.path.join(self.dataset_root, entry_depth.path) + depth_map = _load_depth(self._local_path(path), entry_depth.scale_adjustment) + + if self.box_crop: + assert clamp_bbox_xyxy is not None + depth_bbox_xyxy = _rescale_bbox( + clamp_bbox_xyxy, entry.image.size, depth_map.shape[-2:] + ) + depth_map = _crop_around_box(depth_map, depth_bbox_xyxy, path) + + depth_map, _, _ = self._resize_image(depth_map, mode="nearest") + + if self.mask_depths: + assert fg_probability is not None + depth_map *= fg_probability + + if self.load_depth_masks: + assert entry_depth.mask_path is not None + mask_path = os.path.join(self.dataset_root, entry_depth.mask_path) + depth_mask = _load_depth_mask(self._local_path(mask_path)) + + if self.box_crop: + assert clamp_bbox_xyxy is not None + depth_mask_bbox_xyxy = _rescale_bbox( + clamp_bbox_xyxy, entry.image.size, depth_mask.shape[-2:] + ) + depth_mask = _crop_around_box( + depth_mask, depth_mask_bbox_xyxy, mask_path + ) + + depth_mask, _, _ = self._resize_image(depth_mask, mode="nearest") + else: + depth_mask = torch.ones_like(depth_map) + + return depth_map, path, depth_mask + + def _get_pytorch3d_camera( + self, + entry: types.FrameAnnotation, + scale: float, + clamp_bbox_xyxy: Optional[torch.Tensor], + ) -> PerspectiveCameras: # pyre-ignore + entry_viewpoint = entry.viewpoint + assert entry_viewpoint is not None + # principal point and focal length + principal_point = torch.tensor( + entry_viewpoint.principal_point, dtype=torch.float + ) + focal_length = torch.tensor(entry_viewpoint.focal_length, dtype=torch.float) + + 
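+        # Worked example (values assumed for illustration, not taken from any
+        # real annotation): for an image with entry.image.size == (600, 800),
+        # i.e. (H, W), half_image_size_wh_orig below becomes (400.0, 300.0)
+        # in (W, H) order; under the "ndc_norm_image_bounds" convention a
+        # stored principal_point of (0.0, 0.0) then maps to the pixel-space
+        # principal point (400.0, 300.0), the image center.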
half_image_size_wh_orig = ( + torch.tensor(list(reversed(entry.image.size)), dtype=torch.float) / 2.0 + ) + + # first, we convert from the dataset's NDC convention to pixels + format = entry_viewpoint.intrinsics_format + if format.lower() == "ndc_norm_image_bounds": + # this is e.g. currently used in CO3D for storing intrinsics + rescale = half_image_size_wh_orig + elif format.lower() == "ndc_isotropic": + rescale = half_image_size_wh_orig.min() + else: + raise ValueError(f"Unknown intrinsics format: {format}") + + # principal point and focal length in pixels + principal_point_px = half_image_size_wh_orig - principal_point * rescale + focal_length_px = focal_length * rescale + if self.box_crop: + assert clamp_bbox_xyxy is not None + principal_point_px -= clamp_bbox_xyxy[:2] + + # now, convert from pixels to PyTorch3D v0.5+ NDC convention + if self.image_height is None or self.image_width is None: + out_size = list(reversed(entry.image.size)) + else: + out_size = [self.image_width, self.image_height] + + half_image_size_output = torch.tensor(out_size, dtype=torch.float) / 2.0 + half_min_image_size_output = half_image_size_output.min() + + # rescaled principal point and focal length in ndc + principal_point = ( + half_image_size_output - principal_point_px * scale + ) / half_min_image_size_output + focal_length = focal_length_px * scale / half_min_image_size_output + + return PerspectiveCameras( + focal_length=focal_length[None], + principal_point=principal_point[None], + R=torch.tensor(entry_viewpoint.R, dtype=torch.float)[None], + T=torch.tensor(entry_viewpoint.T, dtype=torch.float)[None], + ) + + def _fix_point_cloud_path(self, path: str) -> str: + """ + Fix up a point cloud path from the dataset. + Some files in Co3Dv2 have an accidental absolute path stored. + """ + unwanted_prefix = ( + "/large_experiments/p3/replay/datasets/co3d/co3d45k_220512/export_v23/" + ) + if path.startswith(unwanted_prefix): + path = path[len(unwanted_prefix) :] + return os.path.join(self.dataset_root, path) + + def _local_path(self, path: str) -> str: + if self.path_manager is None: + return path + return self.path_manager.get_local_path(path) + + def _resize_image( + self, image, mode="bilinear" + ) -> Tuple[torch.Tensor, float, torch.Tensor]: + image_height, image_width = self.image_height, self.image_width + if image_height is None or image_width is None: + # skip the resizing + imre_ = torch.from_numpy(image) + return imre_, 1.0, torch.ones_like(imre_[:1]) + # takes numpy array, returns pytorch tensor + minscale = min( + image_height / image.shape[-2], + image_width / image.shape[-1], + ) + imre = torch.nn.functional.interpolate( + torch.from_numpy(image)[None], + scale_factor=minscale, + mode=mode, + align_corners=False if mode == "bilinear" else None, + recompute_scale_factor=True, + )[0] + # pyre-fixme[19]: Expected 1 positional argument. 
+ imre_ = torch.zeros(image.shape[0], self.image_height, self.image_width) + imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre + mask = torch.zeros(1, self.image_height, self.image_width) + mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0 + return imre_, minscale, mask + + +def _load_image(path) -> np.ndarray: + with Image.open(path) as pil_im: + im = np.array(pil_im.convert("RGB")) + im = im.transpose((2, 0, 1)) + im = im.astype(np.float32) / 255.0 + return im + + +def _load_mask(path) -> np.ndarray: + with Image.open(path) as pil_im: + mask = np.array(pil_im) + mask = mask.astype(np.float32) / 255.0 + return mask[None] # fake feature channel + + +def _get_bbox_from_mask( + mask, thr, decrease_quant: float = 0.05 +) -> Tuple[int, int, int, int]: + # bbox in xywh + masks_for_box = np.zeros_like(mask) + while masks_for_box.sum() <= 1.0: + masks_for_box = (mask > thr).astype(np.float32) + thr -= decrease_quant + if thr <= 0.0: + warnings.warn(f"Empty masks_for_bbox (thr={thr}) => using full image.") + + x0, x1 = _get_1d_bounds(masks_for_box.sum(axis=-2)) + y0, y1 = _get_1d_bounds(masks_for_box.sum(axis=-1)) + + return x0, y0, x1 - x0, y1 - y0 + + +def _crop_around_box(tensor, bbox, impath: str = ""): + # bbox is xyxy, where the upper bound is corrected with +1 + bbox = _clamp_box_to_image_bounds_and_round( + bbox, + image_size_hw=tensor.shape[-2:], + ) + tensor = tensor[..., bbox[1] : bbox[3], bbox[0] : bbox[2]] + assert all(c > 0 for c in tensor.shape), f"squashed image {impath}" + return tensor + + +def _clamp_box_to_image_bounds_and_round( + bbox_xyxy: torch.Tensor, + image_size_hw: Tuple[int, int], +) -> torch.LongTensor: + bbox_xyxy = bbox_xyxy.clone() + bbox_xyxy[[0, 2]] = torch.clamp(bbox_xyxy[[0, 2]], 0, image_size_hw[-1]) + bbox_xyxy[[1, 3]] = torch.clamp(bbox_xyxy[[1, 3]], 0, image_size_hw[-2]) + if not isinstance(bbox_xyxy, torch.LongTensor): + bbox_xyxy = bbox_xyxy.round().long() + return bbox_xyxy # pyre-ignore [7] + + +def _get_clamp_bbox( + bbox: torch.Tensor, + box_crop_context: float = 0.0, + image_path: str = "", +) -> torch.Tensor: + # box_crop_context: rate of expansion for bbox + # returns possibly expanded bbox xyxy as float + + bbox = bbox.clone() # do not edit bbox in place + + # increase box size + if box_crop_context > 0.0: + c = box_crop_context + bbox = bbox.float() + bbox[0] -= bbox[2] * c / 2 + bbox[1] -= bbox[3] * c / 2 + bbox[2] += bbox[2] * c + bbox[3] += bbox[3] * c + + if (bbox[2:] <= 1.0).any(): + raise ValueError( + f"squashed image {image_path}!! The bounding box contains no pixels." 
+ ) + + bbox[2:] = torch.clamp(bbox[2:], 2) # set min height, width to 2 along both axes + bbox_xyxy = _bbox_xywh_to_xyxy(bbox, clamp_size=2) + + return bbox_xyxy + + +def _bbox_xyxy_to_xywh(xyxy: torch.Tensor) -> torch.Tensor: + wh = xyxy[2:] - xyxy[:2] + xywh = torch.cat([xyxy[:2], wh]) + return xywh + + +def _resize_image( + self, image, mode="bilinear" +) -> Tuple[torch.Tensor, float, torch.Tensor]: + image_height, image_width = self.image_height, self.image_width + if image_height is None or image_width is None: + # skip the resizing + imre_ = torch.from_numpy(image) + return imre_, 1.0, torch.ones_like(imre_[:1]) + # takes numpy array, returns pytorch tensor + minscale = min( + image_height / image.shape[-2], + image_width / image.shape[-1], + ) + imre = torch.nn.functional.interpolate( + torch.from_numpy(image)[None], + scale_factor=minscale, + mode=mode, + align_corners=False if mode == "bilinear" else None, + recompute_scale_factor=True, + )[0] + # pyre-fixme[19]: Expected 1 positional argument. + imre_ = torch.zeros(image.shape[0], self.image_height, self.image_width) + imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre + mask = torch.zeros(1, self.image_height, self.image_width) + mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0 + return imre_, minscale, mask + + +def _load_depth(path, scale_adjustment) -> np.ndarray: + if not path.lower().endswith(".png"): + raise ValueError('unsupported depth file name "%s"' % path) + + d = _load_16big_png_depth(path) * scale_adjustment + d[~np.isfinite(d)] = 0.0 + return d[None] # fake feature channel + + +def _load_16big_png_depth(depth_png) -> np.ndarray: + with Image.open(depth_png) as depth_pil: + # the image is stored with 16-bit depth but PIL reads it as I (32 bit). + # we cast it to uint16, then reinterpret as float16, then cast to float32 + depth = ( + np.frombuffer(np.array(depth_pil, dtype=np.uint16), dtype=np.float16) + .astype(np.float32) + .reshape((depth_pil.size[1], depth_pil.size[0])) + ) + return depth + + +def _rescale_bbox(bbox: torch.Tensor, orig_res, new_res) -> torch.Tensor: + assert bbox is not None + assert np.prod(orig_res) > 1e-8 + # average ratio of dimensions + rel_size = (new_res[0] / orig_res[0] + new_res[1] / orig_res[1]) / 2.0 + return bbox * rel_size + + +def _load_1bit_png_mask(file: str) -> np.ndarray: + with Image.open(file) as pil_im: + mask = (np.array(pil_im.convert("L")) > 0.0).astype(np.float32) + return mask + + +def _load_depth_mask(path: str) -> np.ndarray: + if not path.lower().endswith(".png"): + raise ValueError('unsupported depth mask file name "%s"' % path) + m = _load_1bit_png_mask(path) + return m[None] # fake feature channel + + +def _get_1d_bounds(arr) -> Tuple[int, int]: + nz = np.flatnonzero(arr) + return nz[0], nz[-1] + 1 + + +def _bbox_xywh_to_xyxy( + xywh: torch.Tensor, clamp_size: Optional[int] = None +) -> torch.Tensor: + xyxy = xywh.clone() + if clamp_size is not None: + xyxy[2:] = torch.clamp(xyxy[2:], clamp_size) + xyxy[2:] += xyxy[:2] + return xyxy + + +def _safe_as_tensor(data, dtype): + return torch.tensor(data, dtype=dtype) if data is not None else None + + +# NOTE this cache is per-worker; they are implemented as processes. +# each batch is loaded and collated by a single worker; +# since sequences tend to co-occur within batches, this is useful. 
+@functools.lru_cache(maxsize=256) +# pyre-ignore +def _load_pointcloud(pcl_path: Union[str, Path], max_points: int = 0) -> Pointclouds: + pcl = IO().load_pointcloud(pcl_path) + if max_points > 0: + pcl = pcl.subsample(max_points) + + return pcl From f745dfc941e9c5ed3e10e0a2664236b3124b3770 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 28 Feb 2023 15:39:42 +0000 Subject: [PATCH 02/43] added type hints and deleted chore pyre-ignore --- .../implicitron/dataset/json_index_dataset.py | 45 ++++++++----------- pytorch3d/implicitron/dataset/load_blob.py | 30 ++++++------- 2 files changed, 32 insertions(+), 43 deletions(-) diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py index ac9daf02a..9bec154c3 100644 --- a/pytorch3d/implicitron/dataset/json_index_dataset.py +++ b/pytorch3d/implicitron/dataset/json_index_dataset.py @@ -52,7 +52,6 @@ class FrameAnnotsEntry(TypedDict): subset: Optional[str] - # pyre-ignore frame_annotation: types.FrameAnnotation else: @@ -60,7 +59,6 @@ class FrameAnnotsEntry(TypedDict): @registry.register -# pyre-ignore class JsonIndexDataset(DatasetBase, ReplaceableBase): """ A dataset with annotations in json files like the Common Objects in 3D @@ -131,7 +129,6 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase): frame_annotations_type: ClassVar[ Type[types.FrameAnnotation] - # pyre-ignore ] = types.FrameAnnotation path_manager: Any = None @@ -164,7 +161,7 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase): sort_frames: bool = False eval_batches: Any = None eval_batch_index: Any = None - loader: BlobLoader + blob_loader: BlobLoader # frame_annots: List[FrameAnnotsEntry] = field(init=False) # seq_annots: Dict[str, types.SequenceAnnotation] = field(init=False) @@ -179,21 +176,21 @@ def __post_init__(self) -> None: self._filter_db() # also computes sequence indices self._extract_and_set_eval_batches() - self.loader = BlobLoader( - self.dataset_root, - self.load_images, - self.load_depths, - self.load_depth_masks, - self.load_masks, - self.load_point_clouds, - self.max_points, - self.mask_images, - self.mask_depths, - self.image_height, - self.image_width, - self.box_crop, - self.box_crop_mask_thr, - self.box_crop_context, + self.blob_loader = BlobLoader( + dataset_root = self.dataset_root, + load_images = self.load_images, + load_depths = self.load_depths, + load_depth_masks = self.load_depth_masks, + load_masks = self.load_masks, + load_point_clouds = self.load_point_clouds, + max_points = self.max_points, + mask_images = self.mask_images, + mask_depths = self.mask_depths, + image_height = self.image_height, + image_width = self.image_width, + box_crop = self.box_crop, + box_crop_mask_thr = self.box_crop_mask_thr, + box_crop_context = self.box_crop_context, ) logger.info(str(self)) @@ -415,7 +412,6 @@ def __len__(self) -> int: def _get_frame_type(self, entry: FrameAnnotsEntry) -> Optional[str]: return entry["subset"] - # pyre-ignore def get_all_train_cameras(self) -> CamerasBase: """ Returns the cameras corresponding to all the known frames. 
@@ -431,7 +427,6 @@ def get_all_train_cameras(self) -> CamerasBase: cameras.append(self[frame_idx].camera) return join_cameras_as_batch(cameras) - # pyre-ignore def __getitem__(self, index) -> FrameData: # pyre-ignore[16] if index >= len(self.frame_annots): @@ -456,17 +451,14 @@ def __getitem__(self, index) -> FrameData: else None, ) - # The rest of the fields are optional + # Optional field frame_data.frame_type = self._get_frame_type(self.frame_annots[index]) - - frame_data = self.loader.load(frame_data, entry, point_cloud) - return frame_data + return self.blob_loader.load(frame_data, entry, point_cloud) def _load_frames(self) -> None: logger.info(f"Loading Co3D frames from {self.frame_annotations_file}.") local_file = self._local_path(self.frame_annotations_file) with gzip.open(local_file, "rt", encoding="utf8") as zipfile: - # pyre-ignore frame_annots_list = types.load_dataclass( zipfile, List[self.frame_annotations_type] ) @@ -481,7 +473,6 @@ def _load_sequences(self) -> None: logger.info(f"Loading Co3D sequences from {self.sequence_annotations_file}.") local_file = self._local_path(self.sequence_annotations_file) with gzip.open(local_file, "rt", encoding="utf8") as zipfile: - # pyre-ignore seq_annots = types.load_dataclass(zipfile, List[types.SequenceAnnotation]) if not seq_annots: raise ValueError("Empty sequences file!") diff --git a/pytorch3d/implicitron/dataset/load_blob.py b/pytorch3d/implicitron/dataset/load_blob.py index b10fb1267..905351896 100644 --- a/pytorch3d/implicitron/dataset/load_blob.py +++ b/pytorch3d/implicitron/dataset/load_blob.py @@ -44,7 +44,6 @@ class BlobLoader: box_crop_context: The amount of additional padding added to each dimension of the cropping bounding box, relative to box size. """ - path_manager: Any = None def __init__( @@ -64,20 +63,20 @@ def __init__( box_crop_mask_thr, box_crop_context, ): - self.dataset_root = dataset_root - self.load_images = load_images - self.load_depths = load_depths - self.load_depth_masks = load_depth_masks - self.load_masks = load_masks - self.load_point_clouds = load_point_clouds - self.max_points = max_points - self.mask_images = mask_images - self.mask_depths = mask_depths - self.image_height = image_height - self.image_width = image_width - self.box_crop = box_crop - self.box_crop_mask_thr = box_crop_mask_thr - self.box_crop_context = box_crop_context + self.dataset_root: str = dataset_root + self.load_images: bool = load_images + self.load_depths: bool = load_depths + self.load_depth_masks: bool = load_depth_masks + self.load_masks: bool = load_masks + self.load_point_clouds: bool = load_point_clouds + self.max_points: int = max_points + self.mask_images: bool = mask_images + self.mask_depths: bool = mask_depths + self.image_height: int = image_height + self.image_width: int = image_width + self.box_crop: bool = box_crop + self.box_crop_mask_thr: float = box_crop_mask_thr + self.box_crop_context: float = box_crop_context def load( self, @@ -341,7 +340,6 @@ def _resize_image( align_corners=False if mode == "bilinear" else None, recompute_scale_factor=True, )[0] - # pyre-fixme[19]: Expected 1 positional argument. 
imre_ = torch.zeros(image.shape[0], self.image_height, self.image_width) imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre mask = torch.zeros(1, self.image_height, self.image_width) From c3c5110364ae1d7e42ba63a97223d3410926d587 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 28 Feb 2023 16:16:37 +0000 Subject: [PATCH 03/43] linter --- .../implicitron/dataset/json_index_dataset.py | 36 +++++++++---------- pytorch3d/implicitron/dataset/load_blob.py | 10 ++++-- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py index 9bec154c3..0d5aa1796 100644 --- a/pytorch3d/implicitron/dataset/json_index_dataset.py +++ b/pytorch3d/implicitron/dataset/json_index_dataset.py @@ -15,7 +15,6 @@ import warnings from collections import defaultdict from itertools import islice -from pathlib import Path from typing import ( Any, ClassVar, @@ -30,18 +29,17 @@ Union, ) -import numpy as np import torch from tqdm import tqdm -from pytorch3d.implicitron.tools.config import registry, ReplaceableBase from pytorch3d.implicitron.dataset import types from pytorch3d.implicitron.dataset.dataset_base import DatasetBase, FrameData from pytorch3d.implicitron.dataset.load_blob import BlobLoader from pytorch3d.implicitron.dataset.utils import is_known_frame_scalar + +from pytorch3d.implicitron.tools.config import registry, ReplaceableBase from pytorch3d.renderer.camera_utils import join_cameras_as_batch -from pytorch3d.renderer.cameras import CamerasBase, PerspectiveCameras -from pytorch3d.structures.pointclouds import Pointclouds +from pytorch3d.renderer.cameras import CamerasBase logger = logging.getLogger(__name__) @@ -177,20 +175,20 @@ def __post_init__(self) -> None: self._extract_and_set_eval_batches() self.blob_loader = BlobLoader( - dataset_root = self.dataset_root, - load_images = self.load_images, - load_depths = self.load_depths, - load_depth_masks = self.load_depth_masks, - load_masks = self.load_masks, - load_point_clouds = self.load_point_clouds, - max_points = self.max_points, - mask_images = self.mask_images, - mask_depths = self.mask_depths, - image_height = self.image_height, - image_width = self.image_width, - box_crop = self.box_crop, - box_crop_mask_thr = self.box_crop_mask_thr, - box_crop_context = self.box_crop_context, + dataset_root=self.dataset_root, + load_images=self.load_images, + load_depths=self.load_depths, + load_depth_masks=self.load_depth_masks, + load_masks=self.load_masks, + load_point_clouds=self.load_point_clouds, + max_points=self.max_points, + mask_images=self.mask_images, + mask_depths=self.mask_depths, + image_height=self.image_height, + image_width=self.image_width, + box_crop=self.box_crop, + box_crop_mask_thr=self.box_crop_mask_thr, + box_crop_context=self.box_crop_context, ) logger.info(str(self)) diff --git a/pytorch3d/implicitron/dataset/load_blob.py b/pytorch3d/implicitron/dataset/load_blob.py index 905351896..2d6d2d220 100644 --- a/pytorch3d/implicitron/dataset/load_blob.py +++ b/pytorch3d/implicitron/dataset/load_blob.py @@ -1,15 +1,18 @@ import functools import os import warnings +from pathlib import Path +from typing import Any, Optional, Tuple, Union import numpy as np -from PIL import Image import torch -from typing import Any, Optional, Tuple +from PIL import Image from pytorch3d.implicitron.dataset import types from pytorch3d.implicitron.dataset.dataset_base import FrameData from pytorch3d.io import IO +from pytorch3d.renderer.cameras import 
PerspectiveCameras +from pytorch3d.structures.pointclouds import Pointclouds class BlobLoader: @@ -44,6 +47,7 @@ class BlobLoader: box_crop_context: The amount of additional padding added to each dimension of the cropping bounding box, relative to box size. """ + path_manager: Any = None def __init__( @@ -371,7 +375,7 @@ def _get_bbox_from_mask( masks_for_box = (mask > thr).astype(np.float32) thr -= decrease_quant if thr <= 0.0: - warnings.warn(f"Empty masks_for_bbox (thr={thr}) => using full image.") + warnings.warn(f"Empty masks_for_bbox (thr={thr}) => using full image.", stacklevel=1) x0, x1 = _get_1d_bounds(masks_for_box.sum(axis=-2)) y0, y1 = _get_1d_bounds(masks_for_box.sum(axis=-1)) From 9b431bd5698050bfc5574881a569f2fb9cab5be7 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 28 Feb 2023 16:18:11 +0000 Subject: [PATCH 04/43] linter --- pytorch3d/implicitron/dataset/load_blob.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pytorch3d/implicitron/dataset/load_blob.py b/pytorch3d/implicitron/dataset/load_blob.py index 2d6d2d220..0cad7b4f1 100644 --- a/pytorch3d/implicitron/dataset/load_blob.py +++ b/pytorch3d/implicitron/dataset/load_blob.py @@ -375,7 +375,9 @@ def _get_bbox_from_mask( masks_for_box = (mask > thr).astype(np.float32) thr -= decrease_quant if thr <= 0.0: - warnings.warn(f"Empty masks_for_bbox (thr={thr}) => using full image.", stacklevel=1) + warnings.warn( + f"Empty masks_for_bbox (thr={thr}) => using full image.", stacklevel=1 + ) x0, x1 = _get_1d_bounds(masks_for_box.sum(axis=-2)) y0, y1 = _get_1d_bounds(masks_for_box.sum(axis=-1)) From 627e60fb4cf989c7ce0a75b1cb198cd5f99a027a Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 28 Feb 2023 17:02:36 +0000 Subject: [PATCH 05/43] deleted chore pyre-ignore --- pytorch3d/implicitron/dataset/load_blob.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch3d/implicitron/dataset/load_blob.py b/pytorch3d/implicitron/dataset/load_blob.py index 0cad7b4f1..9193a147d 100644 --- a/pytorch3d/implicitron/dataset/load_blob.py +++ b/pytorch3d/implicitron/dataset/load_blob.py @@ -537,7 +537,6 @@ def _safe_as_tensor(data, dtype): # each batch is loaded and collated by a single worker; # since sequences tend to co-occur within batches, this is useful. 
 @functools.lru_cache(maxsize=256)
-# pyre-ignore
 def _load_pointcloud(pcl_path: Union[str, Path], max_points: int = 0) -> Pointclouds:
     pcl = IO().load_pointcloud(pcl_path)
     if max_points > 0:

From 0aa27a6488afe16dbda6b667a34e802a627f2b77 Mon Sep 17 00:00:00 2001
From: Ildar Salakhiev
Date: Wed, 1 Mar 2023 09:49:02 +0000
Subject: [PATCH 06/43] renamed load_blob to blob_loader

---
 pytorch3d/implicitron/dataset/{load_blob.py => blob_loader.py} | 0
 pytorch3d/implicitron/dataset/json_index_dataset.py            | 2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename pytorch3d/implicitron/dataset/{load_blob.py => blob_loader.py} (100%)

diff --git a/pytorch3d/implicitron/dataset/load_blob.py b/pytorch3d/implicitron/dataset/blob_loader.py
similarity index 100%
rename from pytorch3d/implicitron/dataset/load_blob.py
rename to pytorch3d/implicitron/dataset/blob_loader.py
diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py
index 0d5aa1796..2ad041bf7 100644
--- a/pytorch3d/implicitron/dataset/json_index_dataset.py
+++ b/pytorch3d/implicitron/dataset/json_index_dataset.py
@@ -34,7 +34,7 @@

 from pytorch3d.implicitron.dataset import types
 from pytorch3d.implicitron.dataset.dataset_base import DatasetBase, FrameData
-from pytorch3d.implicitron.dataset.load_blob import BlobLoader
+from pytorch3d.implicitron.dataset.blob_loader import BlobLoader
 from pytorch3d.implicitron.dataset.utils import is_known_frame_scalar

From 53823cf6d330af23046ec66a5ce52c17a0c038ec Mon Sep 17 00:00:00 2001
From: Ildar Salakhiev
Date: Wed, 1 Mar 2023 09:56:19 +0000
Subject: [PATCH 07/43] sending the whole seq_annotation to BlobLoader

---
 pytorch3d/implicitron/dataset/blob_loader.py        | 6 +++---
 pytorch3d/implicitron/dataset/json_index_dataset.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py
index 9193a147d..3c624a2ce 100644
--- a/pytorch3d/implicitron/dataset/blob_loader.py
+++ b/pytorch3d/implicitron/dataset/blob_loader.py
@@ -89,7 +89,7 @@ def load(
         # pyre-ignore
         entry: types.FrameAnnotation,
         # pyre-ignore
-        point_cloud: types.PointCloudAnnotation,
+        seq_annotation: types.SequenceAnnotation,
     ) -> FrameData:
         """Main method for loader."""
         (
@@ -128,8 +128,8 @@ def load(
             clamp_bbox_xyxy,
         )

-        if self.load_point_clouds and point_cloud is not None:
-            pcl_path = self._fix_point_cloud_path(point_cloud.path)
+        if self.load_point_clouds and seq_annotation.point_cloud is not None:
+            pcl_path = self._fix_point_cloud_path(seq_annotation.point_cloud.path)
             frame_data.sequence_point_cloud = _load_pointcloud(
                 self._local_path(pcl_path), max_points=self.max_points
             )
diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py
index 2ad041bf7..0ceb7dec0 100644
--- a/pytorch3d/implicitron/dataset/json_index_dataset.py
+++ b/pytorch3d/implicitron/dataset/json_index_dataset.py
@@ -451,7 +451,7 @@ def __getitem__(self, index) -> FrameData:

         # Optional field
         frame_data.frame_type = self._get_frame_type(self.frame_annots[index])
-        return self.blob_loader.load(frame_data, entry, point_cloud)
+        return self.blob_loader.load(frame_data, entry, self.seq_annots[entry.sequence_name])

From d6f13eb629d6607ebd60bfc1e026027309402fc9 Mon Sep 17 00:00:00 2001
From: Ildar
Salakhiev Date: Wed, 1 Mar 2023 10:00:09 +0000 Subject: [PATCH 08/43] made blob_loader dataclass to avoid boilerplate --- pytorch3d/implicitron/dataset/blob_loader.py | 48 +++++++------------- 1 file changed, 16 insertions(+), 32 deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index 3c624a2ce..29c41837f 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -1,6 +1,7 @@ import functools import os import warnings +from dataclasses import dataclass from pathlib import Path from typing import Any, Optional, Tuple, Union @@ -15,6 +16,7 @@ from pytorch3d.structures.pointclouds import Pointclouds +@dataclass class BlobLoader: """ A loader for correctly (according to setup) loading blobs for FrameData @@ -48,40 +50,22 @@ class BlobLoader: dimension of the cropping bounding box, relative to box size. """ + dataset_root: str + load_images: bool + load_depths: bool + load_depth_masks: bool + load_masks: bool + load_point_clouds: bool + max_points: int + mask_images: bool + mask_depths: bool + image_height: int + image_width: int + box_crop: bool + box_crop_mask_thr: float + box_crop_context: float path_manager: Any = None - def __init__( - self, - dataset_root, - load_images, - load_depths, - load_depth_masks, - load_masks, - load_point_clouds, - max_points, - mask_images, - mask_depths, - image_height, - image_width, - box_crop, - box_crop_mask_thr, - box_crop_context, - ): - self.dataset_root: str = dataset_root - self.load_images: bool = load_images - self.load_depths: bool = load_depths - self.load_depth_masks: bool = load_depth_masks - self.load_masks: bool = load_masks - self.load_point_clouds: bool = load_point_clouds - self.max_points: int = max_points - self.mask_images: bool = mask_images - self.mask_depths: bool = mask_depths - self.image_height: int = image_height - self.image_width: int = image_width - self.box_crop: bool = box_crop - self.box_crop_mask_thr: float = box_crop_mask_thr - self.box_crop_context: float = box_crop_context - def load( self, # pyre-ignore From 86e64f77fb89b10acd51576620aeda709bd0505c Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Wed, 1 Mar 2023 10:02:21 +0000 Subject: [PATCH 09/43] documented, that FrameData modification done inplace --- pytorch3d/implicitron/dataset/blob_loader.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index 29c41837f..48578927d 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -19,7 +19,8 @@ @dataclass class BlobLoader: """ - A loader for correctly (according to setup) loading blobs for FrameData + A loader for correctly (according to setup) loading blobs for FrameData. + Beware that modification done in place Args: dataset_root: The root folder of the dataset; all the paths in jsons are @@ -75,7 +76,9 @@ def load( # pyre-ignore seq_annotation: types.SequenceAnnotation, ) -> FrameData: - """Main method for loader.""" + """Main method for loader. 
FrameData modification is done in place.
+        """
         (
             frame_data.fg_probability,
             frame_data.mask_path,

From 2f1704939fb1795e7ad2e0eca1b18fb30d12fba4 Mon Sep 17 00:00:00 2001
From: Ildar Salakhiev
Date: Wed, 1 Mar 2023 10:19:14 +0000
Subject: [PATCH 10/43] split JsonIndexDataset args into 2 groups:
 Metadata-related and Blob-loading

---
 .../implicitron/dataset/json_index_dataset.py | 37 ++++++++++---------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py
index 0ceb7dec0..671161680 100644
--- a/pytorch3d/implicitron/dataset/json_index_dataset.py
+++ b/pytorch3d/implicitron/dataset/json_index_dataset.py
@@ -62,7 +62,7 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase):
     A dataset with annotations in json files like the Common Objects in 3D
     (CO3D) dataset.
 
-    Args:
+    Metadata-related args:
         frame_annotations_file: A zipped json file containing metadata of the
             frames in the dataset, serialized List[types.FrameAnnotation].
         sequence_annotations_file: A zipped json file containing metadata of the
@@ -80,6 +80,24 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase):
         pick_sequence: A list of sequence names to restrict the dataset to.
         exclude_sequence: A list of the names of the sequences to exclude.
         limit_category_to: Restrict the dataset to the given list of categories.
+        remove_empty_masks: Removes the frames with no active foreground pixels
+            in the segmentation mask after thresholding (see box_crop_mask_thr).
+        n_frames_per_sequence: If > 0, randomly samples #n_frames_per_sequence
+            frames in each sequence uniformly without replacement if it has
+            more frames than that; applied before other frame-level filters.
+        seed: The seed of the random generator sampling #n_frames_per_sequence
+            random frames per sequence.
-        sort_frames: Enable frame annotations sorting to group frames from the
-            same sequences together and order them by timestamps
-        eval_batches: A list of batches that form the evaluation set;
-            list of batch-sized lists of indices corresponding to __getitem__
-            of this class, thus it can be used directly as a batch sampler.
-        eval_batch_index:
-            ( Optional[List[List[Union[Tuple[str, int, str], Tuple[str, int]]]] )
-            A list of batches of frames described as (sequence_name, frame_idx)
-            that can form the evaluation set, `eval_batches` will be set from this.
-
     """
 
     frame_annotations_type: ClassVar[

From 527ec098e44c15f1386b607d34c7b9e760528813 Mon Sep 17 00:00:00 2001
From: Ildar Salakhiev
Date: Wed, 1 Mar 2023 12:08:46 +0000
Subject: [PATCH 11/43] code refactoring to delete stale pyre-ignore comments

---
 pytorch3d/implicitron/dataset/blob_loader.py  | 19 +++---
 .../implicitron/dataset/json_index_dataset.py | 58 ++++++-------------
 pytorch3d/implicitron/dataset/visualize.py    |  1 -
 3 files changed, 29 insertions(+), 49 deletions(-)

diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py
index 48578927d..fce26b255 100644
--- a/pytorch3d/implicitron/dataset/blob_loader.py
+++ b/pytorch3d/implicitron/dataset/blob_loader.py
@@ -1,3 +1,9 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
 import functools
 import os
 import warnings
@@ -60,8 +66,8 @@ class BlobLoader:
     max_points: int
     mask_images: bool
     mask_depths: bool
-    image_height: int
-    image_width: int
+    image_height: Optional[int]
+    image_width: Optional[int]
     box_crop: bool
     box_crop_mask_thr: float
     box_crop_context: float
@@ -69,11 +75,8 @@ def load(
         self,
-        # pyre-ignore
         frame_data: FrameData,
-        # pyre-ignore
        entry: types.FrameAnnotation,
-        # pyre-ignore
         seq_annotation: types.SequenceAnnotation,
     ) -> FrameData:
         """Main method for loader.
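The `Optional[int]` change above is what lets a caller disable resizing entirely; the convention (visible in the `_resize_image` body deleted in patch 12 below) is that a `None` bound means "keep the original resolution, scale 1.0". A minimal standalone sketch of that behaviour, using the hypothetical name `resize_to_fit` rather than the library's private helper:

    from typing import Optional, Tuple

    import torch

    def resize_to_fit(
        image: torch.Tensor, height: Optional[int], width: Optional[int]
    ) -> Tuple[torch.Tensor, float]:
        # image: (C, H, W); returns the resized image and the applied scale
        if height is None or width is None:
            return image, 1.0  # no target size given: skip resizing
        # largest scale that keeps the image inside (height, width)
        scale = min(height / image.shape[-2], width / image.shape[-1])
        resized = torch.nn.functional.interpolate(
            image[None], scale_factor=scale, mode="bilinear", align_corners=False
        )[0]
        return resized, scale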
@@ -242,7 +245,7 @@ def _get_pytorch3d_camera( entry: types.FrameAnnotation, scale: float, clamp_bbox_xyxy: Optional[torch.Tensor], - ) -> PerspectiveCameras: # pyre-ignore + ) -> PerspectiveCameras: entry_viewpoint = entry.viewpoint assert entry_viewpoint is not None # principal point and focal length @@ -331,9 +334,9 @@ def _resize_image( align_corners=False if mode == "bilinear" else None, recompute_scale_factor=True, )[0] - imre_ = torch.zeros(image.shape[0], self.image_height, self.image_width) + imre_ = torch.zeros(image.shape[0], image_height, image_width) imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre - mask = torch.zeros(1, self.image_height, self.image_width) + mask = torch.zeros(1, image_height, image_width) mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0 return imre_, minscale, mask diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py index 671161680..cf63b9b43 100644 --- a/pytorch3d/implicitron/dataset/json_index_dataset.py +++ b/pytorch3d/implicitron/dataset/json_index_dataset.py @@ -14,6 +14,7 @@ import random import warnings from collections import defaultdict +from dataclasses import field from itertools import islice from typing import ( Any, @@ -30,16 +31,16 @@ ) import torch -from tqdm import tqdm from pytorch3d.implicitron.dataset import types -from pytorch3d.implicitron.dataset.dataset_base import DatasetBase, FrameData from pytorch3d.implicitron.dataset.blob_loader import BlobLoader +from pytorch3d.implicitron.dataset.dataset_base import DatasetBase, FrameData from pytorch3d.implicitron.dataset.utils import is_known_frame_scalar from pytorch3d.implicitron.tools.config import registry, ReplaceableBase from pytorch3d.renderer.camera_utils import join_cameras_as_batch from pytorch3d.renderer.cameras import CamerasBase +from tqdm import tqdm logger = logging.getLogger(__name__) @@ -160,13 +161,14 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase): sort_frames: bool = False eval_batches: Any = None eval_batch_index: Any = None - blob_loader: BlobLoader - # frame_annots: List[FrameAnnotsEntry] = field(init=False) - # seq_annots: Dict[str, types.SequenceAnnotation] = field(init=False) + subset_to_image_path: Any = None + # initialised in __post_init__ + blob_loader: BlobLoader = field(init=False) + frame_annots: List[FrameAnnotsEntry] = field(init=False) + seq_annots: Dict[str, types.SequenceAnnotation] = field(init=False) + _seq_to_idx: Dict[str, List[int]] = field(init=False) def __post_init__(self) -> None: - # pyre-fixme[16]: `JsonIndexDataset` has no attribute `subset_to_image_path`. - self.subset_to_image_path = None self._load_frames() self._load_sequences() if self.sort_frames: @@ -206,7 +208,8 @@ def _extract_and_set_eval_batches(self): self.eval_batch_index ) - def join(self, other_datasets: Iterable[DatasetBase]) -> None: + # pyre-ignore + def join(self, other_datasets: Iterable["JsonIndexDataset"]) -> None: """ Join the dataset with other JsonIndexDataset objects. 
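The `field(init=False)` declarations above rely on a standard dataclasses pattern: the attribute is declared for type checkers but excluded from the generated `__init__`, and `__post_init__` fills it in. A minimal self-contained sketch of the pattern (the `Example` class is illustrative, not from the library):

    from dataclasses import dataclass, field

    @dataclass
    class Example:
        x: int
        doubled: int = field(init=False)  # not an __init__ argument

        def __post_init__(self) -> None:
            self.doubled = 2 * self.x  # initialised here instead

    e = Example(x=3)  # 'doubled' cannot be passed to the constructor
    assert e.doubled == 6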
@@ -216,9 +219,7 @@ def join(self, other_datasets: Iterable[DatasetBase]) -> None: """ if not all(isinstance(d, JsonIndexDataset) for d in other_datasets): raise ValueError("This function can only join a list of JsonIndexDataset") - # pyre-ignore[16] self.frame_annots.extend([fa for d in other_datasets for fa in d.frame_annots]) - # pyre-ignore[16] self.seq_annots.update( # https://gist.github.com/treyhunner/f35292e676efa0be1728 functools.reduce( @@ -266,7 +267,7 @@ def seq_frame_index_to_dataset_index( allow_missing_indices: bool = False, remove_missing_indices: bool = False, suppress_missing_index_warning: bool = True, - ) -> List[List[Union[Optional[int], int]]]: + ) -> Union[List[List[Optional[int]]], List[List[int]]]: """ Obtain indices into the dataset object given a list of frame ids. @@ -294,11 +295,9 @@ def seq_frame_index_to_dataset_index( """ _dataset_seq_frame_n_index = { seq: { - # pyre-ignore[16] self.frame_annots[idx]["frame_annotation"].frame_number: idx for idx in seq_idx } - # pyre-ignore[16] for seq, seq_idx in self._seq_to_idx.items() } @@ -321,7 +320,6 @@ def _get_dataset_idx( # Check that the loaded frame path is consistent # with the one stored in self.frame_annots. assert os.path.normpath( - # pyre-ignore[16] self.frame_annots[idx]["frame_annotation"].image.path ) == os.path.normpath( path @@ -338,9 +336,7 @@ def _get_dataset_idx( valid_dataset_idx = [ [b for b in batch if b is not None] for batch in dataset_idx ] - return [ # pyre-ignore[7] - batch for batch in valid_dataset_idx if len(batch) > 0 - ] + return [batch for batch in valid_dataset_idx if len(batch) > 0] return dataset_idx @@ -373,7 +369,7 @@ def subset_from_frame_index( # Deep copy the whole dataset except frame_annots, which are large so we # deep copy only the requested subset of frame_annots. 
- memo = {id(self.frame_annots): None} # pyre-ignore[16] + memo = {id(self.frame_annots): None} dataset_new = copy.deepcopy(self, memo) dataset_new.frame_annots = copy.deepcopy( [self.frame_annots[i] for i in valid_dataset_indices] @@ -401,11 +397,9 @@ def subset_from_frame_index( return dataset_new def __str__(self) -> str: - # pyre-ignore[16] return f"JsonIndexDataset #frames={len(self.frame_annots)}" def __len__(self) -> int: - # pyre-ignore[16] return len(self.frame_annots) def _get_frame_type(self, entry: FrameAnnotsEntry) -> Optional[str]: @@ -417,7 +411,6 @@ def get_all_train_cameras(self) -> CamerasBase: """ logger.info("Loading all train cameras.") cameras = [] - # pyre-ignore[16] for frame_idx, frame_annot in enumerate(tqdm(self.frame_annots)): frame_type = self._get_frame_type(frame_annot) if frame_type is None: @@ -427,12 +420,10 @@ def get_all_train_cameras(self) -> CamerasBase: return join_cameras_as_batch(cameras) def __getitem__(self, index) -> FrameData: - # pyre-ignore[16] if index >= len(self.frame_annots): raise IndexError(f"index {index} out of range {len(self.frame_annots)}") entry = self.frame_annots[index]["frame_annotation"] - # pyre-ignore[16] point_cloud = self.seq_annots[entry.sequence_name].point_cloud frame_data = FrameData( frame_number=_safe_as_tensor(entry.frame_number, torch.long), @@ -452,7 +443,9 @@ def __getitem__(self, index) -> FrameData: # Optional field frame_data.frame_type = self._get_frame_type(self.frame_annots[index]) - return self.blob_loader.load(frame_data, entry, self.seq_annots[entry.sequence_name]) + return self.blob_loader.load( + frame_data, entry, self.seq_annots[entry.sequence_name] + ) def _load_frames(self) -> None: logger.info(f"Loading Co3D frames from {self.frame_annotations_file}.") @@ -463,7 +456,6 @@ def _load_frames(self) -> None: ) if not frame_annots_list: raise ValueError("Empty dataset!") - # pyre-ignore[16] self.frame_annots = [ FrameAnnotsEntry(frame_annotation=a, subset=None) for a in frame_annots_list ] @@ -475,7 +467,6 @@ def _load_sequences(self) -> None: seq_annots = types.load_dataclass(zipfile, List[types.SequenceAnnotation]) if not seq_annots: raise ValueError("Empty sequences file!") - # pyre-ignore[16] self.seq_annots = {entry.sequence_name: entry for entry in seq_annots} def _load_subset_lists(self) -> None: @@ -491,7 +482,6 @@ def _load_subset_lists(self) -> None: for subset, frames in subset_to_seq_frame.items() for _, _, path in frames } - # pyre-ignore[16] for frame in self.frame_annots: frame["subset"] = frame_path_to_subset.get( frame["frame_annotation"].image.path, None @@ -504,7 +494,6 @@ def _load_subset_lists(self) -> None: def _sort_frames(self) -> None: # Sort frames to have them grouped by sequence, ordered by timestamp - # pyre-ignore[16] self.frame_annots = sorted( self.frame_annots, key=lambda f: ( @@ -516,7 +505,6 @@ def _sort_frames(self) -> None: def _filter_db(self) -> None: if self.remove_empty_masks: logger.info("Removing images with empty masks.") - # pyre-ignore[16] old_len = len(self.frame_annots) msg = "remove_empty_masks needs every MaskAnnotation.mass to be set." 
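The `memo = {id(self.frame_annots): None}` line above is the standard trick for a partial deepcopy: pre-seeding the memo dict makes `copy.deepcopy` treat the large member as already copied (to `None`), so only the requested subset is copied afterwards. A minimal sketch with an illustrative `Holder` class:

    import copy

    class Holder:
        def __init__(self) -> None:
            self.big = list(range(1_000_000))  # expensive to deep-copy
            self.name = "holder"

    h = Holder()
    clone = copy.deepcopy(h, {id(h.big): None})  # 'big' is skipped, becomes None
    assert clone.big is None and clone.name == "holder"
    clone.big = copy.deepcopy(h.big[:10])  # replace with the requested subset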
@@ -557,7 +545,6 @@ def positive_mass(frame_annot: types.FrameAnnotation) -> bool:
 
         if len(self.limit_category_to) > 0:
             logger.info(f"Limiting dataset to categories: {self.limit_category_to}")
-            # pyre-ignore[16]
             self.seq_annots = {
                 name: entry
                 for name, entry in self.seq_annots.items()
@@ -595,7 +582,6 @@ def positive_mass(frame_annot: types.FrameAnnotation) -> bool:
         if self.n_frames_per_sequence > 0:
             logger.info(f"Taking max {self.n_frames_per_sequence} per sequence.")
             keep_idx = []
-            # pyre-ignore[16]
             for seq, seq_indices in self._seq_to_idx.items():
                 # infer the seed from the sequence name, this is reproducible
                 # and makes the selection differ for different sequences
@@ -625,20 +611,14 @@ def _invalidate_indexes(self, filter_seq_annots: bool = False) -> None:
         self._invalidate_seq_to_idx()
 
         if filter_seq_annots:
-            # pyre-ignore[16]
             self.seq_annots = {
-                k: v
-                for k, v in self.seq_annots.items()
-                # pyre-ignore[16]
-                if k in self._seq_to_idx
+                k: v for k, v in self.seq_annots.items() if k in self._seq_to_idx
             }
 
     def _invalidate_seq_to_idx(self) -> None:
         seq_to_idx = defaultdict(list)
-        # pyre-ignore[16]
        for idx, entry in enumerate(self.frame_annots):
             seq_to_idx[entry["frame_annotation"].sequence_name].append(idx)
-        # pyre-ignore[16]
         self._seq_to_idx = seq_to_idx
 
     def _local_path(self, path: str) -> str:
@@ -653,7 +633,6 @@ def get_frame_numbers_and_timestamps(
         for idx in idxs:
             if (
                 subset_filter is not None
-                # pyre-fixme[16]: `JsonIndexDataset` has no attribute `frame_annots`.
                 and self.frame_annots[idx]["subset"] not in subset_filter
             ):
                 continue
@@ -666,7 +645,6 @@ def category_to_sequence_names(self) -> Dict[str, List[str]]:
         c2seq = defaultdict(list)
-        # pyre-ignore
         for sequence_name, sa in self.seq_annots.items():
             c2seq[sa.category].append(sequence_name)
         return dict(c2seq)
diff --git a/pytorch3d/implicitron/dataset/visualize.py b/pytorch3d/implicitron/dataset/visualize.py
index 6d0be0362..284e903a0 100644
--- a/pytorch3d/implicitron/dataset/visualize.py
+++ b/pytorch3d/implicitron/dataset/visualize.py
@@ -44,7 +44,6 @@ def get_implicitron_sequence_pointcloud(
     sequence_entries = [
         ei
         for ei in sequence_entries
-        # pyre-ignore[16]
         if dataset.frame_annots[ei]["frame_annotation"].sequence_name
         == sequence_name
     ]

From 24b731b853b54f741a2f9377118e36d14821fa7c Mon Sep 17 00:00:00 2001
From: Ildar Salakhiev
Date: Mon, 6 Mar 2023 12:47:23 +0000
Subject: [PATCH 12/43] deleted leftover function

---
 pytorch3d/implicitron/dataset/blob_loader.py | 28 --------------------
 1 file changed, 28 deletions(-)

diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py
index fce26b255..035e99a83 100644
--- a/pytorch3d/implicitron/dataset/blob_loader.py
+++ b/pytorch3d/implicitron/dataset/blob_loader.py
@@ -434,34 +434,6 @@ def _bbox_xyxy_to_xywh(xyxy: torch.Tensor) -> torch.Tensor:
     return xywh
 
 
-def _resize_image(
-    self, image, mode="bilinear"
-) -> Tuple[torch.Tensor, float, torch.Tensor]:
-    image_height, image_width = self.image_height, self.image_width
-    if image_height is None or image_width is None:
-        # skip the resizing
-        imre_ = torch.from_numpy(image)
-        return imre_, 1.0, torch.ones_like(imre_[:1])
-    # takes numpy array, returns pytorch tensor
-    minscale = min(
-        image_height / image.shape[-2],
-        image_width / image.shape[-1],
-    )
-    imre = torch.nn.functional.interpolate(
-        torch.from_numpy(image)[None],
-        scale_factor=minscale,
-        mode=mode,
-        align_corners=False if mode == "bilinear" else None,
-        recompute_scale_factor=True,
-    )[0]
-    # pyre-fixme[19]: Expected 1 positional argument.
-    imre_ = torch.zeros(image.shape[0], self.image_height, self.image_width)
-    imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre
-    mask = torch.zeros(1, self.image_height, self.image_width)
-    mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0
-    return imre_, minscale, mask
-
-
 def _load_depth(path, scale_adjustment) -> np.ndarray:
     if not path.lower().endswith(".png"):
         raise ValueError('unsupported depth file name "%s"' % path)

From f484a12501b7d13027fe98707c1be8ece3546153 Mon Sep 17 00:00:00 2001
From: Ildar Salakhiev
Date: Mon, 6 Mar 2023 12:47:51 +0000
Subject: [PATCH 13/43] BlobLoader tests boilerplate

---
 tests/implicitron/test_bbox.py        |  2 +-
 tests/implicitron/test_blob_loader.py | 89 +++++++++++++++++++++++++++
 2 files changed, 90 insertions(+), 1 deletion(-)
 create mode 100644 tests/implicitron/test_blob_loader.py

diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py
index 999dfc924..7d214d857 100644
--- a/tests/implicitron/test_bbox.py
+++ b/tests/implicitron/test_bbox.py
@@ -9,7 +9,7 @@
 import numpy as np
 import torch
-from pytorch3d.implicitron.dataset.json_index_dataset import (
+from pytorch3d.implicitron.dataset.blob_loader import (
     _bbox_xywh_to_xyxy,
     _bbox_xyxy_to_xywh,
     _get_bbox_from_mask,
 )
diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py
new file mode 100644
index 000000000..0e6bf6936
--- /dev/null
+++ b/tests/implicitron/test_blob_loader.py
@@ -0,0 +1,89 @@
+import contextlib
+import unittest
+
+import numpy as np
+
+import torch
+from pytorch3d.implicitron.dataset.blob_loader import (
+    _bbox_xywh_to_xyxy,
+    _bbox_xyxy_to_xywh,
+    _get_bbox_from_mask,
+)
+from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset
+from pytorch3d.implicitron.dataset.blob_loader import BlobLoader
+from tests.common_testing import TestCaseMixin
+from pytorch3d.implicitron.tools.config import expand_args_fields
+from pytorch3d.implicitron.tools.config import get_default_args
+
+
+class TestBlobLoader(TestCaseMixin, unittest.TestCase):
+    def setUp(self):
+        torch.manual_seed(42)
+        self.blob_loader = BlobLoader()
+
+        category = "skateboard"
+        stack = contextlib.ExitStack()
+        dataset_root, path_manager = stack.enter_context(get_skateboard_data())
+        self.addCleanup(stack.close)
+        frame_file = os.path.join(dataset_root, category, "frame_annotations.jgz")
+        sequence_file = os.path.join(dataset_root, category, "sequence_annotations.jgz")
+        self.image_size = 256
+
+        expand_args_fields(JsonIndexDataset)
+
+        self.datasets = JsonIndexDataset(
+            frame_annotations_file=frame_file,
+            sequence_annotations_file=sequence_file,
+            dataset_root=dataset_root,
+            image_height=self.image_size,
+            image_width=self.image_size,
+            box_crop=True,
+            load_point_clouds=True,
+            path_manager=path_manager,
+        )
+
+    def test_BlobLoader_args(self):
+        # test that BlobLoader works with get_default_args
+        get_default_args(BlobLoader)
+
+    def test_load_crop_fg_probability(self):
+        pass
+
+    def test_load_crop_images(self):
+        pass
+
+    def test_load_mask_depth(self):
+        pass
+
+    def test_fix_point_cloud_path(self):
+        pass
+
+    def test_resize_image(self):
+        pass
+
+    def test_crop_around_box(self):
+        pass
+
+    def test_clamp_box_to_image_bounds_and_round(self):
+        pass
+
+    def test_get_clamp_bbox(self):
+        pass
+
+    def test_load_depth(self):
+        pass
+
+    def test_load_16big_png_depth(self):
+        pass
+
+    def test_rescale_bbox(self):
+        pass
+
+    def test_load_1bit_png_mask(self):
+ pass + + def test_load_depth_mask(self): + pass + + def test_get_1d_bounds(self): + pass From b8674eaa4c6645bcceae089dcc2d12dee730f657 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 7 Mar 2023 13:11:45 +0000 Subject: [PATCH 14/43] tests WIP (not tested) --- tests/implicitron/test_bbox.py | 43 +++++++++ tests/implicitron/test_blob_loader.py | 124 +++++++++++++++++++------- 2 files changed, 136 insertions(+), 31 deletions(-) diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index 7d214d857..ddbcd6bd1 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -13,6 +13,11 @@ _bbox_xywh_to_xyxy, _bbox_xyxy_to_xywh, _get_bbox_from_mask, + _crop_around_box, + _clamp_box_to_image_bounds_and_round, + _get_clamp_bbox, + _rescale_bbox, + _get_1d_bounds, ) from tests.common_testing import TestCaseMixin @@ -76,3 +81,41 @@ def test_mask_to_bbox(self): expected_bbox_xywh = [2, 1, 2, 1] bbox_xywh = _get_bbox_from_mask(mask, 0.5) self.assertClose(bbox_xywh, expected_bbox_xywh) + + def test_crop_around_box(self): + bbox = (0, 1, 2, 2) # (x_min, y_min, x_max, y_max) + image = torch.LongTensor( + [ + [0, 0, 10, 20], + [10, 20, 5, 1], + [10, 20, 1, 1], + [5, 4, 0, 1], + ] + ) + cropped = _crop_around_box(image, bbox) + self.assertClose(cropped, image[0:2, 1:2]) + + def test_clamp_box_to_image_bounds_and_round(self): + bbox = torch.LongTensor([0, 1, 10, 12]) + image_size = (5, 6) + clamped_bbox = _clamp_box_to_image_bounds_and_round(bbox) + self.assertClose(clamped_bbox == [0, 1, 5, 6]) + + def test_get_clamp_bbox(self): + bbox_xywh = torch.LongTensor([1, 1, 4, 5]) + clamped_bbox_xyxy = _get_clamp_bbox(bbox, box_crop_context=2) + # size multiplied by 2 and added coordinates + self.assertClose(clamped_bbox_xyxy == torch.LongTensor([0, 1, 9, 11])) + + def test_rescale_bbox(self): + bbox = torch.LongTensor([0, 1, 3, 4]) + original_resolution = (4, 4) # + new_resolution = (8, 8) + rescaled_bbox = _rescale_bbox(bbox, original_resolution, new_resolution) + self.assertClose(bbox * 2 == rescaled_bbox) + + def test_get_1d_bounds(self): + array = [0, 1, 2] + bounds = _get_1d_bounds(array) + # make nonzero 1d bounds of image + assert bounds == [1, 2] diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 0e6bf6936..da3326421 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -5,9 +5,12 @@ import torch from pytorch3d.implicitron.dataset.blob_loader import ( - _bbox_xywh_to_xyxy, - _bbox_xyxy_to_xywh, - _get_bbox_from_mask, + _load_image, + _load_mask, + _load_depth, + _load_16big_png_depth, + _load_1bit_png_mask, + _load_depth_mask, ) from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset from pytorch3d.implicitron.dataset.blob_loader import BlobLoader @@ -41,49 +44,108 @@ def setUp(self): load_point_clouds=True, path_manager=path_manager, ) + self.entry = self.datasets.frame_annots[index]["frame_annotation"] def test_BlobLoader_args(self): # test that BlobLoader works with get_default_args get_default_args(BlobLoader) - def test_load_crop_fg_probability(self): - pass - - def test_load_crop_images(self): - pass + def test_load_pipeline(self): + ( + fg_probability, + mask_path, + bbox_xywh, + clamp_bbox_xyxy, + crop_bbox_xywh, + ) = self.datasets.loader._load_crop_fg_probability(entry) + + assert fg_probability + assert mask_path + assert bbox_xywh + assert clamp_bbox_xyxy + assert crop_bbox_xywh + ( + image_rgb, + image_path, + mask_crop, + scale, 
+ ) = self.dataset.loader._load_crop_images( + self.entry, fg_probability, clamp_bbox_xyxy, + ) + assert image_rgb + assert image_path + assert mask_crop, + assert scale, + ( + depth_map, + depth_path, + depth_mask, + ) = self.dataset.loader._load_mask_depth( + self.entry, clamp_bbox_xyxy, fg_probability, + ) + assert depth_map + assert depth_path + assert depth_mask - def test_load_mask_depth(self): - pass + camera = self.dataset.loader._get_pytorch3d_camera( + self.entry, scale, clamp_bbox_xyxy, + ) + assert camera def test_fix_point_cloud_path(self): - pass + """Some files in Co3Dv2 have an accidental absolute path stored.""" + original_path = 'some_file_path' + modified_path = self.dataset.loader._fix_point_cloud_path(original_path) + assert original_path in modified_path + assert self.dataset.loader.dataset_root in modified_path def test_resize_image(self): - pass - - def test_crop_around_box(self): - pass - - def test_clamp_box_to_image_bounds_and_round(self): - pass - - def test_get_clamp_bbox(self): - pass + image = None + image_rgb, scale, mask_crop = self.dataset.loader._resize_image(image) + assert image_rgb.shape == (self.dataset.loader.width, self.dataset.loader.height) + assert scale == 1 + assert masc_crop.shape == (self.dataset.loader.width, self.dataset.loader.height) + + def test_load_image(self): + image = _load_image(self.entry.image.path) + assert image.dtype == np.float32 + assert torch.max(image) <= 1.0 + assert torch.min(image) >= 0.0 + + def test_load_mask(self): + mask = _load_mask(self.entry.mask.path) + assert mask.dtype == np.float32 + assert torch.max(mask) <= 1.0 + assert torch.min(mask) >= 0.0 def test_load_depth(self): - pass + entry_depth = self.entry.depth + # path = os.path.join(self.dataset_root, entry_depth.path) + path = entry_depth.path + depth_map = _load_depth(path, entry_depth.scale_adjustment) + assert depth_map.dtype == np.float32 + assert depth_map.shape def test_load_16big_png_depth(self): - pass - - def test_rescale_bbox(self): - pass + entry_depth = self.entry.depth + # path = os.path.join(self.dataset_root, entry_depth.path) + path = entry_depth.path + depth_map = _load_16big_png_depth(path) + assert depth_map.dtype == np.float32 + assert depth_map.shape def test_load_1bit_png_mask(self): - pass + entry_depth = self.entry.depth + # mask_path = os.path.join(self.dataset_root, entry_depth.mask_path) + mask_path = entry_depth.mask_path + mask = _load_16big_png_depth(mask_path) + assert mask.dtype == np.float32 + assert mask.shape def test_load_depth_mask(self): - pass - - def test_get_1d_bounds(self): - pass + entry_depth = self.entry.depth + # mask_path = os.path.join(self.dataset_root, entry_depth.mask_path) + mask_path = entry_depth.mask_path + mask = _load_depth_mask(mask_path) + assert mask.dtype == np.float32 + assert mask.shape From faeffcf3aa61716640fca15fe25e260fd524e953 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 09:58:42 +0000 Subject: [PATCH 15/43] tests typos and errors WIP --- tests/implicitron/test_bbox.py | 12 ++++++------ tests/implicitron/test_blob_loader.py | 7 ++++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index ddbcd6bd1..1e351d049 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -83,7 +83,7 @@ def test_mask_to_bbox(self): self.assertClose(bbox_xywh, expected_bbox_xywh) def test_crop_around_box(self): - bbox = (0, 1, 2, 2) # (x_min, y_min, x_max, y_max) + bbox = 
torxh.LongTensor([0, 1, 2, 2]) # (x_min, y_min, x_max, y_max) image = torch.LongTensor( [ [0, 0, 10, 20], @@ -98,24 +98,24 @@ def test_crop_around_box(self): def test_clamp_box_to_image_bounds_and_round(self): bbox = torch.LongTensor([0, 1, 10, 12]) image_size = (5, 6) - clamped_bbox = _clamp_box_to_image_bounds_and_round(bbox) + clamped_bbox = _clamp_box_to_image_bounds_and_round(bbox, image_size) self.assertClose(clamped_bbox == [0, 1, 5, 6]) def test_get_clamp_bbox(self): bbox_xywh = torch.LongTensor([1, 1, 4, 5]) - clamped_bbox_xyxy = _get_clamp_bbox(bbox, box_crop_context=2) + clamped_bbox_xyxy = _get_clamp_bbox(bbox_xywh, box_crop_context=2) # size multiplied by 2 and added coordinates self.assertClose(clamped_bbox_xyxy == torch.LongTensor([0, 1, 9, 11])) def test_rescale_bbox(self): bbox = torch.LongTensor([0, 1, 3, 4]) - original_resolution = (4, 4) # + original_resolution = (4, 4) new_resolution = (8, 8) rescaled_bbox = _rescale_bbox(bbox, original_resolution, new_resolution) - self.assertClose(bbox * 2 == rescaled_bbox) + self.assertClose(bbox * 2, rescaled_bbox) def test_get_1d_bounds(self): array = [0, 1, 2] bounds = _get_1d_bounds(array) # make nonzero 1d bounds of image - assert bounds == [1, 2] + assert bounds == [1, 3] diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index da3326421..692ecbd62 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -18,11 +18,12 @@ from pytorch3d.implicitron.tools.config import expand_args_fields from pytorch3d.implicitron.tools.config import get_default_args +from tests.implicitron.common_resources import get_skateboard_data + class TestBlobLoader(TestCaseMixin, unittest.TestCase): def setUp(self): torch.manual_seed(42) - self.blob_loader = BlobLoader() category = "skateboard" stack = contextlib.ExitStack() @@ -74,8 +75,8 @@ def test_load_pipeline(self): ) assert image_rgb assert image_path - assert mask_crop, - assert scale, + assert mask_crop + assert scale ( depth_map, depth_path, From bc24e29d7640773e0892288b919b3e1f851ec37d Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 10:23:19 +0000 Subject: [PATCH 16/43] tests typos and errors WIP --- tests/implicitron/test_bbox.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index 1e351d049..5381e709e 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -83,7 +83,7 @@ def test_mask_to_bbox(self): self.assertClose(bbox_xywh, expected_bbox_xywh) def test_crop_around_box(self): - bbox = torxh.LongTensor([0, 1, 2, 2]) # (x_min, y_min, x_max, y_max) + bbox = torch.LongTensor([0, 1, 2, 2]) # (x_min, y_min, x_max, y_max) image = torch.LongTensor( [ [0, 0, 10, 20], @@ -95,27 +95,31 @@ def test_crop_around_box(self): cropped = _crop_around_box(image, bbox) self.assertClose(cropped, image[0:2, 1:2]) + + def test_clamp_box_to_image_bounds_and_round(self): bbox = torch.LongTensor([0, 1, 10, 12]) image_size = (5, 6) clamped_bbox = _clamp_box_to_image_bounds_and_round(bbox, image_size) - self.assertClose(clamped_bbox == [0, 1, 5, 6]) + self.assertClose(clamped_bbox, [0, 1, 5, 6]) def test_get_clamp_bbox(self): bbox_xywh = torch.LongTensor([1, 1, 4, 5]) clamped_bbox_xyxy = _get_clamp_bbox(bbox_xywh, box_crop_context=2) # size multiplied by 2 and added coordinates - self.assertClose(clamped_bbox_xyxy == torch.LongTensor([0, 1, 9, 11])) + self.assertClose(clamped_bbox_xyxy, 
torch.LongTensor([0, 1, 9, 11])) def test_rescale_bbox(self): bbox = torch.LongTensor([0, 1, 3, 4]) original_resolution = (4, 4) new_resolution = (8, 8) rescaled_bbox = _rescale_bbox(bbox, original_resolution, new_resolution) + print(rescaled_bbox) self.assertClose(bbox * 2, rescaled_bbox) def test_get_1d_bounds(self): array = [0, 1, 2] bounds = _get_1d_bounds(array) # make nonzero 1d bounds of image + print(bounds) assert bounds == [1, 3] From e9c59693ed78dfb036db3e056724b88252f6fbe7 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 11:03:16 +0000 Subject: [PATCH 17/43] solved error and typos for test_bbox --- tests/implicitron/test_bbox.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index 5381e709e..89b624199 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -83,7 +83,7 @@ def test_mask_to_bbox(self): self.assertClose(bbox_xywh, expected_bbox_xywh) def test_crop_around_box(self): - bbox = torch.LongTensor([0, 1, 2, 2]) # (x_min, y_min, x_max, y_max) + bbox = torch.LongTensor([0, 1, 2, 3]) # (x_min, y_min, x_max, y_max) image = torch.LongTensor( [ [0, 0, 10, 20], @@ -93,33 +93,30 @@ def test_crop_around_box(self): ] ) cropped = _crop_around_box(image, bbox) - self.assertClose(cropped, image[0:2, 1:2]) - - + self.assertClose(cropped, image[1:3, 0:2]) def test_clamp_box_to_image_bounds_and_round(self): bbox = torch.LongTensor([0, 1, 10, 12]) image_size = (5, 6) + expected_clamped_bbox = torch.LongTensor([0, 1, image_size[1], image_size[0]]) clamped_bbox = _clamp_box_to_image_bounds_and_round(bbox, image_size) - self.assertClose(clamped_bbox, [0, 1, 5, 6]) + self.assertClose(clamped_bbox, expected_clamped_bbox) def test_get_clamp_bbox(self): bbox_xywh = torch.LongTensor([1, 1, 4, 5]) clamped_bbox_xyxy = _get_clamp_bbox(bbox_xywh, box_crop_context=2) # size multiplied by 2 and added coordinates - self.assertClose(clamped_bbox_xyxy, torch.LongTensor([0, 1, 9, 11])) + self.assertClose(clamped_bbox_xyxy, torch.Tensor([-3, -4, 9, 11])) def test_rescale_bbox(self): - bbox = torch.LongTensor([0, 1, 3, 4]) + bbox = torch.Tensor([0.0, 1.0, 3.0, 4.0]) original_resolution = (4, 4) - new_resolution = (8, 8) + new_resolution = (8, 8) # twice bigger rescaled_bbox = _rescale_bbox(bbox, original_resolution, new_resolution) - print(rescaled_bbox) self.assertClose(bbox * 2, rescaled_bbox) def test_get_1d_bounds(self): array = [0, 1, 2] bounds = _get_1d_bounds(array) # make nonzero 1d bounds of image - print(bounds) - assert bounds == [1, 3] + self.assertClose(bounds, [1, 3]) From 44cfcfb9f243c16f6153617166eb28461705f1cc Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 13:28:54 +0000 Subject: [PATCH 18/43] updating test_blob_loader WIP --- tests/implicitron/test_blob_loader.py | 78 +++++++++++++++------------ 1 file changed, 43 insertions(+), 35 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 692ecbd62..d54754d88 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -1,3 +1,5 @@ +import os +import math import contextlib import unittest @@ -14,6 +16,7 @@ ) from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset from pytorch3d.implicitron.dataset.blob_loader import BlobLoader +from pytorch3d.renderer.cameras import PerspectiveCameras from tests.common_testing import TestCaseMixin from 
pytorch3d.implicitron.tools.config import expand_args_fields from pytorch3d.implicitron.tools.config import get_default_args @@ -27,7 +30,7 @@ def setUp(self): category = "skateboard" stack = contextlib.ExitStack() - dataset_root, path_manager = stack.enter_context(get_skateboard_data()) + self.dataset_root, self.path_manager = stack.enter_context(get_skateboard_data()) self.addCleanup(stack.close) frame_file = os.path.join(dataset_root, category, "frame_annotations.jgz") sequence_file = os.path.join(dataset_root, category, "sequence_annotations.jgz") @@ -35,17 +38,18 @@ def setUp(self): expand_args_fields(JsonIndexDataset) - self.datasets = JsonIndexDataset( + self.dataset = JsonIndexDataset( frame_annotations_file=frame_file, sequence_annotations_file=sequence_file, - dataset_root=dataset_root, + dataset_root=self.dataset_root, image_height=self.image_size, image_width=self.image_size, box_crop=True, load_point_clouds=True, - path_manager=path_manager, + path_manager=self.path_manager, ) - self.entry = self.datasets.frame_annots[index]["frame_annotation"] + index = 7000 + self.entry = self.dataset.frame_annots[index]["frame_annotation"] def test_BlobLoader_args(self): # test that BlobLoader works with get_default_args @@ -58,57 +62,66 @@ def test_load_pipeline(self): bbox_xywh, clamp_bbox_xyxy, crop_bbox_xywh, - ) = self.datasets.loader._load_crop_fg_probability(entry) + ) = self.dataset.blob_loader._load_crop_fg_probability(self.entry) - assert fg_probability + assert torch.is_tensor(fg_probability) assert mask_path assert bbox_xywh - assert clamp_bbox_xyxy - assert crop_bbox_xywh + assert torch.is_tensor(clamp_bbox_xyxy) + assert torch.is_tensor(crop_bbox_xywh) ( image_rgb, image_path, mask_crop, scale, - ) = self.dataset.loader._load_crop_images( + ) = self.dataset.blob_loader._load_crop_images( self.entry, fg_probability, clamp_bbox_xyxy, ) - assert image_rgb + assert torch.is_tensor(image_rgb) assert image_path - assert mask_crop + assert torch.is_tensor(mask_crop) assert scale ( depth_map, depth_path, depth_mask, - ) = self.dataset.loader._load_mask_depth( + ) = self.dataset.blob_loader._load_mask_depth( self.entry, clamp_bbox_xyxy, fg_probability, ) - assert depth_map - assert depth_path - assert depth_mask + assert torch.is_tensor(depth_map) + assert torch.is_tensor(depth_path) + assert torch.is_tensor(depth_mask) - camera = self.dataset.loader._get_pytorch3d_camera( + camera = self.dataset.blob_loader._get_pytorch3d_camera( self.entry, scale, clamp_bbox_xyxy, ) - assert camera + assert type(camera) == PerspectiveCameras def test_fix_point_cloud_path(self): """Some files in Co3Dv2 have an accidental absolute path stored.""" original_path = 'some_file_path' - modified_path = self.dataset.loader._fix_point_cloud_path(original_path) + modified_path = self.dataset.blob_loader._fix_point_cloud_path(original_path) assert original_path in modified_path - assert self.dataset.loader.dataset_root in modified_path + assert self.dataset.blob_loader.dataset_root in modified_path def test_resize_image(self): - image = None - image_rgb, scale, mask_crop = self.dataset.loader._resize_image(image) - assert image_rgb.shape == (self.dataset.loader.width, self.dataset.loader.height) - assert scale == 1 - assert masc_crop.shape == (self.dataset.loader.width, self.dataset.loader.height) + path = os.path.join(self.dataset_root, self.entry.image.path) + local_path = self.path_manager.get_local_path(path) + image = _load_image(local_path) + image_rgb, scale, mask_crop = 
self.dataset.blob_loader._resize_image(image) + + original_shape = image.shape[-2:] + expected_shape = (self.dataset.blob_loader.image_width, self.dataset.blob_loader.image_height) + expected_scale = expected_shape[0] / original_shape[0] + + assert scale == expected_scale + assert image_rgb.shape[-2:] == expected_shape + assert mask_crop.shape[-2:] == expected_shape def test_load_image(self): - image = _load_image(self.entry.image.path) + path = os.path.join(self.dataset_root, self.entry.image.path) + local_path = self.path_manager.get_local_path(path) + image = _load_image(local_path) assert image.dtype == np.float32 assert torch.max(image) <= 1.0 assert torch.min(image) >= 0.0 @@ -120,32 +133,27 @@ def test_load_mask(self): assert torch.min(mask) >= 0.0 def test_load_depth(self): - entry_depth = self.entry.depth - # path = os.path.join(self.dataset_root, entry_depth.path) + path = os.path.join(self.dataset_root, entry_depth.path) path = entry_depth.path depth_map = _load_depth(path, entry_depth.scale_adjustment) assert depth_map.dtype == np.float32 assert depth_map.shape def test_load_16big_png_depth(self): - entry_depth = self.entry.depth - # path = os.path.join(self.dataset_root, entry_depth.path) - path = entry_depth.path + path = os.path.join(self.dataset_root, self.entry.depth.path) depth_map = _load_16big_png_depth(path) assert depth_map.dtype == np.float32 assert depth_map.shape def test_load_1bit_png_mask(self): - entry_depth = self.entry.depth - # mask_path = os.path.join(self.dataset_root, entry_depth.mask_path) + mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) mask_path = entry_depth.mask_path mask = _load_16big_png_depth(mask_path) assert mask.dtype == np.float32 assert mask.shape def test_load_depth_mask(self): - entry_depth = self.entry.depth - # mask_path = os.path.join(self.dataset_root, entry_depth.mask_path) + mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) mask_path = entry_depth.mask_path mask = _load_depth_mask(mask_path) assert mask.dtype == np.float32 From 11def0a8b452a1479d63fe9ba665f2adc6687553 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 14:50:11 +0000 Subject: [PATCH 19/43] blob loader tests ready for review --- tests/implicitron/test_bbox.py | 9 +- tests/implicitron/test_blob_loader.py | 119 ++++++++++++++++---------- 2 files changed, 81 insertions(+), 47 deletions(-) diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index 89b624199..8dffd751d 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -12,13 +12,14 @@ from pytorch3d.implicitron.dataset.blob_loader import ( _bbox_xywh_to_xyxy, _bbox_xyxy_to_xywh, - _get_bbox_from_mask, - _crop_around_box, _clamp_box_to_image_bounds_and_round, + _crop_around_box, + _get_1d_bounds, + _get_bbox_from_mask, _get_clamp_bbox, _rescale_bbox, - _get_1d_bounds, ) + from tests.common_testing import TestCaseMixin @@ -83,7 +84,7 @@ def test_mask_to_bbox(self): self.assertClose(bbox_xywh, expected_bbox_xywh) def test_crop_around_box(self): - bbox = torch.LongTensor([0, 1, 2, 3]) # (x_min, y_min, x_max, y_max) + bbox = torch.LongTensor([0, 1, 2, 3]) # (x_min, y_min, x_max, y_max) image = torch.LongTensor( [ [0, 0, 10, 20], diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index d54754d88..461b2109c 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -1,25 +1,24 @@ -import os -import math import contextlib +import os 
import unittest import numpy as np import torch from pytorch3d.implicitron.dataset.blob_loader import ( - _load_image, - _load_mask, - _load_depth, _load_16big_png_depth, _load_1bit_png_mask, + _load_depth, _load_depth_mask, + _load_image, + _load_mask, + BlobLoader, ) from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset -from pytorch3d.implicitron.dataset.blob_loader import BlobLoader +from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args from pytorch3d.renderer.cameras import PerspectiveCameras + from tests.common_testing import TestCaseMixin -from pytorch3d.implicitron.tools.config import expand_args_fields -from pytorch3d.implicitron.tools.config import get_default_args from tests.implicitron.common_resources import get_skateboard_data @@ -30,23 +29,28 @@ def setUp(self): category = "skateboard" stack = contextlib.ExitStack() - self.dataset_root, self.path_manager = stack.enter_context(get_skateboard_data()) + self.dataset_root, self.path_manager = stack.enter_context( + get_skateboard_data() + ) self.addCleanup(stack.close) - frame_file = os.path.join(dataset_root, category, "frame_annotations.jgz") - sequence_file = os.path.join(dataset_root, category, "sequence_annotations.jgz") - self.image_size = 256 + frame_file = os.path.join(self.dataset_root, category, "frame_annotations.jgz") + sequence_file = os.path.join( + self.dataset_root, category, "sequence_annotations.jgz" + ) + self.image_height = 768 + self.image_width = 512 expand_args_fields(JsonIndexDataset) self.dataset = JsonIndexDataset( - frame_annotations_file=frame_file, - sequence_annotations_file=sequence_file, - dataset_root=self.dataset_root, - image_height=self.image_size, - image_width=self.image_size, - box_crop=True, - load_point_clouds=True, - path_manager=self.path_manager, + frame_annotations_file=frame_file, + sequence_annotations_file=sequence_file, + dataset_root=self.dataset_root, + image_height=self.image_height, + image_width=self.image_width, + box_crop=True, + load_point_clouds=True, + path_manager=self.path_manager, ) index = 7000 self.entry = self.dataset.frame_annots[index]["frame_annotation"] @@ -64,42 +68,68 @@ def test_load_pipeline(self): crop_bbox_xywh, ) = self.dataset.blob_loader._load_crop_fg_probability(self.entry) - assert torch.is_tensor(fg_probability) assert mask_path - assert bbox_xywh + assert torch.is_tensor(fg_probability) + assert torch.is_tensor(bbox_xywh) assert torch.is_tensor(clamp_bbox_xyxy) assert torch.is_tensor(crop_bbox_xywh) + # assert bboxes shape + assert fg_probability.shape == torch.Shape( + [1, self.image_height, self.image_width] + ) + assert bbox_xywh.shape == torch.Shape([4]) + assert clamp_bbox_xyxy == torch.Shape([4]) + assert crop_bbox_xywh.shape == torch.Shape([4]) ( image_rgb, image_path, mask_crop, scale, ) = self.dataset.blob_loader._load_crop_images( - self.entry, fg_probability, clamp_bbox_xyxy, + self.entry, + fg_probability, + clamp_bbox_xyxy, ) assert torch.is_tensor(image_rgb) assert image_path assert torch.is_tensor(mask_crop) assert scale + # assert image and mask shapes + assert image_rgb.shape == torch.Shape([3, self.image_height, self.image_width]) + assert mask_crop.shape == torch.Shape( + [1, self.image_height, self.image_width], + ) + ( depth_map, depth_path, depth_mask, ) = self.dataset.blob_loader._load_mask_depth( - self.entry, clamp_bbox_xyxy, fg_probability, + self.entry, + clamp_bbox_xyxy, + fg_probability, ) assert torch.is_tensor(depth_map) - assert torch.is_tensor(depth_path) + 
assert depth_path assert torch.is_tensor(depth_mask) + # assert image and mask shapes + assert depth_map.shape == torch.Shape( + [1, self.image_height, self.image_width], + ) + assert depth_mask.shape == torch.Shape( + [1, self.image_height, self.image_width], + ) camera = self.dataset.blob_loader._get_pytorch3d_camera( - self.entry, scale, clamp_bbox_xyxy, - ) + self.entry, + scale, + clamp_bbox_xyxy, + ) assert type(camera) == PerspectiveCameras def test_fix_point_cloud_path(self): """Some files in Co3Dv2 have an accidental absolute path stored.""" - original_path = 'some_file_path' + original_path = "some_file_path" modified_path = self.dataset.blob_loader._fix_point_cloud_path(original_path) assert original_path in modified_path assert self.dataset.blob_loader.dataset_root in modified_path @@ -111,8 +141,13 @@ def test_resize_image(self): image_rgb, scale, mask_crop = self.dataset.blob_loader._resize_image(image) original_shape = image.shape[-2:] - expected_shape = (self.dataset.blob_loader.image_width, self.dataset.blob_loader.image_height) - expected_scale = expected_shape[0] / original_shape[0] + expected_shape = ( + self.image_height, + self.image_width, + ) + expected_scale = min( + expected_shape[0] / original_shape[0], expected_shape[1] / original_shape[1] + ) assert scale == expected_scale assert image_rgb.shape[-2:] == expected_shape @@ -123,19 +158,19 @@ def test_load_image(self): local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) assert image.dtype == np.float32 - assert torch.max(image) <= 1.0 - assert torch.min(image) >= 0.0 + assert np.max(image) <= 1.0 + assert np.min(image) >= 0.0 def test_load_mask(self): - mask = _load_mask(self.entry.mask.path) + path = os.path.join(self.dataset_root, self.entry.mask.path) + mask = _load_mask(path) assert mask.dtype == np.float32 - assert torch.max(mask) <= 1.0 - assert torch.min(mask) >= 0.0 + assert np.max(mask) <= 1.0 + assert np.min(mask) >= 0.0 def test_load_depth(self): - path = os.path.join(self.dataset_root, entry_depth.path) - path = entry_depth.path - depth_map = _load_depth(path, entry_depth.scale_adjustment) + path = os.path.join(self.dataset_root, self.entry.depth.path) + depth_map = _load_depth(path, self.entry.depth.scale_adjustment) assert depth_map.dtype == np.float32 assert depth_map.shape @@ -147,14 +182,12 @@ def test_load_16big_png_depth(self): def test_load_1bit_png_mask(self): mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) - mask_path = entry_depth.mask_path - mask = _load_16big_png_depth(mask_path) + mask = _load_1bit_png_mask(mask_path) assert mask.dtype == np.float32 - assert mask.shape + assert len(mask.shape) == 3 def test_load_depth_mask(self): mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) - mask_path = entry_depth.mask_path mask = _load_depth_mask(mask_path) assert mask.dtype == np.float32 - assert mask.shape + assert len(mask.shape) == 3 From bc52382a7991c69107645c0a91e5ea6dd7511f25 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 14:54:25 +0000 Subject: [PATCH 20/43] typo --- tests/implicitron/test_blob_loader.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 461b2109c..cb2976011 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -74,12 +74,12 @@ def test_load_pipeline(self): assert torch.is_tensor(clamp_bbox_xyxy) assert 
torch.is_tensor(crop_bbox_xywh) # assert bboxes shape - assert fg_probability.shape == torch.Shape( + assert fg_probability.shape == torch.Size( [1, self.image_height, self.image_width] ) - assert bbox_xywh.shape == torch.Shape([4]) - assert clamp_bbox_xyxy == torch.Shape([4]) - assert crop_bbox_xywh.shape == torch.Shape([4]) + assert bbox_xywh.shape == torch.Size([4]) + assert clamp_bbox_xyxy == torch.Size([4]) + assert crop_bbox_xywh.shape == torch.Size([4]) ( image_rgb, image_path, @@ -95,8 +95,8 @@ def test_load_pipeline(self): assert torch.is_tensor(mask_crop) assert scale # assert image and mask shapes - assert image_rgb.shape == torch.Shape([3, self.image_height, self.image_width]) - assert mask_crop.shape == torch.Shape( + assert image_rgb.shape == torch.Size([3, self.image_height, self.image_width]) + assert mask_crop.shape == torch.Size( [1, self.image_height, self.image_width], ) @@ -113,10 +113,10 @@ def test_load_pipeline(self): assert depth_path assert torch.is_tensor(depth_mask) # assert image and mask shapes - assert depth_map.shape == torch.Shape( + assert depth_map.shape == torch.Size( [1, self.image_height, self.image_width], ) - assert depth_mask.shape == torch.Shape( + assert depth_mask.shape == torch.Size( [1, self.image_height, self.image_width], ) From 01493775ea0d2c55069fc6348ce80aaa56cbf104 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 14:58:43 +0000 Subject: [PATCH 21/43] typo --- tests/implicitron/test_blob_loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index cb2976011..5f694b897 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -78,7 +78,7 @@ def test_load_pipeline(self): [1, self.image_height, self.image_width] ) assert bbox_xywh.shape == torch.Size([4]) - assert clamp_bbox_xyxy == torch.Size([4]) + assert clamp_bbox_xyxy.shape == torch.Size([4]) assert crop_bbox_xywh.shape == torch.Size([4]) ( image_rgb, @@ -184,7 +184,7 @@ def test_load_1bit_png_mask(self): mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) mask = _load_1bit_png_mask(mask_path) assert mask.dtype == np.float32 - assert len(mask.shape) == 3 + assert len(mask.shape) == 2 def test_load_depth_mask(self): mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) From 3bcbd018cd04941a1541d58c724cd266803ae768 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 15:23:39 +0000 Subject: [PATCH 22/43] linter --- tests/implicitron/test_blob_loader.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 5f694b897..96d8fac60 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -96,9 +96,7 @@ def test_load_pipeline(self): assert scale # assert image and mask shapes assert image_rgb.shape == torch.Size([3, self.image_height, self.image_width]) - assert mask_crop.shape == torch.Size( - [1, self.image_height, self.image_width], - ) + assert mask_crop.shape == torch.Size([1, self.image_height, self.image_width]) ( depth_map, @@ -113,12 +111,8 @@ def test_load_pipeline(self): assert depth_path assert torch.is_tensor(depth_mask) # assert image and mask shapes - assert depth_map.shape == torch.Size( - [1, self.image_height, self.image_width], - ) - assert depth_mask.shape == torch.Size( - [1, self.image_height, self.image_width], 
- ) + assert depth_map.shape == torch.Size([1, self.image_height, self.image_width]) + assert depth_mask.shape == torch.Size([1, self.image_height, self.image_width]) camera = self.dataset.blob_loader._get_pytorch3d_camera( self.entry, @@ -178,7 +172,7 @@ def test_load_16big_png_depth(self): path = os.path.join(self.dataset_root, self.entry.depth.path) depth_map = _load_16big_png_depth(path) assert depth_map.dtype == np.float32 - assert depth_map.shape + assert len(depth_map.shape) == 2 def test_load_1bit_png_mask(self): mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) From 269cffa9ca8327cabb90104407df1c285e3405e3 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 15:35:02 +0000 Subject: [PATCH 23/43] all entry tests run thru all frames --- tests/implicitron/test_blob_loader.py | 68 +++++++++++++++------------ 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 96d8fac60..619586f04 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -52,21 +52,38 @@ def setUp(self): load_point_clouds=True, path_manager=self.path_manager, ) - index = 7000 - self.entry = self.dataset.frame_annots[index]["frame_annotation"] def test_BlobLoader_args(self): # test that BlobLoader works with get_default_args get_default_args(BlobLoader) - def test_load_pipeline(self): + def test_fix_point_cloud_path(self): + """Some files in Co3Dv2 have an accidental absolute path stored.""" + original_path = "some_file_path" + modified_path = self.dataset.blob_loader._fix_point_cloud_path(original_path) + assert original_path in modified_path + assert self.dataset.blob_loader.dataset_root in modified_path + + def test_entry_loading_functions(self): + for index in range(len(self.dataset.frame_annots)): + entry = self.dataset.frame_annots[index]["frame_annotation"] + self.load_test(entry) + self._resize_image_test(entry) + self._load_image_test(entry) + self._load_mask_test(entry) + self._load_depth_test(entry) + self._load_16big_png_depth_test(entry) + self._load_1bit_png_mask_test(entry) + self._load_depth_mask_test(entry) + + def load_test(self, entry): ( fg_probability, mask_path, bbox_xywh, clamp_bbox_xyxy, crop_bbox_xywh, - ) = self.dataset.blob_loader._load_crop_fg_probability(self.entry) + ) = self.dataset.blob_loader._load_crop_fg_probability(entry) assert mask_path assert torch.is_tensor(fg_probability) @@ -86,7 +103,7 @@ def test_load_pipeline(self): mask_crop, scale, ) = self.dataset.blob_loader._load_crop_images( - self.entry, + entry, fg_probability, clamp_bbox_xyxy, ) @@ -103,7 +120,7 @@ def test_load_pipeline(self): depth_path, depth_mask, ) = self.dataset.blob_loader._load_mask_depth( - self.entry, + entry, clamp_bbox_xyxy, fg_probability, ) @@ -115,21 +132,14 @@ def test_load_pipeline(self): assert depth_mask.shape == torch.Size([1, self.image_height, self.image_width]) camera = self.dataset.blob_loader._get_pytorch3d_camera( - self.entry, + entry, scale, clamp_bbox_xyxy, ) assert type(camera) == PerspectiveCameras - def test_fix_point_cloud_path(self): - """Some files in Co3Dv2 have an accidental absolute path stored.""" - original_path = "some_file_path" - modified_path = self.dataset.blob_loader._fix_point_cloud_path(original_path) - assert original_path in modified_path - assert self.dataset.blob_loader.dataset_root in modified_path - - def test_resize_image(self): - path = os.path.join(self.dataset_root, self.entry.image.path) + 
def _resize_image_test(self, entry): + path = os.path.join(self.dataset_root, entry.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) image_rgb, scale, mask_crop = self.dataset.blob_loader._resize_image(image) @@ -147,41 +157,41 @@ def test_resize_image(self): assert image_rgb.shape[-2:] == expected_shape assert mask_crop.shape[-2:] == expected_shape - def test_load_image(self): - path = os.path.join(self.dataset_root, self.entry.image.path) + def _load_image_test(self, entry): + path = os.path.join(self.dataset_root, entry.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) assert image.dtype == np.float32 assert np.max(image) <= 1.0 assert np.min(image) >= 0.0 - def test_load_mask(self): - path = os.path.join(self.dataset_root, self.entry.mask.path) + def _load_mask_test(self, entry): + path = os.path.join(self.dataset_root, entry.mask.path) mask = _load_mask(path) assert mask.dtype == np.float32 assert np.max(mask) <= 1.0 assert np.min(mask) >= 0.0 - def test_load_depth(self): - path = os.path.join(self.dataset_root, self.entry.depth.path) - depth_map = _load_depth(path, self.entry.depth.scale_adjustment) + def _load_depth_test(self, entry): + path = os.path.join(self.dataset_root, entry.depth.path) + depth_map = _load_depth(path, entry.depth.scale_adjustment) assert depth_map.dtype == np.float32 assert depth_map.shape - def test_load_16big_png_depth(self): - path = os.path.join(self.dataset_root, self.entry.depth.path) + def _load_16big_png_depth_test(self, entry): + path = os.path.join(self.dataset_root, entry.depth.path) depth_map = _load_16big_png_depth(path) assert depth_map.dtype == np.float32 assert len(depth_map.shape) == 2 - def test_load_1bit_png_mask(self): - mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) + def _load_1bit_png_mask_test(self, entry): + mask_path = os.path.join(self.dataset_root, entry.depth.mask_path) mask = _load_1bit_png_mask(mask_path) assert mask.dtype == np.float32 assert len(mask.shape) == 2 - def test_load_depth_mask(self): - mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) + def _load_depth_mask_test(self, entry): + mask_path = os.path.join(self.dataset_root, entry.depth.mask_path) mask = _load_depth_mask(mask_path) assert mask.dtype == np.float32 assert len(mask.shape) == 3 From f930d71488ed978b7ca71525567e4f94ed721fc6 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 09:38:08 +0000 Subject: [PATCH 24/43] assert .. == .. to self.assertEqual(.., ..) 
---
 tests/implicitron/test_blob_loader.py | 46 +++++++++++++--------------
 1 file changed, 22 insertions(+), 24 deletions(-)

diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py
index 619586f04..059244c67 100644
--- a/tests/implicitron/test_blob_loader.py
+++ b/tests/implicitron/test_blob_loader.py
@@ -91,12 +91,10 @@ def load_test(self, entry):
         assert torch.is_tensor(clamp_bbox_xyxy)
         assert torch.is_tensor(crop_bbox_xywh)
         # assert bboxes shape
-        assert fg_probability.shape == torch.Size(
-            [1, self.image_height, self.image_width]
-        )
-        assert bbox_xywh.shape == torch.Size([4])
-        assert clamp_bbox_xyxy.shape == torch.Size([4])
-        assert crop_bbox_xywh.shape == torch.Size([4])
+        self.assertEqual(fg_probability.shape, torch.Size([1, self.image_height, self.image_width]))
+        self.assertEqual(bbox_xywh.shape, torch.Size([4]))
+        self.assertEqual(clamp_bbox_xyxy.shape, torch.Size([4]))
+        self.assertEqual(crop_bbox_xywh.shape, torch.Size([4]))
         (
             image_rgb,
             image_path,
@@ -112,8 +110,8 @@ def load_test(self, entry):
         assert torch.is_tensor(mask_crop)
         assert scale
         # assert image and mask shapes
-        assert image_rgb.shape == torch.Size([3, self.image_height, self.image_width])
-        assert mask_crop.shape == torch.Size([1, self.image_height, self.image_width])
+        self.assertEqual(image_rgb.shape, torch.Size([3, self.image_height, self.image_width]))
+        self.assertEqual(mask_crop.shape, torch.Size([1, self.image_height, self.image_width]))
 
         (
             depth_map,
@@ -128,15 +126,15 @@ def load_test(self, entry):
         assert depth_path
         assert torch.is_tensor(depth_mask)
         # assert image and mask shapes
-        assert depth_map.shape == torch.Size([1, self.image_height, self.image_width])
-        assert depth_mask.shape == torch.Size([1, self.image_height, self.image_width])
+        self.assertEqual(depth_map.shape, torch.Size([1, self.image_height, self.image_width]))
+        self.assertEqual(depth_mask.shape, torch.Size([1, self.image_height, self.image_width]))
 
         camera = self.dataset.blob_loader._get_pytorch3d_camera(
             entry,
             scale,
            clamp_bbox_xyxy,
         )
-        assert type(camera) == PerspectiveCameras
+        self.assertEqual(type(camera), PerspectiveCameras)
 
     def _resize_image_test(self, entry):
         path = os.path.join(self.dataset_root, entry.image.path)
         local_path = self.path_manager.get_local_path(path)
         image = _load_image(local_path)
         image_rgb, scale, mask_crop = self.dataset.blob_loader._resize_image(image)
 
         original_shape = image.shape[-2:]
         expected_shape = (
             self.image_height,
             self.image_width,
         )
         expected_scale = min(
             expected_shape[0] / original_shape[0], expected_shape[1] / original_shape[1]
         )
-        assert scale == expected_scale
-        assert image_rgb.shape[-2:] == expected_shape
-        assert mask_crop.shape[-2:] == expected_shape
+        self.assertEqual(scale, expected_scale)
+        self.assertEqual(image_rgb.shape[-2:], expected_shape)
+        self.assertEqual(mask_crop.shape[-2:], expected_shape)
 
     def _load_image_test(self, entry):
         path = os.path.join(self.dataset_root, entry.image.path)
         local_path = self.path_manager.get_local_path(path)
         image = _load_image(local_path)
-        assert image.dtype == np.float32
+        self.assertEqual(image.dtype, np.float32)
         assert np.max(image) <= 1.0
         assert np.min(image) >= 0.0
 
     def _load_mask_test(self, entry):
         path = os.path.join(self.dataset_root, entry.mask.path)
         mask = _load_mask(path)
-        assert mask.dtype == np.float32
+        self.assertEqual(mask.dtype, np.float32)
         assert np.max(mask) <= 1.0
         assert np.min(mask) >= 0.0
 
     def _load_depth_test(self, entry):
         path = os.path.join(self.dataset_root, entry.depth.path)
         depth_map = _load_depth(path, entry.depth.scale_adjustment)
-        assert depth_map.dtype == np.float32
-        assert depth_map.shape
+        self.assertEqual(depth_map.dtype, np.float32)
+        self.assertEqual(len(depth_map.shape), 2)
 
     def 
_load_16big_png_depth_test(self, entry): path = os.path.join(self.dataset_root, entry.depth.path) depth_map = _load_16big_png_depth(path) - assert depth_map.dtype == np.float32 - assert len(depth_map.shape) == 2 + self.assertEqual(depth_map.dtype, np.float32) + self.assertEqual(len(depth_map.shape), 2) def _load_1bit_png_mask_test(self, entry): mask_path = os.path.join(self.dataset_root, entry.depth.mask_path) mask = _load_1bit_png_mask(mask_path) - assert mask.dtype == np.float32 - assert len(mask.shape) == 2 + self.assertEqual(mask.dtype, np.float32) + self.assertEqual(len(mask.shape), 2) def _load_depth_mask_test(self, entry): mask_path = os.path.join(self.dataset_root, entry.depth.mask_path) mask = _load_depth_mask(mask_path) - assert mask.dtype == np.float32 - assert len(mask.shape) == 3 + self.assertEqual(mask.dtype, np.float32) + self.assertEqual(len(mask.shape), 3) From dc7a70280eed0715ddf8ab04267f883e7a4de8e4 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 09:42:24 +0000 Subject: [PATCH 25/43] testing only on 1 frame --- tests/implicitron/test_blob_loader.py | 63 ++++++++++----------------- 1 file changed, 23 insertions(+), 40 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 059244c67..a03e91537 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -52,6 +52,8 @@ def setUp(self): load_point_clouds=True, path_manager=self.path_manager, ) + index = 7000 + self.entry = self.dataset.frame_annots[index]["frame_annotation"] def test_BlobLoader_args(self): # test that BlobLoader works with get_default_args @@ -64,26 +66,14 @@ def test_fix_point_cloud_path(self): assert original_path in modified_path assert self.dataset.blob_loader.dataset_root in modified_path - def test_entry_loading_functions(self): - for index in range(len(self.dataset.frame_annots)): - entry = self.dataset.frame_annots[index]["frame_annotation"] - self.load_test(entry) - self._resize_image_test(entry) - self._load_image_test(entry) - self._load_mask_test(entry) - self._load_depth_test(entry) - self._load_16big_png_depth_test(entry) - self._load_1bit_png_mask_test(entry) - self._load_depth_mask_test(entry) - - def load_test(self, entry): + def test_load(self): ( fg_probability, mask_path, bbox_xywh, clamp_bbox_xyxy, crop_bbox_xywh, - ) = self.dataset.blob_loader._load_crop_fg_probability(entry) + ) = self.dataset.blob_loader._load_crop_fg_probability(self.entry) assert mask_path assert torch.is_tensor(fg_probability) @@ -96,15 +86,8 @@ def load_test(self, entry): self.assertEqual(clamp_bbox_xyxy.shape, torch.Size([4])) self.assertEqual(crop_bbox_xywh.shape, torch.Size([4])) ( - image_rgb, - image_path, - mask_crop, - scale, - ) = self.dataset.blob_loader._load_crop_images( - entry, - fg_probability, - clamp_bbox_xyxy, - ) + image_rgb, image_path, mask_crop, scale, + ) = self.dataset.blob_loader._load_crop_images(self.entry, fg_probability, clamp_bbox_xyxy) assert torch.is_tensor(image_rgb) assert image_path assert torch.is_tensor(mask_crop) @@ -118,7 +101,7 @@ def load_test(self, entry): depth_path, depth_mask, ) = self.dataset.blob_loader._load_mask_depth( - entry, + self.entry, clamp_bbox_xyxy, fg_probability, ) @@ -130,14 +113,14 @@ def load_test(self, entry): self.assertEqual(depth_mask.shape, torch.Size([1, self.image_height, self.image_width])) camera = self.dataset.blob_loader._get_pytorch3d_camera( - entry, + self.entry, scale, clamp_bbox_xyxy, ) self.assertEqual(type(camera), 
PerspectiveCameras) - def _resize_image_test(self, entry): - path = os.path.join(self.dataset_root, entry.image.path) + def test_resize_image(self): + path = os.path.join(self.dataset_root, self.entry.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) image_rgb, scale, mask_crop = self.dataset.blob_loader._resize_image(image) @@ -155,41 +138,41 @@ def _resize_image_test(self, entry): self.assertEqual(image_rgb.shape[-2:], expected_shape) self.assertEqual(mask_crop.shape[-2:], expected_shape) - def _load_image_test(self, entry): - path = os.path.join(self.dataset_root, entry.image.path) + def test_load_image(self): + path = os.path.join(self.dataset_root, self.entry.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) self.assertEqual(image.dtype, np.float32) assert np.max(image) <= 1.0 assert np.min(image) >= 0.0 - def _load_mask_test(self, entry): - path = os.path.join(self.dataset_root, entry.mask.path) + def test_load_mask(self): + path = os.path.join(self.dataset_root, self.entry.mask.path) mask = _load_mask(path) self.assertEqual(mask.dtype, np.float32) assert np.max(mask) <= 1.0 assert np.min(mask) >= 0.0 - def _load_depth_test(self, entry): - path = os.path.join(self.dataset_root, entry.depth.path) - depth_map = _load_depth(path, entry.depth.scale_adjustment) + def test_load_depth(self): + path = os.path.join(self.dataset_root, self.entry.depth.path) + depth_map = _load_depth(path, self.entry.depth.scale_adjustment) self.assertEqual(depth_map.dtype, np.float32) self.assertEqual(len(depth_map.shape), 2) - def _load_16big_png_depth_test(self, entry): - path = os.path.join(self.dataset_root, entry.depth.path) + def test_load_16big_png_depth(self): + path = os.path.join(self.dataset_root, self.entry.depth.path) depth_map = _load_16big_png_depth(path) self.assertEqual(depth_map.dtype, np.float32) self.assertEqual(len(depth_map.shape), 2) - def _load_1bit_png_mask_test(self, entry): - mask_path = os.path.join(self.dataset_root, entry.depth.mask_path) + def test_load_1bit_png_mask(self): + mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) mask = _load_1bit_png_mask(mask_path) self.assertEqual(mask.dtype, np.float32) self.assertEqual(len(mask.shape), 2) - def _load_depth_mask_test(self, entry): - mask_path = os.path.join(self.dataset_root, entry.depth.mask_path) + def test_load_depth_mask(self): + mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) mask = _load_depth_mask(mask_path) self.assertEqual(mask.dtype, np.float32) self.assertEqual(len(mask.shape), 3) From fcd8d8b45947c40370a2f93254da9e9169357a38 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 09:57:39 +0000 Subject: [PATCH 26/43] instead of loading whole dataset, loading only single frame annots --- tests/implicitron/test_blob_loader.py | 39 +++++++++++++-------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index a03e91537..b878dc0d0 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -14,6 +14,7 @@ _load_mask, BlobLoader, ) +from pytorch3d.implicitron.dataset import types from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args from pytorch3d.renderer.cameras import PerspectiveCameras @@ -40,20 +41,16 @@ def setUp(self): 
self.image_height = 768 self.image_width = 512 - expand_args_fields(JsonIndexDataset) - - self.dataset = JsonIndexDataset( - frame_annotations_file=frame_file, - sequence_annotations_file=sequence_file, - dataset_root=self.dataset_root, - image_height=self.image_height, - image_width=self.image_width, - box_crop=True, - load_point_clouds=True, - path_manager=self.path_manager, - ) - index = 7000 - self.entry = self.dataset.frame_annots[index]["frame_annotation"] + expand_args_fields(BlobLoader) + self.blob_loader = BlobLoader() + + # loading single frame annotation of dataset (see JsonIndexDataset._load_frames()) + local_file = self.path_manager.get_local_path(frame_file) + with gzip.open(local_file, "rt", encoding="utf8") as zipfile: + frame_annots_list = types.load_dataclass(zipfile, List[self.frame_annotations_type]) + + index = 0 + self.entry = FrameAnnotsEntry(frame_annotation=frame_annots_list[index], subset=None) def test_BlobLoader_args(self): # test that BlobLoader works with get_default_args @@ -62,9 +59,9 @@ def test_BlobLoader_args(self): def test_fix_point_cloud_path(self): """Some files in Co3Dv2 have an accidental absolute path stored.""" original_path = "some_file_path" - modified_path = self.dataset.blob_loader._fix_point_cloud_path(original_path) + modified_path = self.blob_loader._fix_point_cloud_path(original_path) assert original_path in modified_path - assert self.dataset.blob_loader.dataset_root in modified_path + assert self.blob_loader.dataset_root in modified_path def test_load(self): ( @@ -73,7 +70,7 @@ def test_load(self): bbox_xywh, clamp_bbox_xyxy, crop_bbox_xywh, - ) = self.dataset.blob_loader._load_crop_fg_probability(self.entry) + ) = self.blob_loader._load_crop_fg_probability(self.entry) assert mask_path assert torch.is_tensor(fg_probability) @@ -87,7 +84,7 @@ def test_load(self): self.assertEqual(crop_bbox_xywh.shape, torch.Size([4])) ( image_rgb, image_path, mask_crop, scale, - ) = self.dataset.blob_loader._load_crop_images(self.entry, fg_probability, clamp_bbox_xyxy) + ) = self.blob_loader._load_crop_images(self.entry, fg_probability, clamp_bbox_xyxy) assert torch.is_tensor(image_rgb) assert image_path assert torch.is_tensor(mask_crop) @@ -100,7 +97,7 @@ def test_load(self): depth_map, depth_path, depth_mask, - ) = self.dataset.blob_loader._load_mask_depth( + ) = self.blob_loader._load_mask_depth( self.entry, clamp_bbox_xyxy, fg_probability, @@ -112,7 +109,7 @@ def test_load(self): self.assertEqual(depth_map.shape, torch.Size([1, self.image_height, self.image_width])) self.assertEqual(depth_mask.shape, torch.Size([1, self.image_height, self.image_width])) - camera = self.dataset.blob_loader._get_pytorch3d_camera( + camera = self.blob_loader._get_pytorch3d_camera( self.entry, scale, clamp_bbox_xyxy, @@ -123,7 +120,7 @@ def test_resize_image(self): path = os.path.join(self.dataset_root, self.entry.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) - image_rgb, scale, mask_crop = self.dataset.blob_loader._resize_image(image) + image_rgb, scale, mask_crop = self.blob_loader._resize_image(image) original_shape = image.shape[-2:] expected_shape = ( From c3bd722507a8bca9eb9dfa0828ee3e32fe005bb0 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 10:15:38 +0000 Subject: [PATCH 27/43] added default values to BlobLoader to ease initialisation --- pytorch3d/implicitron/dataset/blob_loader.py | 28 ++++++++++---------- tests/implicitron/test_blob_loader.py | 2 +- 2 files changed, 15 insertions(+), 15 
deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index 035e99a83..362d1c459 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -57,20 +57,20 @@ class BlobLoader: dimension of the cropping bounding box, relative to box size. """ - dataset_root: str - load_images: bool - load_depths: bool - load_depth_masks: bool - load_masks: bool - load_point_clouds: bool - max_points: int - mask_images: bool - mask_depths: bool - image_height: Optional[int] - image_width: Optional[int] - box_crop: bool - box_crop_mask_thr: float - box_crop_context: float + dataset_root: str = "" + load_images: bool = True + load_depths: bool = True + load_depth_masks: bool = True + load_masks: bool = True + load_point_clouds: bool = False + max_points: int = 0 + mask_images: bool = False + mask_depths: bool = False + image_height: Optional[int] = 800 + image_width: Optional[int] = 800 + box_crop: bool = True + box_crop_mask_thr: float = 0.4 + box_crop_context: float = 0.3 path_manager: Any = None def load( diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index b878dc0d0..cdbb2d9c5 100644 @@ -91,7 +91,7 @@ def test_load(self): assert scale # assert image and mask shapes self.assertEqual(image_rgb.shape, torch.Size([3, self.image_height, self.image_width])) - self.assertEqual(mask_crop.shape, torch.Size([1, self.image_height, self.image_width]) + self.assertEqual(mask_crop.shape, torch.Size([1, self.image_height, self.image_width])) ( depth_map, From cb34c0134465bc8e2b5e463e5f123afa224920bf Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 10:59:18 +0000 Subject: [PATCH 28/43] making tests run on a single loaded frame --- tests/implicitron/test_blob_loader.py | 89 +++++++++++++++------------ 1 file changed, 50 insertions(+), 39 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index cdbb2d9c5..209830bbe 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -1,10 +1,13 @@ import contextlib +import gzip import os import unittest +from typing import List import numpy as np - import torch + +from pytorch3d.implicitron.dataset import types from pytorch3d.implicitron.dataset.blob_loader import ( _load_16big_png_depth, _load_1bit_png_mask, @@ -14,13 +17,10 @@ _load_mask, BlobLoader, ) -from pytorch3d.implicitron.dataset import types -from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args from pytorch3d.renderer.cameras import PerspectiveCameras from tests.common_testing import TestCaseMixin - from tests.implicitron.common_resources import get_skateboard_data @@ -34,23 +34,24 @@ def setUp(self): get_skateboard_data() ) self.addCleanup(stack.close) - frame_file = os.path.join(self.dataset_root, category, "frame_annotations.jgz") - sequence_file = os.path.join( - self.dataset_root, category, "sequence_annotations.jgz" - ) self.image_height = 768 self.image_width = 512 - expand_args_fields(BlobLoader) - self.blob_loader = BlobLoader() + self.blob_loader = BlobLoader( + image_height=self.image_height, + image_width=self.image_width, + dataset_root=self.dataset_root, + path_manager=self.path_manager, + ) # loading single frame annotation of dataset (see
JsonIndexDataset._load_frames()) + frame_file = os.path.join(self.dataset_root, category, "frame_annotations.jgz") local_file = self.path_manager.get_local_path(frame_file) with gzip.open(local_file, "rt", encoding="utf8") as zipfile: - frame_annots_list = types.load_dataclass(zipfile, List[self.frame_annotations_type]) - - index = 0 - self.entry = FrameAnnotsEntry(frame_annotation=frame_annots_list[index], subset=None) + frame_annots_list = types.load_dataclass( + zipfile, List[types.FrameAnnotation] + ) + self.frame_annotation = frame_annots_list[0] def test_BlobLoader_args(self): # test that BlobLoader works with get_default_args @@ -70,7 +71,7 @@ def test_load(self): bbox_xywh, clamp_bbox_xyxy, crop_bbox_xywh, - ) = self.blob_loader._load_crop_fg_probability(self.entry) + ) = self.blob_loader._load_crop_fg_probability(self.frame_annotation) assert mask_path assert torch.is_tensor(fg_probability) @@ -78,27 +79,29 @@ def test_load(self): assert torch.is_tensor(clamp_bbox_xyxy) assert torch.is_tensor(crop_bbox_xywh) # assert bboxes shape - self.assertEqual(fg_probability.shape, torch.Size([1, self.image_height, self.image_width])) + self.assertEqual( + fg_probability.shape, torch.Size([1, self.image_height, self.image_width]) + ) self.assertEqual(bbox_xywh.shape, torch.Size([4])) self.assertEqual(clamp_bbox_xyxy.shape, torch.Size([4])) self.assertEqual(crop_bbox_xywh.shape, torch.Size([4])) - ( - image_rgb, image_path, mask_crop, scale, - ) = self.blob_loader._load_crop_images(self.entry, fg_probability, clamp_bbox_xyxy) + (image_rgb, image_path, mask_crop, scale,) = self.blob_loader._load_crop_images( + self.frame_annotation, fg_probability, clamp_bbox_xyxy + ) assert torch.is_tensor(image_rgb) assert image_path assert torch.is_tensor(mask_crop) assert scale # assert image and mask shapes - self.assertEqual(image_rgb.shape, torch.Size([3, self.image_height, self.image_width])) - self.assertEqual(mask_crop.shape, torch.Size([1, self.image_height, self.image_width])) + self.assertEqual( + image_rgb.shape, torch.Size([3, self.image_height, self.image_width]) + ) + self.assertEqual( + mask_crop.shape, torch.Size([1, self.image_height, self.image_width]) + ) - ( - depth_map, - depth_path, - depth_mask, - ) = self.blob_loader._load_mask_depth( - self.entry, + (depth_map, depth_path, depth_mask,) = self.blob_loader._load_mask_depth( + self.frame_annotation, clamp_bbox_xyxy, fg_probability, ) @@ -106,18 +109,22 @@ def test_load(self): assert depth_path assert torch.is_tensor(depth_mask) # assert image and mask shapes - self.assertEqual(depth_map.shape, torch.Size([1, self.image_height, self.image_width])) - self.assertEqual(depth_mask.shape, torch.Size([1, self.image_height, self.image_width])) + self.assertEqual( + depth_map.shape, torch.Size([1, self.image_height, self.image_width]) + ) + self.assertEqual( + depth_mask.shape, torch.Size([1, self.image_height, self.image_width]) + ) camera = self.blob_loader._get_pytorch3d_camera( - self.entry, + self.frame_annotation, scale, clamp_bbox_xyxy, ) self.assertEqual(type(camera), PerspectiveCameras) def test_resize_image(self): - path = os.path.join(self.dataset_root, self.entry.image.path) + path = os.path.join(self.dataset_root, self.frame_annotation.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) image_rgb, scale, mask_crop = self.blob_loader._resize_image(image) @@ -136,7 +143,7 @@ def test_resize_image(self): self.assertEqual(mask_crop.shape[-2:], expected_shape) def test_load_image(self): - path 
= os.path.join(self.dataset_root, self.entry.image.path) + path = os.path.join(self.dataset_root, self.frame_annotation.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) self.assertEqual(image.dtype, np.float32) @@ -144,32 +151,36 @@ def test_load_image(self): assert np.min(image) >= 0.0 def test_load_mask(self): - path = os.path.join(self.dataset_root, self.entry.mask.path) + path = os.path.join(self.dataset_root, self.frame_annotation.mask.path) mask = _load_mask(path) self.assertEqual(mask.dtype, np.float32) assert np.max(mask) <= 1.0 assert np.min(mask) >= 0.0 def test_load_depth(self): - path = os.path.join(self.dataset_root, self.entry.depth.path) - depth_map = _load_depth(path, self.entry.depth.scale_adjustment) + path = os.path.join(self.dataset_root, self.frame_annotation.depth.path) + depth_map = _load_depth(path, self.frame_annotation.depth.scale_adjustment) self.assertEqual(depth_map.dtype, np.float32) - self.assertEqual(len(depth_map.shape), 2) + self.assertEqual(len(depth_map.shape), 3) def test_load_16big_png_depth(self): - path = os.path.join(self.dataset_root, self.entry.depth.path) + path = os.path.join(self.dataset_root, self.frame_annotation.depth.path) depth_map = _load_16big_png_depth(path) self.assertEqual(depth_map.dtype, np.float32) self.assertEqual(len(depth_map.shape), 2) def test_load_1bit_png_mask(self): - mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) + mask_path = os.path.join( + self.dataset_root, self.frame_annotation.depth.mask_path + ) mask = _load_1bit_png_mask(mask_path) self.assertEqual(mask.dtype, np.float32) self.assertEqual(len(mask.shape), 2) def test_load_depth_mask(self): - mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) + mask_path = os.path.join( + self.dataset_root, self.frame_annotation.depth.mask_path + ) mask = _load_depth_mask(mask_path) self.assertEqual(mask.dtype, np.float32) self.assertEqual(len(mask.shape), 3) From 04b7d1591cf38c9957c19dd2ba3da53aeb023715 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 15:29:08 +0000 Subject: [PATCH 29/43] made _resize_image separate function (will ease use in pixar replay) --- pytorch3d/implicitron/dataset/blob_loader.py | 75 ++++++++++++-------- tests/implicitron/test_blob_loader.py | 5 +- 2 files changed, 50 insertions(+), 30 deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index 362d1c459..bedbc070e 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -165,7 +165,12 @@ def _load_crop_fg_probability( mask = _crop_around_box(mask, clamp_bbox_xyxy, full_path) - fg_probability, _, _ = self._resize_image(mask, mode="nearest") + fg_probability, _, _ = _resize_image( + mask, + image_height=self.image_height, + image_width=self.image_width, + mode="nearest", + ) return fg_probability, full_path, bbox_xywh, clamp_bbox_xyxy, crop_box_xywh @@ -188,7 +193,9 @@ def _load_crop_images( assert clamp_bbox_xyxy is not None image_rgb = _crop_around_box(image_rgb, clamp_bbox_xyxy, path) - image_rgb, scale, mask_crop = self._resize_image(image_rgb) + image_rgb, scale, mask_crop = _resize_image( + image_rgb, image_height=self.image_height, image_width=self.image_width + ) if self.mask_images: assert fg_probability is not None @@ -214,7 +221,12 @@ def _load_mask_depth( ) depth_map = _crop_around_box(depth_map, depth_bbox_xyxy, path) - depth_map, _, _ = 
self._resize_image(depth_map, mode="nearest") + depth_map, _, _ = _resize_image( + depth_map, + image_height=self.image_height, + image_width=self.image_width, + mode="nearest", + ) if self.mask_depths: assert fg_probability is not None @@ -234,7 +246,12 @@ def _load_mask_depth( depth_mask, depth_mask_bbox_xyxy, mask_path ) - depth_mask, _, _ = self._resize_image(depth_mask, mode="nearest") + depth_mask, _, _ = _resize_image( + depth_mask, + image_height=self.image_height, + image_width=self.image_width, + mode="nearest", + ) else: depth_mask = torch.ones_like(depth_map) @@ -314,31 +331,31 @@ def _local_path(self, path: str) -> str: return path return self.path_manager.get_local_path(path) - def _resize_image( - self, image, mode="bilinear" - ) -> Tuple[torch.Tensor, float, torch.Tensor]: - image_height, image_width = self.image_height, self.image_width - if image_height is None or image_width is None: - # skip the resizing - imre_ = torch.from_numpy(image) - return imre_, 1.0, torch.ones_like(imre_[:1]) - # takes numpy array, returns pytorch tensor - minscale = min( - image_height / image.shape[-2], - image_width / image.shape[-1], - ) - imre = torch.nn.functional.interpolate( - torch.from_numpy(image)[None], - scale_factor=minscale, - mode=mode, - align_corners=False if mode == "bilinear" else None, - recompute_scale_factor=True, - )[0] - imre_ = torch.zeros(image.shape[0], image_height, image_width) - imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre - mask = torch.zeros(1, image_height, image_width) - mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0 - return imre_, minscale, mask + +def _resize_image( + self, image, image_height, image_width, mode="bilinear" +) -> Tuple[torch.Tensor, float, torch.Tensor]: + if image_height is None or image_width is None: + # skip the resizing + imre_ = torch.from_numpy(image) + return imre_, 1.0, torch.ones_like(imre_[:1]) + # takes numpy array, returns pytorch tensor + minscale = min( + image_height / image.shape[-2], + image_width / image.shape[-1], + ) + imre = torch.nn.functional.interpolate( + torch.from_numpy(image)[None], + scale_factor=minscale, + mode=mode, + align_corners=False if mode == "bilinear" else None, + recompute_scale_factor=True, + )[0] + imre_ = torch.zeros(image.shape[0], image_height, image_width) + imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre + mask = torch.zeros(1, image_height, image_width) + mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0 + return imre_, minscale, mask def _load_image(path) -> np.ndarray: diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 209830bbe..5d432ba69 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -15,6 +15,7 @@ _load_depth_mask, _load_image, _load_mask, + _resize_image, BlobLoader, ) from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args from pytorch3d.renderer.cameras import PerspectiveCameras @@ -127,7 +128,9 @@ def test_resize_image(self): path = os.path.join(self.dataset_root, self.frame_annotation.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) - image_rgb, scale, mask_crop = self.blob_loader._resize_image(image) + image_rgb, scale, mask_crop = _resize_image( + image, image_height=self.image_height, image_width=self.image_width + ) original_shape = image.shape[-2:] expected_shape = ( From 76f45aa27dc5c99c576e9242b480267ceeb55db6 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 15:31:22 +0000 Subject: [PATCH 30/43] typo in function arguments
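The previous patch (PATCH 29) moved _resize_image out of BlobLoader to module level but left the `self` parameter in the signature, so every positional argument was shifted by one. An illustrative sketch of the failure mode this fixes (the 768/512 values are borrowed from the test setup; the snippet is not part of the diff):

    # with the stray `self` in place, the positional `image` argument
    # binds to `self` and the `image` parameter is left unfilled,
    # so this call raises a TypeError:
    image_rgb, scale, mask_crop = _resize_image(
        image, image_height=768, image_width=512
    )
    # dropping `self` from the signature makes the call work as intended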
--- pytorch3d/implicitron/dataset/blob_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index bedbc070e..ce15f116a 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -333,7 +333,7 @@ def _local_path(self, path: str) -> str: def _resize_image( - self, image, image_height, image_width, mode="bilinear" + image, image_height, image_width, mode="bilinear" ) -> Tuple[torch.Tensor, float, torch.Tensor]: if image_height is None or image_width is None: # skip the resizing From e5d3a2b08c3e0c1ab2496ef55d9e3a012428ea7f Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 15:44:12 +0000 Subject: [PATCH 31/43] moved tests for _resize_image to test_bbox --- tests/implicitron/test_bbox.py | 18 ++++++++++++++++++ tests/implicitron/test_blob_loader.py | 24 +----------------------- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index 8dffd751d..4f518dfee 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -18,6 +18,7 @@ _get_bbox_from_mask, _get_clamp_bbox, _rescale_bbox, + _resize_image, ) from tests.common_testing import TestCaseMixin @@ -121,3 +122,20 @@ def test_get_1d_bounds(self): bounds = _get_1d_bounds(array) # make nonzero 1d bounds of image self.assertClose(bounds, [1, 3]) + + def test_resize_image(self): + image = torch.rand(3, 300, 500) # rgb image 300x500 + expected_shape = (150, 250) + + resized_image, scale, mask_crop = _resize_image( + image, image_height=expected_shape[0], image_width=expected_shape[1] + ) + + original_shape = image.shape[-2:] + expected_scale = min( + expected_shape[0] / original_shape[0], expected_shape[1] / original_shape[1] + ) + + self.assertEqual(scale, expected_scale) + self.assertEqual(resized_image.shape[-2:], expected_shape) + self.assertEqual(mask_crop.shape[-2:], expected_shape) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 5d432ba69..5634854e9 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -15,10 +15,9 @@ _load_depth_mask, _load_image, _load_mask, - _resize_image, BlobLoader, ) -from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args +from pytorch3d.implicitron.tools.config import get_default_args from pytorch3d.renderer.cameras import PerspectiveCameras from tests.common_testing import TestCaseMixin @@ -124,27 +123,6 @@ def test_load(self): ) self.assertEqual(type(camera), PerspectiveCameras) - def test_resize_image(self): - path = os.path.join(self.dataset_root, self.frame_annotation.image.path) - local_path = self.path_manager.get_local_path(path) - image = _load_image(local_path) - image_rgb, scale, mask_crop = _resize_image( - image, image_height=self.image_height, image_width=self.image_width - ) - - original_shape = image.shape[-2:] - expected_shape = ( - self.image_height, - self.image_width, - ) - expected_scale = min( - expected_shape[0] / original_shape[0], expected_shape[1] / original_shape[1] - ) - - self.assertEqual(scale, expected_scale) - self.assertEqual(image_rgb.shape[-2:], expected_shape) - self.assertEqual(mask_crop.shape[-2:], expected_shape) - def test_load_image(self): path = os.path.join(self.dataset_root, self.frame_annotation.image.path) local_path = self.path_manager.get_local_path(path) From 
1ba1a3a6896ee947a7791df704c39ca436eff377 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 15:46:17 +0000 Subject: [PATCH 32/43] np array instead of tensor to resize_image --- tests/implicitron/test_bbox.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index 4f518dfee..48a8421bb 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -124,7 +124,7 @@ def test_get_1d_bounds(self): self.assertClose(bounds, [1, 3]) def test_resize_image(self): - image = torch.rand(3, 300, 500) # rgb image 300x500 + image = np.random.rand(3, 300, 500) # rgb image 300x500 expected_shape = (150, 250) resized_image, scale, mask_crop = _resize_image( From cd9aa5ccd0bb3899cee14058277d99b7fc2daffa Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Mon, 13 Mar 2023 10:59:25 +0000 Subject: [PATCH 33/43] setting up default scale value to correct one --- pytorch3d/implicitron/dataset/blob_loader.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index ce15f116a..2d77e6c08 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -90,7 +90,10 @@ def load( frame_data.crop_bbox_xywh, ) = self._load_crop_fg_probability(entry) - scale = 1.0 + scale = min( + self.image_height / entry.image.size[0], + self.image_width / entry.image.size[1], + ) if self.load_images and entry.image is not None: # original image size frame_data.image_size_hw = _safe_as_tensor(entry.image.size, torch.long) From ce9fd400d1ca3a2b7b767cda31ddc530d68eee1a Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 14 Mar 2023 10:48:26 +0000 Subject: [PATCH 34/43] renamed function to load_ to make the inplace modification more obvious --- pytorch3d/implicitron/dataset/blob_loader.py | 3 +-- pytorch3d/implicitron/dataset/json_index_dataset.py | 3 ++- tests/implicitron/test_blob_loader.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index 2d77e6c08..6d0dc7fa4 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -73,7 +73,7 @@ class BlobLoader: box_crop_context: float = 0.3 path_manager: Any = None - def load( + def load_( self, frame_data: FrameData, entry: types.FrameAnnotation, @@ -127,7 +127,6 @@ def load( self._local_path(pcl_path), max_points=self.max_points ) frame_data.sequence_point_cloud_path = pcl_path - return frame_data def _load_crop_fg_probability( self, entry: types.FrameAnnotation diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py index cf63b9b43..636630680 100644 --- a/pytorch3d/implicitron/dataset/json_index_dataset.py +++ b/pytorch3d/implicitron/dataset/json_index_dataset.py @@ -443,9 +443,10 @@ def __getitem__(self, index) -> FrameData: # Optional field frame_data.frame_type = self._get_frame_type(self.frame_annots[index]) - return self.blob_loader.load( + self.blob_loader.load_( frame_data, entry, self.seq_annots[entry.sequence_name] ) + return frame_data def _load_frames(self) -> None: logger.info(f"Loading Co3D frames from {self.frame_annotations_file}.") diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 5634854e9..fd8d8fd81 100644 ---
a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -64,7 +64,7 @@ def test_fix_point_cloud_path(self): assert original_path in modified_path assert self.blob_loader.dataset_root in modified_path - def test_load(self): + def test_load_(self): ( fg_probability, mask_path, From f217eb1fcc2f3ee561aacab161488b2142aafb9a Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 14 Mar 2023 17:55:25 +0000 Subject: [PATCH 35/43] moved crop_by_bbox to FrameData as method --- pytorch3d/implicitron/dataset/blob_loader.py | 181 ++---------------- pytorch3d/implicitron/dataset/dataset_base.py | 32 ++++ pytorch3d/implicitron/dataset/utils.py | 101 ++++++++++ tests/implicitron/test_bbox.py | 5 +- 4 files changed, 152 insertions(+), 167 deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index 6d0dc7fa4..ce59c542d 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -20,6 +20,9 @@ from pytorch3d.io import IO from pytorch3d.renderer.cameras import PerspectiveCameras from pytorch3d.structures.pointclouds import Pointclouds +from pytorch3d.implicitron.dataset.utils import ( + _get_bbox_from_mask, +) @dataclass @@ -85,9 +88,7 @@ def load_( ( frame_data.fg_probability, frame_data.mask_path, - frame_data.bbox_xywh, - clamp_bbox_xyxy, - frame_data.crop_bbox_xywh, + bbox_xywh, ) = self._load_crop_fg_probability(entry) scale = min( @@ -103,23 +104,17 @@ def load_( frame_data.image_path, frame_data.mask_crop, scale, - ) = self._load_crop_images( - entry, frame_data.fg_probability, clamp_bbox_xyxy - ) + ) = self._load_crop_images(entry, frame_data.fg_probability) if self.load_depths and entry.depth is not None: ( frame_data.depth_map, frame_data.depth_path, frame_data.depth_mask, - ) = self._load_mask_depth(entry, clamp_bbox_xyxy, frame_data.fg_probability) + ) = self._load_mask_depth(entry, frame_data.fg_probability) if entry.viewpoint is not None: - frame_data.camera = self._get_pytorch3d_camera( - entry, - scale, - clamp_bbox_xyxy, - ) + frame_data.camera = self._get_pytorch3d_camera(entry, scale) if self.load_point_clouds and seq_annotation.point_cloud is not None: pcl_path = self._fix_point_cloud_path(seq_annotation.point_cloud.path) @@ -128,45 +123,28 @@ def load_( ) frame_data.sequence_point_cloud_path = pcl_path + if self.box_crop: + frame_data.crop_by_bbox(bbox_xywh, self.box_crop_context, ) + + return frame_data + def _load_crop_fg_probability( self, entry: types.FrameAnnotation - ) -> Tuple[ - Optional[torch.Tensor], - Optional[str], - Optional[torch.Tensor], - Optional[torch.Tensor], - Optional[torch.Tensor], - ]: + ) -> Tuple[Optional[torch.Tensor],Optional[str],Optional[torch.Tensor]]: fg_probability = None full_path = None bbox_xywh = None - clamp_bbox_xyxy = None - crop_box_xywh = None - if (self.load_masks or self.box_crop) and entry.mask is not None: + if (self.load_masks) and entry.mask is not None: full_path = os.path.join(self.dataset_root, entry.mask.path) mask = _load_mask(self._local_path(full_path)) + bbox_xywh = torch.tensor(_get_bbox_from_mask(self.mask, self.box_crop_mask_thr)) if mask.shape[-2:] != entry.image.size: raise ValueError( f"bad mask size: {mask.shape[-2:]} vs {entry.image.size}!" 
) - bbox_xywh = torch.tensor(_get_bbox_from_mask(mask, self.box_crop_mask_thr)) - - if self.box_crop: - clamp_bbox_xyxy = _clamp_box_to_image_bounds_and_round( - _get_clamp_bbox( - bbox_xywh, - image_path=entry.image.path, - box_crop_context=self.box_crop_context, - ), - image_size_hw=tuple(mask.shape[-2:]), - ) - crop_box_xywh = _bbox_xyxy_to_xywh(clamp_bbox_xyxy) - - mask = _crop_around_box(mask, clamp_bbox_xyxy, full_path) - fg_probability, _, _ = _resize_image( mask, image_height=self.image_height, @@ -174,13 +152,12 @@ def _load_crop_fg_probability( mode="nearest", ) - return fg_probability, full_path, bbox_xywh, clamp_bbox_xyxy, crop_box_xywh + return fg_probability, full_path, bbox_xywh def _load_crop_images( self, entry: types.FrameAnnotation, fg_probability: Optional[torch.Tensor], - clamp_bbox_xyxy: Optional[torch.Tensor], ) -> Tuple[torch.Tensor, str, torch.Tensor, float]: assert self.dataset_root is not None and entry.image is not None path = os.path.join(self.dataset_root, entry.image.path) @@ -191,10 +168,6 @@ def _load_crop_images( f"bad image size: {image_rgb.shape[-2:]} vs {entry.image.size}!" ) - if self.box_crop: - assert clamp_bbox_xyxy is not None - image_rgb = _crop_around_box(image_rgb, clamp_bbox_xyxy, path) - image_rgb, scale, mask_crop = _resize_image( image_rgb, image_height=self.image_height, image_width=self.image_width ) @@ -208,7 +181,6 @@ def _load_crop_images( def _load_mask_depth( self, entry: types.FrameAnnotation, - clamp_bbox_xyxy: Optional[torch.Tensor], fg_probability: Optional[torch.Tensor], ) -> Tuple[torch.Tensor, str, torch.Tensor]: entry_depth = entry.depth @@ -216,13 +188,6 @@ def _load_mask_depth( path = os.path.join(self.dataset_root, entry_depth.path) depth_map = _load_depth(self._local_path(path), entry_depth.scale_adjustment) - if self.box_crop: - assert clamp_bbox_xyxy is not None - depth_bbox_xyxy = _rescale_bbox( - clamp_bbox_xyxy, entry.image.size, depth_map.shape[-2:] - ) - depth_map = _crop_around_box(depth_map, depth_bbox_xyxy, path) - depth_map, _, _ = _resize_image( depth_map, image_height=self.image_height, @@ -239,15 +204,6 @@ def _load_mask_depth( mask_path = os.path.join(self.dataset_root, entry_depth.mask_path) depth_mask = _load_depth_mask(self._local_path(mask_path)) - if self.box_crop: - assert clamp_bbox_xyxy is not None - depth_mask_bbox_xyxy = _rescale_bbox( - clamp_bbox_xyxy, entry.image.size, depth_mask.shape[-2:] - ) - depth_mask = _crop_around_box( - depth_mask, depth_mask_bbox_xyxy, mask_path - ) - depth_mask, _, _ = _resize_image( depth_mask, image_height=self.image_height, @@ -263,7 +219,6 @@ def _get_pytorch3d_camera( self, entry: types.FrameAnnotation, scale: float, - clamp_bbox_xyxy: Optional[torch.Tensor], ) -> PerspectiveCameras: entry_viewpoint = entry.viewpoint assert entry_viewpoint is not None @@ -290,9 +245,6 @@ def _get_pytorch3d_camera( # principal point and focal length in pixels principal_point_px = half_image_size_wh_orig - principal_point * rescale focal_length_px = focal_length * rescale - if self.box_crop: - assert clamp_bbox_xyxy is not None - principal_point_px -= clamp_bbox_xyxy[:2] # now, convert from pixels to PyTorch3D v0.5+ NDC convention if self.image_height is None or self.image_width is None: @@ -375,84 +327,6 @@ def _load_mask(path) -> np.ndarray: return mask[None] # fake feature channel -def _get_bbox_from_mask( - mask, thr, decrease_quant: float = 0.05 -) -> Tuple[int, int, int, int]: - # bbox in xywh - masks_for_box = np.zeros_like(mask) - while masks_for_box.sum() <= 1.0: - 
masks_for_box = (mask > thr).astype(np.float32) - thr -= decrease_quant - if thr <= 0.0: - warnings.warn( - f"Empty masks_for_bbox (thr={thr}) => using full image.", stacklevel=1 - ) - - x0, x1 = _get_1d_bounds(masks_for_box.sum(axis=-2)) - y0, y1 = _get_1d_bounds(masks_for_box.sum(axis=-1)) - - return x0, y0, x1 - x0, y1 - y0 - - -def _crop_around_box(tensor, bbox, impath: str = ""): - # bbox is xyxy, where the upper bound is corrected with +1 - bbox = _clamp_box_to_image_bounds_and_round( - bbox, - image_size_hw=tensor.shape[-2:], - ) - tensor = tensor[..., bbox[1] : bbox[3], bbox[0] : bbox[2]] - assert all(c > 0 for c in tensor.shape), f"squashed image {impath}" - return tensor - - -def _clamp_box_to_image_bounds_and_round( - bbox_xyxy: torch.Tensor, - image_size_hw: Tuple[int, int], -) -> torch.LongTensor: - bbox_xyxy = bbox_xyxy.clone() - bbox_xyxy[[0, 2]] = torch.clamp(bbox_xyxy[[0, 2]], 0, image_size_hw[-1]) - bbox_xyxy[[1, 3]] = torch.clamp(bbox_xyxy[[1, 3]], 0, image_size_hw[-2]) - if not isinstance(bbox_xyxy, torch.LongTensor): - bbox_xyxy = bbox_xyxy.round().long() - return bbox_xyxy # pyre-ignore [7] - - -def _get_clamp_bbox( - bbox: torch.Tensor, - box_crop_context: float = 0.0, - image_path: str = "", -) -> torch.Tensor: - # box_crop_context: rate of expansion for bbox - # returns possibly expanded bbox xyxy as float - - bbox = bbox.clone() # do not edit bbox in place - - # increase box size - if box_crop_context > 0.0: - c = box_crop_context - bbox = bbox.float() - bbox[0] -= bbox[2] * c / 2 - bbox[1] -= bbox[3] * c / 2 - bbox[2] += bbox[2] * c - bbox[3] += bbox[3] * c - - if (bbox[2:] <= 1.0).any(): - raise ValueError( - f"squashed image {image_path}!! The bounding box contains no pixels." - ) - - bbox[2:] = torch.clamp(bbox[2:], 2) # set min height, width to 2 along both axes - bbox_xyxy = _bbox_xywh_to_xyxy(bbox, clamp_size=2) - - return bbox_xyxy - - -def _bbox_xyxy_to_xywh(xyxy: torch.Tensor) -> torch.Tensor: - wh = xyxy[2:] - xyxy[:2] - xywh = torch.cat([xyxy[:2], wh]) - return xywh - - def _load_depth(path, scale_adjustment) -> np.ndarray: if not path.lower().endswith(".png"): raise ValueError('unsupported depth file name "%s"' % path) @@ -474,14 +348,6 @@ def _load_16big_png_depth(depth_png) -> np.ndarray: return depth -def _rescale_bbox(bbox: torch.Tensor, orig_res, new_res) -> torch.Tensor: - assert bbox is not None - assert np.prod(orig_res) > 1e-8 - # average ratio of dimensions - rel_size = (new_res[0] / orig_res[0] + new_res[1] / orig_res[1]) / 2.0 - return bbox * rel_size - - def _load_1bit_png_mask(file: str) -> np.ndarray: with Image.open(file) as pil_im: mask = (np.array(pil_im.convert("L")) > 0.0).astype(np.float32) @@ -495,21 +361,6 @@ def _load_depth_mask(path: str) -> np.ndarray: return m[None] # fake feature channel -def _get_1d_bounds(arr) -> Tuple[int, int]: - nz = np.flatnonzero(arr) - return nz[0], nz[-1] + 1 - - -def _bbox_xywh_to_xyxy( - xywh: torch.Tensor, clamp_size: Optional[int] = None -) -> torch.Tensor: - xyxy = xywh.clone() - if clamp_size is not None: - xyxy[2:] = torch.clamp(xyxy[2:], clamp_size) - xyxy[2:] += xyxy[:2] - return xyxy - - def _safe_as_tensor(data, dtype): return torch.tensor(data, dtype=dtype) if data is not None else None diff --git a/pytorch3d/implicitron/dataset/dataset_base.py b/pytorch3d/implicitron/dataset/dataset_base.py index 283ef3dcd..322d1889b 100644 --- a/pytorch3d/implicitron/dataset/dataset_base.py +++ b/pytorch3d/implicitron/dataset/dataset_base.py @@ -26,6 +26,13 @@ from pytorch3d.renderer.camera_utils 
import join_cameras_as_batch from pytorch3d.renderer.cameras import CamerasBase, PerspectiveCameras from pytorch3d.structures.pointclouds import join_pointclouds_as_batch, Pointclouds +from pytorch3d.implicitron.dataset.utils import ( + _crop_around_box, + _clamp_box_to_image_bounds_and_round, + _bbox_xyxy_to_xywh, + _get_clamp_bbox, + _rescale_bbox, +) @dataclass @@ -144,6 +151,31 @@ def __getitem__(self, key): def __len__(self): return len(fields(self)) + def crop_by_bbox(self, bbox_xywh, box_crop_context): + clamp_bbox_xyxy = _clamp_box_to_image_bounds_and_round( + _get_clamp_bbox( + bbox_xywh, + image_path=self.image.path, + box_crop_context=box_crop_context, + ), + image_size_hw=tuple(self.fg_probability.shape[-2:]), + ) + self.crop_bbox_xywh = _bbox_xyxy_to_xywh(clamp_bbox_xyxy) + + self.fg_probability = _crop_around_box( + self.fg_probability, clamp_bbox_xyxy, self.mask_path + ) + self.image_rgb = _crop_around_box(self.image_rgb, clamp_bbox_xyxy, self.image.path) + + depth_bbox_xyxy = _rescale_bbox(clamp_bbox_xyxy, entry.image.size, self.depth_map.shape[-2:]) + self.depth_map = _crop_around_box(self.depth_map, depth_bbox_xyxy, self.depth_path) + + depth_mask_bbox_xyxy = _rescale_bbox(clamp_bbox_xyxy, entry.image.size, self.depth_mask.shape[-2:]) + self.depth_mask = _crop_around_box(self.depth_mask, depth_mask_bbox_xyxy, self.mask_path) + + + principal_point_px -= clamp_bbox_xyxy[:2] + @classmethod def collate(cls, batch): """ diff --git a/pytorch3d/implicitron/dataset/utils.py b/pytorch3d/implicitron/dataset/utils.py index 05252aff1..b2ac99f36 100644 --- a/pytorch3d/implicitron/dataset/utils.py +++ b/pytorch3d/implicitron/dataset/utils.py @@ -52,3 +52,104 @@ def is_train_frame( dtype=torch.bool, device=device, ) + + +def _get_bbox_from_mask( + mask, thr, decrease_quant: float = 0.05 + ) -> Tuple[int, int, int, int]: + # bbox in xywh + masks_for_box = np.zeros_like(mask) + while masks_for_box.sum() <= 1.0: + masks_for_box = (mask > thr).astype(np.float32) + thr -= decrease_quant + if thr <= 0.0: + warnings.warn( + f"Empty masks_for_bbox (thr={thr}) => using full image.", stacklevel=1 + ) + + x0, x1 = _get_1d_bounds(masks_for_box.sum(axis=-2)) + y0, y1 = _get_1d_bounds(masks_for_box.sum(axis=-1)) + + return x0, y0, x1 - x0, y1 - y0 + + +def _crop_around_box(tensor, bbox, impath: str = ""): + # bbox is xyxy, where the upper bound is corrected with +1 + bbox = _clamp_box_to_image_bounds_and_round( + bbox, + image_size_hw=tensor.shape[-2:], + ) + tensor = tensor[..., bbox[1] : bbox[3], bbox[0] : bbox[2]] + assert all(c > 0 for c in tensor.shape), f"squashed image {impath}" + return tensor + + +def _clamp_box_to_image_bounds_and_round( + bbox_xyxy: torch.Tensor, + image_size_hw: Tuple[int, int], +) -> torch.LongTensor: + bbox_xyxy = bbox_xyxy.clone() + bbox_xyxy[[0, 2]] = torch.clamp(bbox_xyxy[[0, 2]], 0, image_size_hw[-1]) + bbox_xyxy[[1, 3]] = torch.clamp(bbox_xyxy[[1, 3]], 0, image_size_hw[-2]) + if not isinstance(bbox_xyxy, torch.LongTensor): + bbox_xyxy = bbox_xyxy.round().long() + return bbox_xyxy # pyre-ignore [7] + + +def _bbox_xyxy_to_xywh(xyxy: torch.Tensor) -> torch.Tensor: + wh = xyxy[2:] - xyxy[:2] + xywh = torch.cat([xyxy[:2], wh]) + return xywh + + +def _get_clamp_bbox( + bbox: torch.Tensor, + box_crop_context: float = 0.0, + image_path: str = "", +) -> torch.Tensor: + # box_crop_context: rate of expansion for bbox + # returns possibly expanded bbox xyxy as float + + bbox = bbox.clone() # do not edit bbox in place + + # increase box size + if box_crop_context > 0.0: + c = 
box_crop_context + bbox = bbox.float() + bbox[0] -= bbox[2] * c / 2 + bbox[1] -= bbox[3] * c / 2 + bbox[2] += bbox[2] * c + bbox[3] += bbox[3] * c + + if (bbox[2:] <= 1.0).any(): + raise ValueError( + f"squashed image {image_path}!! The bounding box contains no pixels." + ) + + bbox[2:] = torch.clamp(bbox[2:], 2) # set min height, width to 2 along both axes + bbox_xyxy = _bbox_xywh_to_xyxy(bbox, clamp_size=2) + + return bbox_xyxy + + +def _rescale_bbox(bbox: torch.Tensor, orig_res, new_res) -> torch.Tensor: + assert bbox is not None + assert np.prod(orig_res) > 1e-8 + # average ratio of dimensions + rel_size = (new_res[0] / orig_res[0] + new_res[1] / orig_res[1]) / 2.0 + return bbox * rel_size + + +def _bbox_xywh_to_xyxy( + xywh: torch.Tensor, clamp_size: Optional[int] = None +) -> torch.Tensor: + xyxy = xywh.clone() + if clamp_size is not None: + xyxy[2:] = torch.clamp(xyxy[2:], clamp_size) + xyxy[2:] += xyxy[:2] + return xyxy + + +def _get_1d_bounds(arr) -> Tuple[int, int]: + nz = np.flatnonzero(arr) + return nz[0], nz[-1] + 1 diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index 48a8421bb..16199ad1e 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -9,7 +9,9 @@ import numpy as np import torch -from pytorch3d.implicitron.dataset.blob_loader import ( +from pytorch3d.implicitron.dataset.blob_loader import _resize_image + +from pytorch3d.implicitron.dataset.utils import ( _bbox_xywh_to_xyxy, _bbox_xyxy_to_xywh, _clamp_box_to_image_bounds_and_round, @@ -18,7 +20,6 @@ _get_bbox_from_mask, _get_clamp_bbox, _rescale_bbox, - _resize_image, ) from tests.common_testing import TestCaseMixin From 664d35d66de59e815f3feec581d6ad80bc0bdea0 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 14 Mar 2023 18:16:40 +0000 Subject: [PATCH 36/43] tests fix, typos, linter --- pytorch3d/implicitron/dataset/blob_loader.py | 11 +++--- pytorch3d/implicitron/dataset/utils.py | 37 +++++++++++--------- tests/implicitron/test_blob_loader.py | 17 ++++----- 3 files changed, 30 insertions(+), 35 deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index ce59c542d..fa3a5ac29 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -6,7 +6,6 @@ import functools import os -import warnings from dataclasses import dataclass from pathlib import Path from typing import Any, Optional, Tuple, Union @@ -17,12 +16,10 @@ from pytorch3d.implicitron.dataset import types from pytorch3d.implicitron.dataset.dataset_base import FrameData +from pytorch3d.implicitron.dataset.utils import _get_bbox_from_mask from pytorch3d.io import IO from pytorch3d.renderer.cameras import PerspectiveCameras from pytorch3d.structures.pointclouds import Pointclouds -from pytorch3d.implicitron.dataset.utils import ( - _get_bbox_from_mask, -) @dataclass @@ -124,13 +121,13 @@ def load_( frame_data.sequence_point_cloud_path = pcl_path if self.box_crop: - frame_data.crop_by_bbox(bbox_xywh, self.box_crop_context, ) + frame_data.crop_by_bbox(bbox_xywh, self.box_crop_context) return frame_data def _load_crop_fg_probability( self, entry: types.FrameAnnotation - ) -> Tuple[Optional[torch.Tensor],Optional[str],Optional[torch.Tensor]]: + ) -> Tuple[Optional[torch.Tensor], Optional[str], Optional[torch.Tensor]]: fg_probability = None full_path = None bbox_xywh = None @@ -138,7 +135,7 @@ def _load_crop_fg_probability( if (self.load_masks) and entry.mask is not None: full_path = 
os.path.join(self.dataset_root, entry.mask.path) mask = _load_mask(self._local_path(full_path)) - bbox_xywh = torch.tensor(_get_bbox_from_mask(self.mask, self.box_crop_mask_thr)) + bbox_xywh = torch.tensor(_get_bbox_from_mask(mask, self.box_crop_mask_thr)) if mask.shape[-2:] != entry.image.size: raise ValueError( diff --git a/pytorch3d/implicitron/dataset/utils.py b/pytorch3d/implicitron/dataset/utils.py index b2ac99f36..6e9af933d 100644 --- a/pytorch3d/implicitron/dataset/utils.py +++ b/pytorch3d/implicitron/dataset/utils.py @@ -5,7 +5,10 @@ # LICENSE file in the root directory of this source tree. -from typing import List, Optional +import warnings +from typing import List, Optional, Tuple + +import numpy as np import torch @@ -55,22 +58,22 @@ def is_train_frame( def _get_bbox_from_mask( - mask, thr, decrease_quant: float = 0.05 - ) -> Tuple[int, int, int, int]: - # bbox in xywh - masks_for_box = np.zeros_like(mask) - while masks_for_box.sum() <= 1.0: - masks_for_box = (mask > thr).astype(np.float32) - thr -= decrease_quant - if thr <= 0.0: - warnings.warn( - f"Empty masks_for_bbox (thr={thr}) => using full image.", stacklevel=1 - ) - - x0, x1 = _get_1d_bounds(masks_for_box.sum(axis=-2)) - y0, y1 = _get_1d_bounds(masks_for_box.sum(axis=-1)) - - return x0, y0, x1 - x0, y1 - y0 + mask, thr, decrease_quant: float = 0.05 +) -> Tuple[int, int, int, int]: + # bbox in xywh + masks_for_box = np.zeros_like(mask) + while masks_for_box.sum() <= 1.0: + masks_for_box = (mask > thr).astype(np.float32) + thr -= decrease_quant + if thr <= 0.0: + warnings.warn( + f"Empty masks_for_bbox (thr={thr}) => using full image.", stacklevel=1 + ) + + x0, x1 = _get_1d_bounds(masks_for_box.sum(axis=-2)) + y0, y1 = _get_1d_bounds(masks_for_box.sum(axis=-1)) + + return x0, y0, x1 - x0, y1 - y0 def _crop_around_box(tensor, bbox, impath: str = ""): diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index fd8d8fd81..d2a612d48 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -69,25 +69,22 @@ def test_load_(self): fg_probability, mask_path, bbox_xywh, - clamp_bbox_xyxy, - crop_bbox_xywh, ) = self.blob_loader._load_crop_fg_probability(self.frame_annotation) assert mask_path assert torch.is_tensor(fg_probability) assert torch.is_tensor(bbox_xywh) - assert torch.is_tensor(clamp_bbox_xyxy) - assert torch.is_tensor(crop_bbox_xywh) # assert bboxes shape self.assertEqual( fg_probability.shape, torch.Size([1, self.image_height, self.image_width]) ) self.assertEqual(bbox_xywh.shape, torch.Size([4])) - self.assertEqual(clamp_bbox_xyxy.shape, torch.Size([4])) - self.assertEqual(crop_bbox_xywh.shape, torch.Size([4])) - (image_rgb, image_path, mask_crop, scale,) = self.blob_loader._load_crop_images( - self.frame_annotation, fg_probability, clamp_bbox_xyxy - ) + ( + image_rgb, + image_path, + mask_crop, + scale, + ) = self.blob_loader._load_crop_images(self.frame_annotation, fg_probability) assert torch.is_tensor(image_rgb) assert image_path assert torch.is_tensor(mask_crop) @@ -102,7 +99,6 @@ def test_load_(self): (depth_map, depth_path, depth_mask,) = self.blob_loader._load_mask_depth( self.frame_annotation, - clamp_bbox_xyxy, fg_probability, ) assert torch.is_tensor(depth_map) @@ -119,7 +115,6 @@ def test_load_(self): camera = self.blob_loader._get_pytorch3d_camera( self.frame_annotation, scale, - clamp_bbox_xyxy, ) self.assertEqual(type(camera), PerspectiveCameras) From 5c249db0a0160cf9c1b4043634a4e0a495cff6e1 Mon Sep 17 00:00:00 2001 From: 
Ildar Salakhiev Date: Tue, 14 Mar 2023 18:25:50 +0000 Subject: [PATCH 37/43] renamed crop to crop_ to show inplace modification --- pytorch3d/implicitron/dataset/blob_loader.py | 4 ++-- pytorch3d/implicitron/dataset/dataset_base.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index fa3a5ac29..19417a639 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -85,7 +85,7 @@ def load_( ( frame_data.fg_probability, frame_data.mask_path, - bbox_xywh, + frame_data.bbox_xywh, ) = self._load_crop_fg_probability(entry) scale = min( @@ -121,7 +121,7 @@ def load_( frame_data.sequence_point_cloud_path = pcl_path if self.box_crop: - frame_data.crop_by_bbox(bbox_xywh, self.box_crop_context) + frame_data.crop_by_bbox_(self.box_crop_context) return frame_data diff --git a/pytorch3d/implicitron/dataset/dataset_base.py b/pytorch3d/implicitron/dataset/dataset_base.py index 322d1889b..7ddc9e122 100644 --- a/pytorch3d/implicitron/dataset/dataset_base.py +++ b/pytorch3d/implicitron/dataset/dataset_base.py @@ -151,10 +151,10 @@ def __getitem__(self, key): def __len__(self): return len(fields(self)) - def crop_by_bbox(self, bbox_xywh, box_crop_context): + def crop_by_bbox_(self, box_crop_context): clamp_bbox_xyxy = _clamp_box_to_image_bounds_and_round( _get_clamp_bbox( - bbox_xywh, + self.bbox_xywh, image_path=self.image.path, box_crop_context=box_crop_context, ), From 530b9a42d1ebfde8afa92dc3bded73f18d6e0a25 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 14 Mar 2023 18:33:44 +0000 Subject: [PATCH 38/43] shifting camera according to bbox --- pytorch3d/implicitron/dataset/dataset_base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pytorch3d/implicitron/dataset/dataset_base.py b/pytorch3d/implicitron/dataset/dataset_base.py index 7ddc9e122..2c1bb7527 100644 --- a/pytorch3d/implicitron/dataset/dataset_base.py +++ b/pytorch3d/implicitron/dataset/dataset_base.py @@ -173,8 +173,7 @@ def crop_by_bbox_(self, box_crop_context): depth_mask_bbox_xyxy = _rescale_bbox(clamp_bbox_xyxy, entry.image.size, self.depth_mask.shape[-2:]) self.depth_mask = _crop_around_box(self.depth_mask, depth_mask_bbox_xyxy, self.mask_path) - - principal_point_px -= clamp_bbox_xyxy[:2] + self.camera.principal_point_px -= clamp_bbox_xyxy[:2] @classmethod def collate(cls, batch): From e5500f329d3016740af50f4dc420b07d1383942a Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Wed, 15 Mar 2023 18:22:18 +0000 Subject: [PATCH 39/43] delegated resize_image to FrameData, made bbox_xywh an optional external parameter for load_, linter, fbcode tests --- pytorch3d/implicitron/dataset/blob_loader.py | 130 ++++++----------- pytorch3d/implicitron/dataset/dataset_base.py | 118 +++++++++++++--- .../implicitron/dataset/json_index_dataset.py | 44 ++++-- pytorch3d/implicitron/dataset/utils.py | 29 ++++ pytorch3d/implicitron/dataset/visualize.py | 1 + tests/implicitron/test_bbox.py | 2 +- tests/implicitron/test_blob_loader.py | 131 +++++++++++++----- 7 files changed, 307 insertions(+), 148 deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index 19417a639..9ccf53b2f 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -38,23 +38,23 @@ class BlobLoader: load_masks: Enable loading frame foreground masks.
load_point_clouds: Enable loading sequence-level point clouds. max_points: Cap on the number of loaded points in the point cloud; - if reached, they are randomly sampled without replacement. + if reached, they are randomly sampled without replacement. mask_images: Whether to mask the images with the loaded foreground masks; - 0 value is used for background. + 0 value is used for background. mask_depths: Whether to mask the depth maps with the loaded foreground masks; 0 value is used for background. image_height: The height of the returned images, masks, and depth maps; - aspect ratio is preserved during cropping/resizing. + aspect ratio is preserved during cropping/resizing. image_width: The width of the returned images, masks, and depth maps; aspect ratio is preserved during cropping/resizing. box_crop: Enable cropping of the image around the bounding box inferred - from the foreground region of the loaded segmentation mask; masks - and depth maps are cropped accordingly; cameras are corrected. + from the foreground region of the loaded segmentation mask; masks + and depth maps are cropped accordingly; cameras are corrected. box_crop_mask_thr: The threshold used to separate pixels into foreground - and background based on the foreground_probability mask; if no value - is greater than this threshold, the loader lowers it and repeats. + and background based on the foreground_probability mask; if no value + is greater than this threshold, the loader lowers it and repeats. box_crop_context: The amount of additional padding added to each - dimension of the cropping bounding box, relative to box size. + dimension of the cropping bounding box, relative to box size. """ dataset_root: str = "" @@ -78,20 +78,18 @@ def load_( frame_data: FrameData, entry: types.FrameAnnotation, seq_annotation: types.SequenceAnnotation, + bbox_xywh: Optional[torch.Tensor] = None, ) -> FrameData: """Main method for loader. 
FrameData modification is done in-place.
+        If bbox_xywh is not provided, the bbox is calculated from the mask.
         """
         (
             frame_data.fg_probability,
             frame_data.mask_path,
             frame_data.bbox_xywh,
-        ) = self._load_crop_fg_probability(entry)
+        ) = self._load_fg_probability(entry, bbox_xywh)

-        scale = min(
-            self.image_height / entry.image.size[0],
-            self.image_width / entry.image.size[1],
-        )
         if self.load_images and entry.image is not None:
             # original image size
             frame_data.image_size_hw = _safe_as_tensor(entry.image.size, torch.long)

@@ -99,9 +97,7 @@ def load_(
             (
                 frame_data.image_rgb,
                 frame_data.image_path,
-                frame_data.mask_crop,
-                scale,
-            ) = self._load_crop_images(entry, frame_data.fg_probability)
+            ) = self._load_images(entry, frame_data.fg_probability)

         if self.load_depths and entry.depth is not None:
             (
@@ -110,9 +106,6 @@ def load_(
                 frame_data.depth_mask,
             ) = self._load_mask_depth(entry, frame_data.fg_probability)

-        if entry.viewpoint is not None:
-            frame_data.camera = self._get_pytorch3d_camera(entry, scale)
-
         if self.load_point_clouds and seq_annotation.point_cloud is not None:
             pcl_path = self._fix_point_cloud_path(seq_annotation.point_cloud.path)
             frame_data.sequence_point_cloud = _load_pointcloud(
@@ -120,42 +113,50 @@ def load_(
             )
             frame_data.sequence_point_cloud_path = pcl_path

+        clamp_bbox_xyxy = None
         if self.box_crop:
-            frame_data.crop_by_bbox_(self.box_crop_context)
+            clamp_bbox_xyxy = frame_data.crop_by_bbox_(self.box_crop_context)
+
+        scale = 1.0
+
+        if self.image_height is not None and self.image_width is not None:
+            scale = frame_data.resize_frame_(self.image_height, self.image_width)
+        # creating camera, taking into account bbox and resize scale
+        if entry.viewpoint is not None:
+            frame_data.camera = self._get_pytorch3d_camera(
+                entry, scale, clamp_bbox_xyxy
+            )

         return frame_data

-    def _load_crop_fg_probability(
-        self, entry: types.FrameAnnotation
+    def _load_fg_probability(
+        self,
+        entry: types.FrameAnnotation,
+        bbox_xywh: Optional[torch.Tensor],
     ) -> Tuple[Optional[torch.Tensor], Optional[str], Optional[torch.Tensor]]:
         fg_probability = None
         full_path = None
-        bbox_xywh = None

         if (self.load_masks) and entry.mask is not None:
             full_path = os.path.join(self.dataset_root, entry.mask.path)
-            mask = _load_mask(self._local_path(full_path))
-            bbox_xywh = torch.tensor(_get_bbox_from_mask(mask, self.box_crop_mask_thr))
-
-            if mask.shape[-2:] != entry.image.size:
+            fg_probability = _load_mask(self._local_path(full_path))
+            # we can use the provided bbox_xywh or calculate it from the mask
+            if bbox_xywh is None:
+                bbox_xywh = torch.tensor(
+                    _get_bbox_from_mask(fg_probability, self.box_crop_mask_thr)
+                )
+            if fg_probability.shape[-2:] != entry.image.size:
                 raise ValueError(
-                    f"bad mask size: {mask.shape[-2:]} vs {entry.image.size}!"
+                    f"bad mask size: {fg_probability.shape[-2:]} vs {entry.image.size}!"
) - fg_probability, _, _ = _resize_image( - mask, - image_height=self.image_height, - image_width=self.image_width, - mode="nearest", - ) - - return fg_probability, full_path, bbox_xywh + return torch.tensor(fg_probability), full_path, bbox_xywh - def _load_crop_images( + def _load_images( self, entry: types.FrameAnnotation, fg_probability: Optional[torch.Tensor], - ) -> Tuple[torch.Tensor, str, torch.Tensor, float]: + ) -> Tuple[torch.Tensor, str]: assert self.dataset_root is not None and entry.image is not None path = os.path.join(self.dataset_root, entry.image.path) image_rgb = _load_image(self._local_path(path)) @@ -165,15 +166,11 @@ def _load_crop_images( f"bad image size: {image_rgb.shape[-2:]} vs {entry.image.size}!" ) - image_rgb, scale, mask_crop = _resize_image( - image_rgb, image_height=self.image_height, image_width=self.image_width - ) - if self.mask_images: assert fg_probability is not None image_rgb *= fg_probability - return image_rgb, path, mask_crop, scale + return image_rgb, path def _load_mask_depth( self, @@ -185,13 +182,6 @@ def _load_mask_depth( path = os.path.join(self.dataset_root, entry_depth.path) depth_map = _load_depth(self._local_path(path), entry_depth.scale_adjustment) - depth_map, _, _ = _resize_image( - depth_map, - image_height=self.image_height, - image_width=self.image_width, - mode="nearest", - ) - if self.mask_depths: assert fg_probability is not None depth_map *= fg_probability @@ -200,22 +190,16 @@ def _load_mask_depth( assert entry_depth.mask_path is not None mask_path = os.path.join(self.dataset_root, entry_depth.mask_path) depth_mask = _load_depth_mask(self._local_path(mask_path)) - - depth_mask, _, _ = _resize_image( - depth_mask, - image_height=self.image_height, - image_width=self.image_width, - mode="nearest", - ) else: depth_mask = torch.ones_like(depth_map) - return depth_map, path, depth_mask + return torch.tensor(depth_map), path, torch.tensor(depth_mask) def _get_pytorch3d_camera( self, entry: types.FrameAnnotation, scale: float, + clamp_bbox_xyxy: Optional[torch.Tensor], ) -> PerspectiveCameras: entry_viewpoint = entry.viewpoint assert entry_viewpoint is not None @@ -243,6 +227,10 @@ def _get_pytorch3d_camera( principal_point_px = half_image_size_wh_orig - principal_point * rescale focal_length_px = focal_length * rescale + # changing principal_point according to bbox_crop + if clamp_bbox_xyxy is not None: + principal_point_px -= clamp_bbox_xyxy[:2] + # now, convert from pixels to PyTorch3D v0.5+ NDC convention if self.image_height is None or self.image_width is None: out_size = list(reversed(entry.image.size)) @@ -283,32 +271,6 @@ def _local_path(self, path: str) -> str: return self.path_manager.get_local_path(path) -def _resize_image( - image, image_height, image_width, mode="bilinear" -) -> Tuple[torch.Tensor, float, torch.Tensor]: - if image_height is None or image_width is None: - # skip the resizing - imre_ = torch.from_numpy(image) - return imre_, 1.0, torch.ones_like(imre_[:1]) - # takes numpy array, returns pytorch tensor - minscale = min( - image_height / image.shape[-2], - image_width / image.shape[-1], - ) - imre = torch.nn.functional.interpolate( - torch.from_numpy(image)[None], - scale_factor=minscale, - mode=mode, - align_corners=False if mode == "bilinear" else None, - recompute_scale_factor=True, - )[0] - imre_ = torch.zeros(image.shape[0], image_height, image_width) - imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre - mask = torch.zeros(1, image_height, image_width) - mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] 
= 1.0
-    return imre_, minscale, mask
-
-
 def _load_image(path) -> np.ndarray:
     with Image.open(path) as pil_im:
         im = np.array(pil_im.convert("RGB"))
diff --git a/pytorch3d/implicitron/dataset/dataset_base.py b/pytorch3d/implicitron/dataset/dataset_base.py
index 2c1bb7527..cbc871a1e 100644
--- a/pytorch3d/implicitron/dataset/dataset_base.py
+++ b/pytorch3d/implicitron/dataset/dataset_base.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.

+import warnings
 from collections import defaultdict
 from dataclasses import dataclass, field, fields
 from typing import (
@@ -23,16 +24,17 @@
 import numpy as np
 import torch

-from pytorch3d.renderer.camera_utils import join_cameras_as_batch
-from pytorch3d.renderer.cameras import CamerasBase, PerspectiveCameras
-from pytorch3d.structures.pointclouds import join_pointclouds_as_batch, Pointclouds
 from pytorch3d.implicitron.dataset.utils import (
-    _crop_around_box,
-    _clamp_box_to_image_bounds_and_round,
     _bbox_xyxy_to_xywh,
+    _clamp_box_to_image_bounds_and_round,
+    _crop_around_box,
     _get_clamp_bbox,
     _rescale_bbox,
+    _resize_image,
 )
+from pytorch3d.renderer.camera_utils import join_cameras_as_batch
+from pytorch3d.renderer.cameras import CamerasBase, PerspectiveCameras
+from pytorch3d.structures.pointclouds import join_pointclouds_as_batch, Pointclouds


 @dataclass
@@ -97,6 +99,7 @@ class FrameData(Mapping[str, Any]):
         frame_type: The type of the loaded frame specified in `subset_lists_file`,
             if provided.
         meta: A dict for storing additional frame information.
+        cropped: Flag to avoid cropping the same FrameData twice
     """

     frame_number: Optional[torch.LongTensor]
@@ -123,6 +126,7 @@ class FrameData(Mapping[str, Any]):
     sequence_point_cloud_idx: Optional[torch.Tensor] = None
     frame_type: Union[str, List[str], None] = None  # known | unseen
     meta: dict = field(default_factory=lambda: {})
+    cropped: bool = False

     def to(self, *args, **kwargs):
         new_params = {}
@@ -151,29 +155,105 @@ def __getitem__(self, key):
     def __len__(self):
         return len(fields(self))

-    def crop_by_bbox_(self, box_crop_context):
+    def crop_by_bbox_(self, box_crop_context) -> Optional[torch.Tensor]:
+        if self.cropped:
+            warnings.warn(
+                "You called cropping on the same frame twice, "
+                f"sequence_name: {self.sequence_name}; skipping cropping"
+            )
+            return None
+
+        if (
+            self.bbox_xywh is None
+            or self.fg_probability is None
+            or self.mask_path is None
+            or self.image_path is None
+        ):
+            warnings.warn(
+                "You called cropping without loading frame data; "
+                "please call blob_loader.load_ first, skipping cropping"
+            )
+            return None
+
+        bbox_xyxy = _get_clamp_bbox(
+            self.bbox_xywh,
+            # pyre-ignore
+            image_path=self.image_path,
+            box_crop_context=box_crop_context,
+        )
         clamp_bbox_xyxy = _clamp_box_to_image_bounds_and_round(
-            _get_clamp_bbox(
-                self.bbox_xywh,
-                image_path=self.image.path,
-                box_crop_context=box_crop_context,
-            ),
-            image_size_hw=tuple(self.fg_probability.shape[-2:]),
+            bbox_xyxy,
+            # pyre-ignore
+            image_size_hw=tuple(self.image_size_hw),
         )
         self.crop_bbox_xywh = _bbox_xyxy_to_xywh(clamp_bbox_xyxy)

         self.fg_probability = _crop_around_box(
-            self.fg_probability, clamp_bbox_xyxy, self.mask_path
+            self.fg_probability,
+            clamp_bbox_xyxy,
+            # pyre-ignore
+            self.mask_path,
+        )
+        self.image_rgb = _crop_around_box(
+            self.image_rgb,
+            clamp_bbox_xyxy,
+            # pyre-ignore
+            self.image_path,
         )
-        self.image_rgb = _crop_around_box(self.image_rgb, clamp_bbox_xyxy, self.image.path)

-        depth_bbox_xyxy = 
_rescale_bbox(clamp_bbox_xyxy, entry.image.size, self.depth_map.shape[-2:]) - self.depth_map = _crop_around_box(self.depth_map, depth_bbox_xyxy, self.depth_path) + if self.depth_map is not None: + self.depth_map = _crop_around_box( + self.depth_map, + clamp_bbox_xyxy, + # pyre-ignore + self.depth_path, + ) + if self.depth_mask is not None: + self.depth_mask = _crop_around_box( + self.depth_mask, + clamp_bbox_xyxy, + # pyre-ignore + self.mask_path, + ) + self.cropped = True + return clamp_bbox_xyxy + + def resize_frame_(self, image_height, image_width) -> float: + if self.bbox_xywh is not None: + self.bbox_xywh = _rescale_bbox( + self.bbox_xywh, + np.array(self.image_size_hw), + # pyre-ignore + self.image_rgb.shape[-2:], + ) + + self.image_rgb, scale, self.mask_crop = _resize_image( + self.image_rgb, image_height=image_height, image_width=image_width + ) - depth_mask_bbox_xyxy = _rescale_bbox(clamp_bbox_xyxy, entry.image.size, self.depth_mask.shape[-2:]) - self.depth_mask = _crop_around_box(self.depth_mask, depth_mask_bbox_xyxy, self.mask_path) + self.fg_probability, _, _ = _resize_image( + self.fg_probability, + image_height=image_height, + image_width=image_width, + mode="nearest", + ) - self.camera.principal_point_px -= clamp_bbox_xyxy[:2] + if self.depth_map is not None: + self.depth_map, _, _ = _resize_image( + self.depth_map, + image_height=image_height, + image_width=image_width, + mode="nearest", + ) + + if self.depth_mask is not None: + self.depth_mask, _, _ = _resize_image( + self.depth_mask, + image_height=image_height, + image_width=image_width, + mode="nearest", + ) + return scale @classmethod def collate(cls, batch): diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py index 636630680..5f9b2685a 100644 --- a/pytorch3d/implicitron/dataset/json_index_dataset.py +++ b/pytorch3d/implicitron/dataset/json_index_dataset.py @@ -14,7 +14,6 @@ import random import warnings from collections import defaultdict -from dataclasses import field from itertools import islice from typing import ( Any, @@ -161,12 +160,12 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase): sort_frames: bool = False eval_batches: Any = None eval_batch_index: Any = None - subset_to_image_path: Any = None # initialised in __post_init__ - blob_loader: BlobLoader = field(init=False) - frame_annots: List[FrameAnnotsEntry] = field(init=False) - seq_annots: Dict[str, types.SequenceAnnotation] = field(init=False) - _seq_to_idx: Dict[str, List[int]] = field(init=False) + # commented because of OmegaConf (for tests to pass) + # blob_loader: BlobLoader = field(init=False) + # frame_annots: List[FrameAnnotsEntry] = field(init=False) + # seq_annots: Dict[str, types.SequenceAnnotation] = field(init=False) + # _seq_to_idx: Dict[str, List[int]] = field(init=False) def __post_init__(self) -> None: self._load_frames() @@ -177,6 +176,7 @@ def __post_init__(self) -> None: self._filter_db() # also computes sequence indices self._extract_and_set_eval_batches() + # pyre-ignore self.blob_loader = BlobLoader( dataset_root=self.dataset_root, load_images=self.load_images, @@ -219,7 +219,9 @@ def join(self, other_datasets: Iterable["JsonIndexDataset"]) -> None: """ if not all(isinstance(d, JsonIndexDataset) for d in other_datasets): raise ValueError("This function can only join a list of JsonIndexDataset") + # pyre-ignore self.frame_annots.extend([fa for d in other_datasets for fa in d.frame_annots]) + # pyre-ignore self.seq_annots.update( # 
https://gist.github.com/treyhunner/f35292e676efa0be1728 functools.reduce( @@ -295,9 +297,11 @@ def seq_frame_index_to_dataset_index( """ _dataset_seq_frame_n_index = { seq: { + # pyre-ignore self.frame_annots[idx]["frame_annotation"].frame_number: idx for idx in seq_idx } + # pyre-ignore for seq, seq_idx in self._seq_to_idx.items() } @@ -320,6 +324,7 @@ def _get_dataset_idx( # Check that the loaded frame path is consistent # with the one stored in self.frame_annots. assert os.path.normpath( + # pyre-ignore self.frame_annots[idx]["frame_annotation"].image.path ) == os.path.normpath( path @@ -369,6 +374,7 @@ def subset_from_frame_index( # Deep copy the whole dataset except frame_annots, which are large so we # deep copy only the requested subset of frame_annots. + # pyre-ignore memo = {id(self.frame_annots): None} dataset_new = copy.deepcopy(self, memo) dataset_new.frame_annots = copy.deepcopy( @@ -397,9 +403,11 @@ def subset_from_frame_index( return dataset_new def __str__(self) -> str: + # pyre-ignore return f"JsonIndexDataset #frames={len(self.frame_annots)}" def __len__(self) -> int: + # pyre-ignore return len(self.frame_annots) def _get_frame_type(self, entry: FrameAnnotsEntry) -> Optional[str]: @@ -411,6 +419,7 @@ def get_all_train_cameras(self) -> CamerasBase: """ logger.info("Loading all train cameras.") cameras = [] + # pyre-ignore for frame_idx, frame_annot in enumerate(tqdm(self.frame_annots)): frame_type = self._get_frame_type(frame_annot) if frame_type is None: @@ -420,10 +429,12 @@ def get_all_train_cameras(self) -> CamerasBase: return join_cameras_as_batch(cameras) def __getitem__(self, index) -> FrameData: + # pyre-ignore if index >= len(self.frame_annots): raise IndexError(f"index {index} out of range {len(self.frame_annots)}") entry = self.frame_annots[index]["frame_annotation"] + # pyre-ignore point_cloud = self.seq_annots[entry.sequence_name].point_cloud frame_data = FrameData( frame_number=_safe_as_tensor(entry.frame_number, torch.long), @@ -443,9 +454,8 @@ def __getitem__(self, index) -> FrameData: # Optional field frame_data.frame_type = self._get_frame_type(self.frame_annots[index]) - self.blob_loader.load_( - frame_data, entry, self.seq_annots[entry.sequence_name] - ) + # pyre-ignore + self.blob_loader.load_(frame_data, entry, self.seq_annots[entry.sequence_name]) return frame_data def _load_frames(self) -> None: @@ -457,6 +467,7 @@ def _load_frames(self) -> None: ) if not frame_annots_list: raise ValueError("Empty dataset!") + # pyre-ignore self.frame_annots = [ FrameAnnotsEntry(frame_annotation=a, subset=None) for a in frame_annots_list ] @@ -468,6 +479,7 @@ def _load_sequences(self) -> None: seq_annots = types.load_dataclass(zipfile, List[types.SequenceAnnotation]) if not seq_annots: raise ValueError("Empty sequences file!") + # pyre-ignore self.seq_annots = {entry.sequence_name: entry for entry in seq_annots} def _load_subset_lists(self) -> None: @@ -483,6 +495,7 @@ def _load_subset_lists(self) -> None: for subset, frames in subset_to_seq_frame.items() for _, _, path in frames } + # pyre-ignore for frame in self.frame_annots: frame["subset"] = frame_path_to_subset.get( frame["frame_annotation"].image.path, None @@ -495,6 +508,7 @@ def _load_subset_lists(self) -> None: def _sort_frames(self) -> None: # Sort frames to have them grouped by sequence, ordered by timestamp + # pyre-ignore self.frame_annots = sorted( self.frame_annots, key=lambda f: ( @@ -506,6 +520,7 @@ def _sort_frames(self) -> None: def _filter_db(self) -> None: if self.remove_empty_masks: 
logger.info("Removing images with empty masks.") + # pyre-ignore old_len = len(self.frame_annots) msg = "remove_empty_masks needs every MaskAnnotation.mass to be set." @@ -546,6 +561,7 @@ def positive_mass(frame_annot: types.FrameAnnotation) -> bool: if len(self.limit_category_to) > 0: logger.info(f"Limiting dataset to categories: {self.limit_category_to}") + # pyre-ignore self.seq_annots = { name: entry for name, entry in self.seq_annots.items() @@ -583,6 +599,7 @@ def positive_mass(frame_annot: types.FrameAnnotation) -> bool: if self.n_frames_per_sequence > 0: logger.info(f"Taking max {self.n_frames_per_sequence} per sequence.") keep_idx = [] + # pyre-ignore for seq, seq_indices in self._seq_to_idx.items(): # infer the seed from the sequence name, this is reproducible # and makes the selection differ for different sequences @@ -612,14 +629,19 @@ def _invalidate_indexes(self, filter_seq_annots: bool = False) -> None: self._invalidate_seq_to_idx() if filter_seq_annots: + # pyre-ignore self.seq_annots = { - k: v for k, v in self.seq_annots.items() if k in self._seq_to_idx + k: v + for k, v in self.seq_annots.items() + if k in self._seq_to_idx # pyre-ignore } def _invalidate_seq_to_idx(self) -> None: seq_to_idx = defaultdict(list) + # pyre-ignore for idx, entry in enumerate(self.frame_annots): seq_to_idx[entry["frame_annotation"].sequence_name].append(idx) + # pyre-ignore self._seq_to_idx = seq_to_idx def _local_path(self, path: str) -> str: @@ -634,6 +656,7 @@ def get_frame_numbers_and_timestamps( for idx in idxs: if ( subset_filter is not None + # pyre-ignore and self.frame_annots[idx]["subset"] not in subset_filter ): continue @@ -646,6 +669,7 @@ def get_frame_numbers_and_timestamps( def category_to_sequence_names(self) -> Dict[str, List[str]]: c2seq = defaultdict(list) + # pyre-ignore for sequence_name, sa in self.seq_annots.items(): c2seq[sa.category].append(sequence_name) return dict(c2seq) diff --git a/pytorch3d/implicitron/dataset/utils.py b/pytorch3d/implicitron/dataset/utils.py index 6e9af933d..aca0507dd 100644 --- a/pytorch3d/implicitron/dataset/utils.py +++ b/pytorch3d/implicitron/dataset/utils.py @@ -156,3 +156,32 @@ def _bbox_xywh_to_xyxy( def _get_1d_bounds(arr) -> Tuple[int, int]: nz = np.flatnonzero(arr) return nz[0], nz[-1] + 1 + + +def _resize_image( + image, image_height, image_width, mode="bilinear" +) -> Tuple[torch.Tensor, float, torch.Tensor]: + + if type(image) == np.ndarray: + image = torch.from_numpy(image) + + if image_height is None or image_width is None: + # skip the resizing + return image, 1.0, torch.ones_like(image[:1]) + # takes numpy array or tensor, returns pytorch tensor + minscale = min( + image_height / image.shape[-2], + image_width / image.shape[-1], + ) + imre = torch.nn.functional.interpolate( + image[None], + scale_factor=minscale, + mode=mode, + align_corners=False if mode == "bilinear" else None, + recompute_scale_factor=True, + )[0] + imre_ = torch.zeros(image.shape[0], image_height, image_width) + imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre + mask = torch.zeros(1, image_height, image_width) + mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0 + return imre_, minscale, mask diff --git a/pytorch3d/implicitron/dataset/visualize.py b/pytorch3d/implicitron/dataset/visualize.py index 284e903a0..6d0be0362 100644 --- a/pytorch3d/implicitron/dataset/visualize.py +++ b/pytorch3d/implicitron/dataset/visualize.py @@ -44,6 +44,7 @@ def get_implicitron_sequence_pointcloud( sequence_entries = [ ei for ei in sequence_entries + # pyre-ignore[16] if 
dataset.frame_annots[ei]["frame_annotation"].sequence_name == sequence_name ] diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index 16199ad1e..3c45ee793 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -9,7 +9,6 @@ import numpy as np import torch -from pytorch3d.implicitron.dataset.blob_loader import _resize_image from pytorch3d.implicitron.dataset.utils import ( _bbox_xywh_to_xyxy, @@ -20,6 +19,7 @@ _get_bbox_from_mask, _get_clamp_bbox, _rescale_bbox, + _resize_image, ) from tests.common_testing import TestCaseMixin diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index d2a612d48..ef18d6258 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -1,3 +1,9 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + import contextlib import gzip import os @@ -15,8 +21,10 @@ _load_depth_mask, _load_image, _load_mask, + _safe_as_tensor, BlobLoader, ) +from pytorch3d.implicitron.dataset.dataset_base import FrameData from pytorch3d.implicitron.tools.config import get_default_args from pytorch3d.renderer.cameras import PerspectiveCameras @@ -53,6 +61,37 @@ def setUp(self): ) self.frame_annotation = frame_annots_list[0] + sequence_annotations_file = os.path.join( + self.dataset_root, category, "sequence_annotations.jgz" + ) + local_file = self.path_manager.get_local_path(sequence_annotations_file) + with gzip.open(local_file, "rt", encoding="utf8") as zipfile: + seq_annots_list = types.load_dataclass( + zipfile, List[types.SequenceAnnotation] + ) + seq_annots = {entry.sequence_name: entry for entry in seq_annots_list} + self.seq_annotation = seq_annots[self.frame_annotation.sequence_name] + + point_cloud = self.seq_annotation.point_cloud + self.frame_data = FrameData( + frame_number=_safe_as_tensor( + self.frame_annotation.frame_number, torch.long + ), + frame_timestamp=_safe_as_tensor( + self.frame_annotation.frame_timestamp, torch.float + ), + sequence_name=self.frame_annotation.sequence_name, + sequence_category=self.seq_annotation.category, + camera_quality_score=_safe_as_tensor( + self.seq_annotation.viewpoint_quality_score, torch.float + ), + point_cloud_quality_score=_safe_as_tensor( + point_cloud.quality_score, torch.float + ) + if point_cloud is not None + else None, + ) + def test_BlobLoader_args(self): # test that BlobLoader works with get_default_args get_default_args(BlobLoader) @@ -65,58 +104,82 @@ def test_fix_point_cloud_path(self): assert self.blob_loader.dataset_root in modified_path def test_load_(self): + bbox_xywh = None + self.frame_data.image_size_hw = _safe_as_tensor( + self.frame_annotation.image.size, torch.long + ) ( - fg_probability, - mask_path, - bbox_xywh, - ) = self.blob_loader._load_crop_fg_probability(self.frame_annotation) - - assert mask_path - assert torch.is_tensor(fg_probability) - assert torch.is_tensor(bbox_xywh) + self.frame_data.fg_probability, + self.frame_data.mask_path, + self.frame_data.bbox_xywh, + ) = self.blob_loader._load_fg_probability(self.frame_annotation, bbox_xywh) + + assert self.frame_data.mask_path + assert torch.is_tensor(self.frame_data.fg_probability) + assert torch.is_tensor(self.frame_data.bbox_xywh) # assert bboxes shape - self.assertEqual( - fg_probability.shape, torch.Size([1, self.image_height, self.image_width]) + 
self.assertEqual(self.frame_data.bbox_xywh.shape, torch.Size([4])) + ( + self.frame_data.image_rgb, + self.frame_data.image_path, + ) = self.blob_loader._load_images( + self.frame_annotation, self.frame_data.fg_probability ) - self.assertEqual(bbox_xywh.shape, torch.Size([4])) + self.assertEqual(type(self.frame_data.image_rgb), np.ndarray) + assert self.frame_data.image_path + ( - image_rgb, - image_path, - mask_crop, - scale, - ) = self.blob_loader._load_crop_images(self.frame_annotation, fg_probability) - assert torch.is_tensor(image_rgb) - assert image_path - assert torch.is_tensor(mask_crop) + self.frame_data.depth_map, + depth_path, + self.frame_data.depth_mask, + ) = self.blob_loader._load_mask_depth( + self.frame_annotation, + self.frame_data.fg_probability, + ) + assert torch.is_tensor(self.frame_data.depth_map) + assert depth_path + assert torch.is_tensor(self.frame_data.depth_mask) + + clamp_bbox_xyxy = None + if self.blob_loader.box_crop: + clamp_bbox_xyxy = self.frame_data.crop_by_bbox_( + self.blob_loader.box_crop_context + ) + + # assert image and mask shapes after resize + scale = self.frame_data.resize_frame_(self.image_height, self.image_width) assert scale - # assert image and mask shapes self.assertEqual( - image_rgb.shape, torch.Size([3, self.image_height, self.image_width]) + self.frame_data.mask_crop.shape, + torch.Size([1, self.image_height, self.image_width]), ) self.assertEqual( - mask_crop.shape, torch.Size([1, self.image_height, self.image_width]) + self.frame_data.image_rgb.shape, + torch.Size([3, self.image_height, self.image_width]), ) - - (depth_map, depth_path, depth_mask,) = self.blob_loader._load_mask_depth( - self.frame_annotation, - fg_probability, + self.assertEqual( + self.frame_data.mask_crop.shape, + torch.Size([1, self.image_height, self.image_width]), + ) + self.assertEqual( + self.frame_data.fg_probability.shape, + torch.Size([1, self.image_height, self.image_width]), ) - assert torch.is_tensor(depth_map) - assert depth_path - assert torch.is_tensor(depth_mask) - # assert image and mask shapes self.assertEqual( - depth_map.shape, torch.Size([1, self.image_height, self.image_width]) + self.frame_data.depth_map.shape, + torch.Size([1, self.image_height, self.image_width]), ) self.assertEqual( - depth_mask.shape, torch.Size([1, self.image_height, self.image_width]) + self.frame_data.depth_mask.shape, + torch.Size([1, self.image_height, self.image_width]), ) - camera = self.blob_loader._get_pytorch3d_camera( + self.frame_data.camera = self.blob_loader._get_pytorch3d_camera( self.frame_annotation, scale, + clamp_bbox_xyxy, ) - self.assertEqual(type(camera), PerspectiveCameras) + self.assertEqual(type(self.frame_data.camera), PerspectiveCameras) def test_load_image(self): path = os.path.join(self.dataset_root, self.frame_annotation.image.path) From 0fc3253d029ccf1551f0439b7c787fbb4d76f8bd Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Wed, 15 Mar 2023 18:52:32 +0000 Subject: [PATCH 40/43] using safe_as_tensor for fg_probability --- pytorch3d/implicitron/dataset/blob_loader.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index 9ccf53b2f..13eecdf79 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -142,15 +142,17 @@ def _load_fg_probability( fg_probability = _load_mask(self._local_path(full_path)) # we can use provided bbox_xywh or calculate it based on mask if 
bbox_xywh is None: - bbox_xywh = torch.tensor( - _get_bbox_from_mask(fg_probability, self.box_crop_mask_thr) - ) + bbox_xywh = _get_bbox_from_mask(fg_probability, self.box_crop_mask_thr) if fg_probability.shape[-2:] != entry.image.size: raise ValueError( f"bad mask size: {fg_probability.shape[-2:]} vs {entry.image.size}!" ) - return torch.tensor(fg_probability), full_path, bbox_xywh + return ( + _safe_as_tensor(fg_probability, torch.float), + full_path, + _safe_as_tensor(bbox_xywh, torch.long), + ) def _load_images( self, From 7c8d89daa2b3908a72f847f73edee704780d3f63 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Wed, 15 Mar 2023 18:58:34 +0000 Subject: [PATCH 41/43] made resizing only for loaded objects --- pytorch3d/implicitron/dataset/dataset_base.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/pytorch3d/implicitron/dataset/dataset_base.py b/pytorch3d/implicitron/dataset/dataset_base.py index cbc871a1e..d567fb0b3 100644 --- a/pytorch3d/implicitron/dataset/dataset_base.py +++ b/pytorch3d/implicitron/dataset/dataset_base.py @@ -226,17 +226,18 @@ def resize_frame_(self, image_height, image_width) -> float: # pyre-ignore self.image_rgb.shape[-2:], ) + if self.image_rgb is not None: + self.image_rgb, scale, self.mask_crop = _resize_image( + self.image_rgb, image_height=image_height, image_width=image_width + ) - self.image_rgb, scale, self.mask_crop = _resize_image( - self.image_rgb, image_height=image_height, image_width=image_width - ) - - self.fg_probability, _, _ = _resize_image( - self.fg_probability, - image_height=image_height, - image_width=image_width, - mode="nearest", - ) + if self.fg_probability is not None: + self.fg_probability, _, _ = _resize_image( + self.fg_probability, + image_height=image_height, + image_width=image_width, + mode="nearest", + ) if self.depth_map is not None: self.depth_map, _, _ = _resize_image( From 3027cd7e5f2b615fead37f3338cfe587e84cc9db Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Wed, 15 Mar 2023 19:05:26 +0000 Subject: [PATCH 42/43] fixing scale --- pytorch3d/implicitron/dataset/dataset_base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pytorch3d/implicitron/dataset/dataset_base.py b/pytorch3d/implicitron/dataset/dataset_base.py index d567fb0b3..1684251fb 100644 --- a/pytorch3d/implicitron/dataset/dataset_base.py +++ b/pytorch3d/implicitron/dataset/dataset_base.py @@ -226,6 +226,8 @@ def resize_frame_(self, image_height, image_width) -> float: # pyre-ignore self.image_rgb.shape[-2:], ) + + scale = 1.0 if self.image_rgb is not None: self.image_rgb, scale, self.mask_crop = _resize_image( self.image_rgb, image_height=image_height, image_width=image_width @@ -237,6 +239,7 @@ def resize_frame_(self, image_height, image_width) -> float: image_height=image_height, image_width=image_width, mode="nearest", + ) if self.depth_map is not None: From 7d570c179d94b28c00e3c0c749da6d7150e8d7e7 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Wed, 15 Mar 2023 19:21:46 +0000 Subject: [PATCH 43/43] fixing scale again.. 
--- pytorch3d/implicitron/dataset/blob_loader.py | 15 +++++++++++++-- pytorch3d/implicitron/dataset/dataset_base.py | 5 ++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index 13eecdf79..83f39c78e 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -117,10 +117,21 @@ def load_( if self.box_crop: clamp_bbox_xyxy = frame_data.crop_by_bbox_(self.box_crop_context) - scale = 1.0 + scale = ( + min( + self.image_height / entry.image.size[0], + # pyre-ignore + self.image_width / entry.image.size[1], + ) + if self.image_height is not None and self.image_width is not None + else 1.0 + ) if self.image_height is not None and self.image_width is not None: - scale = frame_data.resize_frame_(self.image_height, self.image_width) + optional_scale = frame_data.resize_frame_( + self.image_height, self.image_width + ) + scale = optional_scale or scale # creating camera taking to account bbox and resize scale if entry.viewpoint is not None: diff --git a/pytorch3d/implicitron/dataset/dataset_base.py b/pytorch3d/implicitron/dataset/dataset_base.py index 1684251fb..7c4268fb9 100644 --- a/pytorch3d/implicitron/dataset/dataset_base.py +++ b/pytorch3d/implicitron/dataset/dataset_base.py @@ -218,7 +218,7 @@ def crop_by_bbox_(self, box_crop_context) -> Optional[torch.Tensor]: self.cropped = True return clamp_bbox_xyxy - def resize_frame_(self, image_height, image_width) -> float: + def resize_frame_(self, image_height, image_width) -> Optional[float]: if self.bbox_xywh is not None: self.bbox_xywh = _rescale_bbox( self.bbox_xywh, @@ -227,7 +227,7 @@ def resize_frame_(self, image_height, image_width) -> float: self.image_rgb.shape[-2:], ) - scale = 1.0 + scale = None if self.image_rgb is not None: self.image_rgb, scale, self.mask_crop = _resize_image( self.image_rgb, image_height=image_height, image_width=image_width @@ -239,7 +239,6 @@ def resize_frame_(self, image_height, image_width) -> float: image_height=image_height, image_width=image_width, mode="nearest", - ) if self.depth_map is not None:
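
After the final patch, the division of labor is: `BlobLoader.load_` reads the blobs and mutates the `FrameData` in place, `FrameData.crop_by_bbox_` and `FrameData.resize_frame_` own the geometry changes (returning `clamp_bbox_xyxy` and the rescaling factor respectively, or `None` when there is nothing to do), and `_get_pytorch3d_camera` folds the crop offset and resize scale into the camera. The sketch below illustrates that flow end to end; it is not part of the patch series, and the dataset root, category, and output size are illustrative assumptions:

    # Illustrative usage sketch only; paths, category, and sizes are assumptions.
    from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset

    dataset = JsonIndexDataset(
        frame_annotations_file="DATASET_ROOT/teddybear/frame_annotations.jgz",
        sequence_annotations_file="DATASET_ROOT/teddybear/sequence_annotations.jgz",
        dataset_root="DATASET_ROOT",
        image_height=800,
        image_width=800,
        box_crop=True,
        load_point_clouds=False,
    )

    # __getitem__ builds the FrameData shell from the json annotations, then
    # blob_loader.load_ mutates it in place:
    #   1. _load_fg_probability: mask + bbox_xywh (computed from the mask
    #      unless a bbox_xywh tensor is passed in),
    #   2. _load_images / _load_mask_depth: full-resolution blobs,
    #   3. frame_data.crop_by_bbox_(box_crop_context): in-place crop, returns
    #      clamp_bbox_xyxy (or None if already cropped / nothing loaded),
    #   4. frame_data.resize_frame_(H, W): in-place resize, returns the
    #      rescaling factor (None if no image was loaded, in which case load_
    #      falls back to the scale precomputed from entry.image.size),
    #   5. _get_pytorch3d_camera(entry, scale, clamp_bbox_xyxy): shifts the
    #      principal point by the crop offset and rescales the focal length.
    frame_data = dataset[0]
    assert frame_data.image_rgb.shape == (3, 800, 800)
    assert frame_data.cropped  # guards against calling crop_by_bbox_ twice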