From aa34aa0ec14dc31cece99563571a57f6483ca81c Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 28 Feb 2023 15:23:29 +0000 Subject: [PATCH 01/43] created class BlobLoader and moved all related function to sep file --- .../implicitron/dataset/json_index_dataset.py | 461 ++------------- pytorch3d/implicitron/dataset/load_blob.py | 542 ++++++++++++++++++ 2 files changed, 576 insertions(+), 427 deletions(-) create mode 100644 pytorch3d/implicitron/dataset/load_blob.py diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py index 669f4e9b6..ac9daf02a 100644 --- a/pytorch3d/implicitron/dataset/json_index_dataset.py +++ b/pytorch3d/implicitron/dataset/json_index_dataset.py @@ -32,17 +32,16 @@ import numpy as np import torch -from PIL import Image +from tqdm import tqdm + from pytorch3d.implicitron.tools.config import registry, ReplaceableBase -from pytorch3d.io import IO +from pytorch3d.implicitron.dataset import types +from pytorch3d.implicitron.dataset.dataset_base import DatasetBase, FrameData +from pytorch3d.implicitron.dataset.load_blob import BlobLoader +from pytorch3d.implicitron.dataset.utils import is_known_frame_scalar from pytorch3d.renderer.camera_utils import join_cameras_as_batch from pytorch3d.renderer.cameras import CamerasBase, PerspectiveCameras from pytorch3d.structures.pointclouds import Pointclouds -from tqdm import tqdm - -from . import types -from .dataset_base import DatasetBase, FrameData -from .utils import is_known_frame_scalar logger = logging.getLogger(__name__) @@ -53,6 +52,7 @@ class FrameAnnotsEntry(TypedDict): subset: Optional[str] + # pyre-ignore frame_annotation: types.FrameAnnotation else: @@ -60,6 +60,7 @@ class FrameAnnotsEntry(TypedDict): @registry.register +# pyre-ignore class JsonIndexDataset(DatasetBase, ReplaceableBase): """ A dataset with annotations in json files like the Common Objects in 3D @@ -130,6 +131,7 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase): frame_annotations_type: ClassVar[ Type[types.FrameAnnotation] + # pyre-ignore ] = types.FrameAnnotation path_manager: Any = None @@ -162,6 +164,7 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase): sort_frames: bool = False eval_batches: Any = None eval_batch_index: Any = None + loader: BlobLoader # frame_annots: List[FrameAnnotsEntry] = field(init=False) # seq_annots: Dict[str, types.SequenceAnnotation] = field(init=False) @@ -175,6 +178,23 @@ def __post_init__(self) -> None: self._load_subset_lists() self._filter_db() # also computes sequence indices self._extract_and_set_eval_batches() + + self.loader = BlobLoader( + self.dataset_root, + self.load_images, + self.load_depths, + self.load_depth_masks, + self.load_masks, + self.load_point_clouds, + self.max_points, + self.mask_images, + self.mask_depths, + self.image_height, + self.image_width, + self.box_crop, + self.box_crop_mask_thr, + self.box_crop_context, + ) logger.info(str(self)) def _extract_and_set_eval_batches(self): @@ -207,12 +227,11 @@ def join(self, other_datasets: Iterable[DatasetBase]) -> None: # https://gist.github.com/treyhunner/f35292e676efa0be1728 functools.reduce( lambda a, b: {**a, **b}, - [d.seq_annots for d in other_datasets], # pyre-ignore[16] + [d.seq_annots for d in other_datasets], ) ) all_eval_batches = [ self.eval_batches, - # pyre-ignore *[d.eval_batches for d in other_datasets], ] if not ( @@ -396,6 +415,7 @@ def __len__(self) -> int: def _get_frame_type(self, entry: FrameAnnotsEntry) -> Optional[str]: return entry["subset"] + # 
pyre-ignore def get_all_train_cameras(self) -> CamerasBase: """ Returns the cameras corresponding to all the known frames. @@ -411,6 +431,7 @@ def get_all_train_cameras(self) -> CamerasBase: cameras.append(self[frame_idx].camera) return join_cameras_as_batch(cameras) + # pyre-ignore def __getitem__(self, index) -> FrameData: # pyre-ignore[16] if index >= len(self.frame_annots): @@ -438,238 +459,14 @@ def __getitem__(self, index) -> FrameData: # The rest of the fields are optional frame_data.frame_type = self._get_frame_type(self.frame_annots[index]) - ( - frame_data.fg_probability, - frame_data.mask_path, - frame_data.bbox_xywh, - clamp_bbox_xyxy, - frame_data.crop_bbox_xywh, - ) = self._load_crop_fg_probability(entry) - - scale = 1.0 - if self.load_images and entry.image is not None: - # original image size - frame_data.image_size_hw = _safe_as_tensor(entry.image.size, torch.long) - - ( - frame_data.image_rgb, - frame_data.image_path, - frame_data.mask_crop, - scale, - ) = self._load_crop_images( - entry, frame_data.fg_probability, clamp_bbox_xyxy - ) - - if self.load_depths and entry.depth is not None: - ( - frame_data.depth_map, - frame_data.depth_path, - frame_data.depth_mask, - ) = self._load_mask_depth(entry, clamp_bbox_xyxy, frame_data.fg_probability) - - if entry.viewpoint is not None: - frame_data.camera = self._get_pytorch3d_camera( - entry, - scale, - clamp_bbox_xyxy, - ) - - if self.load_point_clouds and point_cloud is not None: - pcl_path = self._fix_point_cloud_path(point_cloud.path) - frame_data.sequence_point_cloud = _load_pointcloud( - self._local_path(pcl_path), max_points=self.max_points - ) - frame_data.sequence_point_cloud_path = pcl_path - + frame_data = self.loader.load(frame_data, entry, point_cloud) return frame_data - def _fix_point_cloud_path(self, path: str) -> str: - """ - Fix up a point cloud path from the dataset. - Some files in Co3Dv2 have an accidental absolute path stored. - """ - unwanted_prefix = ( - "/large_experiments/p3/replay/datasets/co3d/co3d45k_220512/export_v23/" - ) - if path.startswith(unwanted_prefix): - path = path[len(unwanted_prefix) :] - return os.path.join(self.dataset_root, path) - - def _load_crop_fg_probability( - self, entry: types.FrameAnnotation - ) -> Tuple[ - Optional[torch.Tensor], - Optional[str], - Optional[torch.Tensor], - Optional[torch.Tensor], - Optional[torch.Tensor], - ]: - fg_probability = None - full_path = None - bbox_xywh = None - clamp_bbox_xyxy = None - crop_box_xywh = None - - if (self.load_masks or self.box_crop) and entry.mask is not None: - full_path = os.path.join(self.dataset_root, entry.mask.path) - mask = _load_mask(self._local_path(full_path)) - - if mask.shape[-2:] != entry.image.size: - raise ValueError( - f"bad mask size: {mask.shape[-2:]} vs {entry.image.size}!" 
- ) - - bbox_xywh = torch.tensor(_get_bbox_from_mask(mask, self.box_crop_mask_thr)) - - if self.box_crop: - clamp_bbox_xyxy = _clamp_box_to_image_bounds_and_round( - _get_clamp_bbox( - bbox_xywh, - image_path=entry.image.path, - box_crop_context=self.box_crop_context, - ), - image_size_hw=tuple(mask.shape[-2:]), - ) - crop_box_xywh = _bbox_xyxy_to_xywh(clamp_bbox_xyxy) - - mask = _crop_around_box(mask, clamp_bbox_xyxy, full_path) - - fg_probability, _, _ = self._resize_image(mask, mode="nearest") - - return fg_probability, full_path, bbox_xywh, clamp_bbox_xyxy, crop_box_xywh - - def _load_crop_images( - self, - entry: types.FrameAnnotation, - fg_probability: Optional[torch.Tensor], - clamp_bbox_xyxy: Optional[torch.Tensor], - ) -> Tuple[torch.Tensor, str, torch.Tensor, float]: - assert self.dataset_root is not None and entry.image is not None - path = os.path.join(self.dataset_root, entry.image.path) - image_rgb = _load_image(self._local_path(path)) - - if image_rgb.shape[-2:] != entry.image.size: - raise ValueError( - f"bad image size: {image_rgb.shape[-2:]} vs {entry.image.size}!" - ) - - if self.box_crop: - assert clamp_bbox_xyxy is not None - image_rgb = _crop_around_box(image_rgb, clamp_bbox_xyxy, path) - - image_rgb, scale, mask_crop = self._resize_image(image_rgb) - - if self.mask_images: - assert fg_probability is not None - image_rgb *= fg_probability - - return image_rgb, path, mask_crop, scale - - def _load_mask_depth( - self, - entry: types.FrameAnnotation, - clamp_bbox_xyxy: Optional[torch.Tensor], - fg_probability: Optional[torch.Tensor], - ) -> Tuple[torch.Tensor, str, torch.Tensor]: - entry_depth = entry.depth - assert entry_depth is not None - path = os.path.join(self.dataset_root, entry_depth.path) - depth_map = _load_depth(self._local_path(path), entry_depth.scale_adjustment) - - if self.box_crop: - assert clamp_bbox_xyxy is not None - depth_bbox_xyxy = _rescale_bbox( - clamp_bbox_xyxy, entry.image.size, depth_map.shape[-2:] - ) - depth_map = _crop_around_box(depth_map, depth_bbox_xyxy, path) - - depth_map, _, _ = self._resize_image(depth_map, mode="nearest") - - if self.mask_depths: - assert fg_probability is not None - depth_map *= fg_probability - - if self.load_depth_masks: - assert entry_depth.mask_path is not None - mask_path = os.path.join(self.dataset_root, entry_depth.mask_path) - depth_mask = _load_depth_mask(self._local_path(mask_path)) - - if self.box_crop: - assert clamp_bbox_xyxy is not None - depth_mask_bbox_xyxy = _rescale_bbox( - clamp_bbox_xyxy, entry.image.size, depth_mask.shape[-2:] - ) - depth_mask = _crop_around_box( - depth_mask, depth_mask_bbox_xyxy, mask_path - ) - - depth_mask, _, _ = self._resize_image(depth_mask, mode="nearest") - else: - depth_mask = torch.ones_like(depth_map) - - return depth_map, path, depth_mask - - def _get_pytorch3d_camera( - self, - entry: types.FrameAnnotation, - scale: float, - clamp_bbox_xyxy: Optional[torch.Tensor], - ) -> PerspectiveCameras: - entry_viewpoint = entry.viewpoint - assert entry_viewpoint is not None - # principal point and focal length - principal_point = torch.tensor( - entry_viewpoint.principal_point, dtype=torch.float - ) - focal_length = torch.tensor(entry_viewpoint.focal_length, dtype=torch.float) - - half_image_size_wh_orig = ( - torch.tensor(list(reversed(entry.image.size)), dtype=torch.float) / 2.0 - ) - - # first, we convert from the dataset's NDC convention to pixels - format = entry_viewpoint.intrinsics_format - if format.lower() == "ndc_norm_image_bounds": - # this is e.g. 
currently used in CO3D for storing intrinsics - rescale = half_image_size_wh_orig - elif format.lower() == "ndc_isotropic": - rescale = half_image_size_wh_orig.min() - else: - raise ValueError(f"Unknown intrinsics format: {format}") - - # principal point and focal length in pixels - principal_point_px = half_image_size_wh_orig - principal_point * rescale - focal_length_px = focal_length * rescale - if self.box_crop: - assert clamp_bbox_xyxy is not None - principal_point_px -= clamp_bbox_xyxy[:2] - - # now, convert from pixels to PyTorch3D v0.5+ NDC convention - if self.image_height is None or self.image_width is None: - out_size = list(reversed(entry.image.size)) - else: - out_size = [self.image_width, self.image_height] - - half_image_size_output = torch.tensor(out_size, dtype=torch.float) / 2.0 - half_min_image_size_output = half_image_size_output.min() - - # rescaled principal point and focal length in ndc - principal_point = ( - half_image_size_output - principal_point_px * scale - ) / half_min_image_size_output - focal_length = focal_length_px * scale / half_min_image_size_output - - return PerspectiveCameras( - focal_length=focal_length[None], - principal_point=principal_point[None], - R=torch.tensor(entry_viewpoint.R, dtype=torch.float)[None], - T=torch.tensor(entry_viewpoint.T, dtype=torch.float)[None], - ) - def _load_frames(self) -> None: logger.info(f"Loading Co3D frames from {self.frame_annotations_file}.") local_file = self._local_path(self.frame_annotations_file) with gzip.open(local_file, "rt", encoding="utf8") as zipfile: + # pyre-ignore frame_annots_list = types.load_dataclass( zipfile, List[self.frame_annotations_type] ) @@ -684,6 +481,7 @@ def _load_sequences(self) -> None: logger.info(f"Loading Co3D sequences from {self.sequence_annotations_file}.") local_file = self._local_path(self.sequence_annotations_file) with gzip.open(local_file, "rt", encoding="utf8") as zipfile: + # pyre-ignore seq_annots = types.load_dataclass(zipfile, List[types.SequenceAnnotation]) if not seq_annots: raise ValueError("Empty sequences file!") @@ -853,35 +651,6 @@ def _invalidate_seq_to_idx(self) -> None: # pyre-ignore[16] self._seq_to_idx = seq_to_idx - def _resize_image( - self, image, mode="bilinear" - ) -> Tuple[torch.Tensor, float, torch.Tensor]: - image_height, image_width = self.image_height, self.image_width - if image_height is None or image_width is None: - # skip the resizing - imre_ = torch.from_numpy(image) - return imre_, 1.0, torch.ones_like(imre_[:1]) - # takes numpy array, returns pytorch tensor - minscale = min( - image_height / image.shape[-2], - image_width / image.shape[-1], - ) - imre = torch.nn.functional.interpolate( - torch.from_numpy(image)[None], - scale_factor=minscale, - mode=mode, - align_corners=False if mode == "bilinear" else None, - recompute_scale_factor=True, - )[0] - # pyre-fixme[19]: Expected 1 positional argument. - imre_ = torch.zeros(image.shape[0], self.image_height, self.image_width) - imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre - # pyre-fixme[6]: For 2nd param expected `int` but got `Optional[int]`. - # pyre-fixme[6]: For 3rd param expected `int` but got `Optional[int]`. 
- mask = torch.zeros(1, self.image_height, self.image_width) - mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0 - return imre_, minscale, mask - def _local_path(self, path: str) -> str: if self.path_manager is None: return path @@ -920,167 +689,5 @@ def _seq_name_to_seed(seq_name) -> int: return int(hashlib.sha1(seq_name.encode("utf-8")).hexdigest(), 16) -def _load_image(path) -> np.ndarray: - with Image.open(path) as pil_im: - im = np.array(pil_im.convert("RGB")) - im = im.transpose((2, 0, 1)) - im = im.astype(np.float32) / 255.0 - return im - - -def _load_16big_png_depth(depth_png) -> np.ndarray: - with Image.open(depth_png) as depth_pil: - # the image is stored with 16-bit depth but PIL reads it as I (32 bit). - # we cast it to uint16, then reinterpret as float16, then cast to float32 - depth = ( - np.frombuffer(np.array(depth_pil, dtype=np.uint16), dtype=np.float16) - .astype(np.float32) - .reshape((depth_pil.size[1], depth_pil.size[0])) - ) - return depth - - -def _load_1bit_png_mask(file: str) -> np.ndarray: - with Image.open(file) as pil_im: - mask = (np.array(pil_im.convert("L")) > 0.0).astype(np.float32) - return mask - - -def _load_depth_mask(path: str) -> np.ndarray: - if not path.lower().endswith(".png"): - raise ValueError('unsupported depth mask file name "%s"' % path) - m = _load_1bit_png_mask(path) - return m[None] # fake feature channel - - -def _load_depth(path, scale_adjustment) -> np.ndarray: - if not path.lower().endswith(".png"): - raise ValueError('unsupported depth file name "%s"' % path) - - d = _load_16big_png_depth(path) * scale_adjustment - d[~np.isfinite(d)] = 0.0 - return d[None] # fake feature channel - - -def _load_mask(path) -> np.ndarray: - with Image.open(path) as pil_im: - mask = np.array(pil_im) - mask = mask.astype(np.float32) / 255.0 - return mask[None] # fake feature channel - - -def _get_1d_bounds(arr) -> Tuple[int, int]: - nz = np.flatnonzero(arr) - return nz[0], nz[-1] + 1 - - -def _get_bbox_from_mask( - mask, thr, decrease_quant: float = 0.05 -) -> Tuple[int, int, int, int]: - # bbox in xywh - masks_for_box = np.zeros_like(mask) - while masks_for_box.sum() <= 1.0: - masks_for_box = (mask > thr).astype(np.float32) - thr -= decrease_quant - if thr <= 0.0: - warnings.warn(f"Empty masks_for_bbox (thr={thr}) => using full image.") - - x0, x1 = _get_1d_bounds(masks_for_box.sum(axis=-2)) - y0, y1 = _get_1d_bounds(masks_for_box.sum(axis=-1)) - - return x0, y0, x1 - x0, y1 - y0 - - -def _get_clamp_bbox( - bbox: torch.Tensor, - box_crop_context: float = 0.0, - image_path: str = "", -) -> torch.Tensor: - # box_crop_context: rate of expansion for bbox - # returns possibly expanded bbox xyxy as float - - bbox = bbox.clone() # do not edit bbox in place - - # increase box size - if box_crop_context > 0.0: - c = box_crop_context - bbox = bbox.float() - bbox[0] -= bbox[2] * c / 2 - bbox[1] -= bbox[3] * c / 2 - bbox[2] += bbox[2] * c - bbox[3] += bbox[3] * c - - if (bbox[2:] <= 1.0).any(): - raise ValueError( - f"squashed image {image_path}!! The bounding box contains no pixels." 
- ) - - bbox[2:] = torch.clamp(bbox[2:], 2) # set min height, width to 2 along both axes - bbox_xyxy = _bbox_xywh_to_xyxy(bbox, clamp_size=2) - - return bbox_xyxy - - -def _crop_around_box(tensor, bbox, impath: str = ""): - # bbox is xyxy, where the upper bound is corrected with +1 - bbox = _clamp_box_to_image_bounds_and_round( - bbox, - image_size_hw=tensor.shape[-2:], - ) - tensor = tensor[..., bbox[1] : bbox[3], bbox[0] : bbox[2]] - assert all(c > 0 for c in tensor.shape), f"squashed image {impath}" - return tensor - - -def _clamp_box_to_image_bounds_and_round( - bbox_xyxy: torch.Tensor, - image_size_hw: Tuple[int, int], -) -> torch.LongTensor: - bbox_xyxy = bbox_xyxy.clone() - bbox_xyxy[[0, 2]] = torch.clamp(bbox_xyxy[[0, 2]], 0, image_size_hw[-1]) - bbox_xyxy[[1, 3]] = torch.clamp(bbox_xyxy[[1, 3]], 0, image_size_hw[-2]) - if not isinstance(bbox_xyxy, torch.LongTensor): - bbox_xyxy = bbox_xyxy.round().long() - return bbox_xyxy # pyre-ignore [7] - - -def _rescale_bbox(bbox: torch.Tensor, orig_res, new_res) -> torch.Tensor: - assert bbox is not None - assert np.prod(orig_res) > 1e-8 - # average ratio of dimensions - rel_size = (new_res[0] / orig_res[0] + new_res[1] / orig_res[1]) / 2.0 - return bbox * rel_size - - -def _bbox_xyxy_to_xywh(xyxy: torch.Tensor) -> torch.Tensor: - wh = xyxy[2:] - xyxy[:2] - xywh = torch.cat([xyxy[:2], wh]) - return xywh - - -def _bbox_xywh_to_xyxy( - xywh: torch.Tensor, clamp_size: Optional[int] = None -) -> torch.Tensor: - xyxy = xywh.clone() - if clamp_size is not None: - xyxy[2:] = torch.clamp(xyxy[2:], clamp_size) - xyxy[2:] += xyxy[:2] - return xyxy - - def _safe_as_tensor(data, dtype): - if data is None: - return None - return torch.tensor(data, dtype=dtype) - - -# NOTE this cache is per-worker; they are implemented as processes. -# each batch is loaded and collated by a single worker; -# since sequences tend to co-occur within batches, this is useful. -@functools.lru_cache(maxsize=256) -def _load_pointcloud(pcl_path: Union[str, Path], max_points: int = 0) -> Pointclouds: - pcl = IO().load_pointcloud(pcl_path) - if max_points > 0: - pcl = pcl.subsample(max_points) - - return pcl + return torch.tensor(data, dtype=dtype) if data is not None else None diff --git a/pytorch3d/implicitron/dataset/load_blob.py b/pytorch3d/implicitron/dataset/load_blob.py new file mode 100644 index 000000000..b10fb1267 --- /dev/null +++ b/pytorch3d/implicitron/dataset/load_blob.py @@ -0,0 +1,542 @@ +import functools +import os +import warnings + +import numpy as np +from PIL import Image +import torch +from typing import Any, Optional, Tuple + +from pytorch3d.implicitron.dataset import types +from pytorch3d.implicitron.dataset.dataset_base import FrameData +from pytorch3d.io import IO + + +class BlobLoader: + """ + A loader for correctly (according to setup) loading blobs for FrameData + + Args: + dataset_root: The root folder of the dataset; all the paths in jsons are + specified relative to this root (but not json paths themselves). + load_images: Enable loading the frame RGB data. + load_depths: Enable loading the frame depth maps. + load_depth_masks: Enable loading the frame depth map masks denoting the + depth values used for evaluation (the points consistent across views). + load_masks: Enable loading frame foreground masks. + load_point_clouds: Enable loading sequence-level point clouds. + max_points: Cap on the number of loaded points in the point cloud; + if reached, they are randomly sampled without replacement. 
+ mask_images: Whether to mask the images with the loaded foreground masks; + 0 value is used for background. + mask_depths: Whether to mask the depth maps with the loaded foreground + masks; 0 value is used for background. + image_height: The height of the returned images, masks, and depth maps; + aspect ratio is preserved during cropping/resizing. + image_width: The width of the returned images, masks, and depth maps; + aspect ratio is preserved during cropping/resizing. + box_crop: Enable cropping of the image around the bounding box inferred + from the foreground region of the loaded segmentation mask; masks + and depth maps are cropped accordingly; cameras are corrected. + box_crop_mask_thr: The threshold used to separate pixels into foreground + and background based on the foreground_probability mask; if no value + is greater than this threshold, the loader lowers it and repeats. + box_crop_context: The amount of additional padding added to each + dimension of the cropping bounding box, relative to box size. + """ + + path_manager: Any = None + + def __init__( + self, + dataset_root, + load_images, + load_depths, + load_depth_masks, + load_masks, + load_point_clouds, + max_points, + mask_images, + mask_depths, + image_height, + image_width, + box_crop, + box_crop_mask_thr, + box_crop_context, + ): + self.dataset_root = dataset_root + self.load_images = load_images + self.load_depths = load_depths + self.load_depth_masks = load_depth_masks + self.load_masks = load_masks + self.load_point_clouds = load_point_clouds + self.max_points = max_points + self.mask_images = mask_images + self.mask_depths = mask_depths + self.image_height = image_height + self.image_width = image_width + self.box_crop = box_crop + self.box_crop_mask_thr = box_crop_mask_thr + self.box_crop_context = box_crop_context + + def load( + self, + # pyre-ignore + frame_data: FrameData, + # pyre-ignore + entry: types.FrameAnnotation, + # pyre-ignore + point_cloud: types.PointCloudAnnotation, + ) -> FrameData: + """Main method for loader.""" + ( + frame_data.fg_probability, + frame_data.mask_path, + frame_data.bbox_xywh, + clamp_bbox_xyxy, + frame_data.crop_bbox_xywh, + ) = self._load_crop_fg_probability(entry) + + scale = 1.0 + if self.load_images and entry.image is not None: + # original image size + frame_data.image_size_hw = _safe_as_tensor(entry.image.size, torch.long) + + ( + frame_data.image_rgb, + frame_data.image_path, + frame_data.mask_crop, + scale, + ) = self._load_crop_images( + entry, frame_data.fg_probability, clamp_bbox_xyxy + ) + + if self.load_depths and entry.depth is not None: + ( + frame_data.depth_map, + frame_data.depth_path, + frame_data.depth_mask, + ) = self._load_mask_depth(entry, clamp_bbox_xyxy, frame_data.fg_probability) + + if entry.viewpoint is not None: + frame_data.camera = self._get_pytorch3d_camera( + entry, + scale, + clamp_bbox_xyxy, + ) + + if self.load_point_clouds and point_cloud is not None: + pcl_path = self._fix_point_cloud_path(point_cloud.path) + frame_data.sequence_point_cloud = _load_pointcloud( + self._local_path(pcl_path), max_points=self.max_points + ) + frame_data.sequence_point_cloud_path = pcl_path + return frame_data + + def _load_crop_fg_probability( + self, entry: types.FrameAnnotation + ) -> Tuple[ + Optional[torch.Tensor], + Optional[str], + Optional[torch.Tensor], + Optional[torch.Tensor], + Optional[torch.Tensor], + ]: + fg_probability = None + full_path = None + bbox_xywh = None + clamp_bbox_xyxy = None + crop_box_xywh = None + + if (self.load_masks or 
self.box_crop) and entry.mask is not None: + full_path = os.path.join(self.dataset_root, entry.mask.path) + mask = _load_mask(self._local_path(full_path)) + + if mask.shape[-2:] != entry.image.size: + raise ValueError( + f"bad mask size: {mask.shape[-2:]} vs {entry.image.size}!" + ) + + bbox_xywh = torch.tensor(_get_bbox_from_mask(mask, self.box_crop_mask_thr)) + + if self.box_crop: + clamp_bbox_xyxy = _clamp_box_to_image_bounds_and_round( + _get_clamp_bbox( + bbox_xywh, + image_path=entry.image.path, + box_crop_context=self.box_crop_context, + ), + image_size_hw=tuple(mask.shape[-2:]), + ) + crop_box_xywh = _bbox_xyxy_to_xywh(clamp_bbox_xyxy) + + mask = _crop_around_box(mask, clamp_bbox_xyxy, full_path) + + fg_probability, _, _ = self._resize_image(mask, mode="nearest") + + return fg_probability, full_path, bbox_xywh, clamp_bbox_xyxy, crop_box_xywh + + def _load_crop_images( + self, + entry: types.FrameAnnotation, + fg_probability: Optional[torch.Tensor], + clamp_bbox_xyxy: Optional[torch.Tensor], + ) -> Tuple[torch.Tensor, str, torch.Tensor, float]: + assert self.dataset_root is not None and entry.image is not None + path = os.path.join(self.dataset_root, entry.image.path) + image_rgb = _load_image(self._local_path(path)) + + if image_rgb.shape[-2:] != entry.image.size: + raise ValueError( + f"bad image size: {image_rgb.shape[-2:]} vs {entry.image.size}!" + ) + + if self.box_crop: + assert clamp_bbox_xyxy is not None + image_rgb = _crop_around_box(image_rgb, clamp_bbox_xyxy, path) + + image_rgb, scale, mask_crop = self._resize_image(image_rgb) + + if self.mask_images: + assert fg_probability is not None + image_rgb *= fg_probability + + return image_rgb, path, mask_crop, scale + + def _load_mask_depth( + self, + entry: types.FrameAnnotation, + clamp_bbox_xyxy: Optional[torch.Tensor], + fg_probability: Optional[torch.Tensor], + ) -> Tuple[torch.Tensor, str, torch.Tensor]: + entry_depth = entry.depth + assert entry_depth is not None + path = os.path.join(self.dataset_root, entry_depth.path) + depth_map = _load_depth(self._local_path(path), entry_depth.scale_adjustment) + + if self.box_crop: + assert clamp_bbox_xyxy is not None + depth_bbox_xyxy = _rescale_bbox( + clamp_bbox_xyxy, entry.image.size, depth_map.shape[-2:] + ) + depth_map = _crop_around_box(depth_map, depth_bbox_xyxy, path) + + depth_map, _, _ = self._resize_image(depth_map, mode="nearest") + + if self.mask_depths: + assert fg_probability is not None + depth_map *= fg_probability + + if self.load_depth_masks: + assert entry_depth.mask_path is not None + mask_path = os.path.join(self.dataset_root, entry_depth.mask_path) + depth_mask = _load_depth_mask(self._local_path(mask_path)) + + if self.box_crop: + assert clamp_bbox_xyxy is not None + depth_mask_bbox_xyxy = _rescale_bbox( + clamp_bbox_xyxy, entry.image.size, depth_mask.shape[-2:] + ) + depth_mask = _crop_around_box( + depth_mask, depth_mask_bbox_xyxy, mask_path + ) + + depth_mask, _, _ = self._resize_image(depth_mask, mode="nearest") + else: + depth_mask = torch.ones_like(depth_map) + + return depth_map, path, depth_mask + + def _get_pytorch3d_camera( + self, + entry: types.FrameAnnotation, + scale: float, + clamp_bbox_xyxy: Optional[torch.Tensor], + ) -> PerspectiveCameras: # pyre-ignore + entry_viewpoint = entry.viewpoint + assert entry_viewpoint is not None + # principal point and focal length + principal_point = torch.tensor( + entry_viewpoint.principal_point, dtype=torch.float + ) + focal_length = torch.tensor(entry_viewpoint.focal_length, dtype=torch.float) + + 
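+        # Worked example (values assumed for illustration, not taken from any
+        # real annotation): for an image with entry.image.size == (600, 800),
+        # i.e. (H, W), half_image_size_wh_orig below becomes (400.0, 300.0)
+        # in (W, H) order; under the "ndc_norm_image_bounds" convention a
+        # stored principal_point of (0.0, 0.0) then maps to the pixel-space
+        # principal point (400.0, 300.0), the image center.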
half_image_size_wh_orig = ( + torch.tensor(list(reversed(entry.image.size)), dtype=torch.float) / 2.0 + ) + + # first, we convert from the dataset's NDC convention to pixels + format = entry_viewpoint.intrinsics_format + if format.lower() == "ndc_norm_image_bounds": + # this is e.g. currently used in CO3D for storing intrinsics + rescale = half_image_size_wh_orig + elif format.lower() == "ndc_isotropic": + rescale = half_image_size_wh_orig.min() + else: + raise ValueError(f"Unknown intrinsics format: {format}") + + # principal point and focal length in pixels + principal_point_px = half_image_size_wh_orig - principal_point * rescale + focal_length_px = focal_length * rescale + if self.box_crop: + assert clamp_bbox_xyxy is not None + principal_point_px -= clamp_bbox_xyxy[:2] + + # now, convert from pixels to PyTorch3D v0.5+ NDC convention + if self.image_height is None or self.image_width is None: + out_size = list(reversed(entry.image.size)) + else: + out_size = [self.image_width, self.image_height] + + half_image_size_output = torch.tensor(out_size, dtype=torch.float) / 2.0 + half_min_image_size_output = half_image_size_output.min() + + # rescaled principal point and focal length in ndc + principal_point = ( + half_image_size_output - principal_point_px * scale + ) / half_min_image_size_output + focal_length = focal_length_px * scale / half_min_image_size_output + + return PerspectiveCameras( + focal_length=focal_length[None], + principal_point=principal_point[None], + R=torch.tensor(entry_viewpoint.R, dtype=torch.float)[None], + T=torch.tensor(entry_viewpoint.T, dtype=torch.float)[None], + ) + + def _fix_point_cloud_path(self, path: str) -> str: + """ + Fix up a point cloud path from the dataset. + Some files in Co3Dv2 have an accidental absolute path stored. + """ + unwanted_prefix = ( + "/large_experiments/p3/replay/datasets/co3d/co3d45k_220512/export_v23/" + ) + if path.startswith(unwanted_prefix): + path = path[len(unwanted_prefix) :] + return os.path.join(self.dataset_root, path) + + def _local_path(self, path: str) -> str: + if self.path_manager is None: + return path + return self.path_manager.get_local_path(path) + + def _resize_image( + self, image, mode="bilinear" + ) -> Tuple[torch.Tensor, float, torch.Tensor]: + image_height, image_width = self.image_height, self.image_width + if image_height is None or image_width is None: + # skip the resizing + imre_ = torch.from_numpy(image) + return imre_, 1.0, torch.ones_like(imre_[:1]) + # takes numpy array, returns pytorch tensor + minscale = min( + image_height / image.shape[-2], + image_width / image.shape[-1], + ) + imre = torch.nn.functional.interpolate( + torch.from_numpy(image)[None], + scale_factor=minscale, + mode=mode, + align_corners=False if mode == "bilinear" else None, + recompute_scale_factor=True, + )[0] + # pyre-fixme[19]: Expected 1 positional argument. 
+ imre_ = torch.zeros(image.shape[0], self.image_height, self.image_width) + imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre + mask = torch.zeros(1, self.image_height, self.image_width) + mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0 + return imre_, minscale, mask + + +def _load_image(path) -> np.ndarray: + with Image.open(path) as pil_im: + im = np.array(pil_im.convert("RGB")) + im = im.transpose((2, 0, 1)) + im = im.astype(np.float32) / 255.0 + return im + + +def _load_mask(path) -> np.ndarray: + with Image.open(path) as pil_im: + mask = np.array(pil_im) + mask = mask.astype(np.float32) / 255.0 + return mask[None] # fake feature channel + + +def _get_bbox_from_mask( + mask, thr, decrease_quant: float = 0.05 +) -> Tuple[int, int, int, int]: + # bbox in xywh + masks_for_box = np.zeros_like(mask) + while masks_for_box.sum() <= 1.0: + masks_for_box = (mask > thr).astype(np.float32) + thr -= decrease_quant + if thr <= 0.0: + warnings.warn(f"Empty masks_for_bbox (thr={thr}) => using full image.") + + x0, x1 = _get_1d_bounds(masks_for_box.sum(axis=-2)) + y0, y1 = _get_1d_bounds(masks_for_box.sum(axis=-1)) + + return x0, y0, x1 - x0, y1 - y0 + + +def _crop_around_box(tensor, bbox, impath: str = ""): + # bbox is xyxy, where the upper bound is corrected with +1 + bbox = _clamp_box_to_image_bounds_and_round( + bbox, + image_size_hw=tensor.shape[-2:], + ) + tensor = tensor[..., bbox[1] : bbox[3], bbox[0] : bbox[2]] + assert all(c > 0 for c in tensor.shape), f"squashed image {impath}" + return tensor + + +def _clamp_box_to_image_bounds_and_round( + bbox_xyxy: torch.Tensor, + image_size_hw: Tuple[int, int], +) -> torch.LongTensor: + bbox_xyxy = bbox_xyxy.clone() + bbox_xyxy[[0, 2]] = torch.clamp(bbox_xyxy[[0, 2]], 0, image_size_hw[-1]) + bbox_xyxy[[1, 3]] = torch.clamp(bbox_xyxy[[1, 3]], 0, image_size_hw[-2]) + if not isinstance(bbox_xyxy, torch.LongTensor): + bbox_xyxy = bbox_xyxy.round().long() + return bbox_xyxy # pyre-ignore [7] + + +def _get_clamp_bbox( + bbox: torch.Tensor, + box_crop_context: float = 0.0, + image_path: str = "", +) -> torch.Tensor: + # box_crop_context: rate of expansion for bbox + # returns possibly expanded bbox xyxy as float + + bbox = bbox.clone() # do not edit bbox in place + + # increase box size + if box_crop_context > 0.0: + c = box_crop_context + bbox = bbox.float() + bbox[0] -= bbox[2] * c / 2 + bbox[1] -= bbox[3] * c / 2 + bbox[2] += bbox[2] * c + bbox[3] += bbox[3] * c + + if (bbox[2:] <= 1.0).any(): + raise ValueError( + f"squashed image {image_path}!! The bounding box contains no pixels." 
+ ) + + bbox[2:] = torch.clamp(bbox[2:], 2) # set min height, width to 2 along both axes + bbox_xyxy = _bbox_xywh_to_xyxy(bbox, clamp_size=2) + + return bbox_xyxy + + +def _bbox_xyxy_to_xywh(xyxy: torch.Tensor) -> torch.Tensor: + wh = xyxy[2:] - xyxy[:2] + xywh = torch.cat([xyxy[:2], wh]) + return xywh + + +def _resize_image( + self, image, mode="bilinear" +) -> Tuple[torch.Tensor, float, torch.Tensor]: + image_height, image_width = self.image_height, self.image_width + if image_height is None or image_width is None: + # skip the resizing + imre_ = torch.from_numpy(image) + return imre_, 1.0, torch.ones_like(imre_[:1]) + # takes numpy array, returns pytorch tensor + minscale = min( + image_height / image.shape[-2], + image_width / image.shape[-1], + ) + imre = torch.nn.functional.interpolate( + torch.from_numpy(image)[None], + scale_factor=minscale, + mode=mode, + align_corners=False if mode == "bilinear" else None, + recompute_scale_factor=True, + )[0] + # pyre-fixme[19]: Expected 1 positional argument. + imre_ = torch.zeros(image.shape[0], self.image_height, self.image_width) + imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre + mask = torch.zeros(1, self.image_height, self.image_width) + mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0 + return imre_, minscale, mask + + +def _load_depth(path, scale_adjustment) -> np.ndarray: + if not path.lower().endswith(".png"): + raise ValueError('unsupported depth file name "%s"' % path) + + d = _load_16big_png_depth(path) * scale_adjustment + d[~np.isfinite(d)] = 0.0 + return d[None] # fake feature channel + + +def _load_16big_png_depth(depth_png) -> np.ndarray: + with Image.open(depth_png) as depth_pil: + # the image is stored with 16-bit depth but PIL reads it as I (32 bit). + # we cast it to uint16, then reinterpret as float16, then cast to float32 + depth = ( + np.frombuffer(np.array(depth_pil, dtype=np.uint16), dtype=np.float16) + .astype(np.float32) + .reshape((depth_pil.size[1], depth_pil.size[0])) + ) + return depth + + +def _rescale_bbox(bbox: torch.Tensor, orig_res, new_res) -> torch.Tensor: + assert bbox is not None + assert np.prod(orig_res) > 1e-8 + # average ratio of dimensions + rel_size = (new_res[0] / orig_res[0] + new_res[1] / orig_res[1]) / 2.0 + return bbox * rel_size + + +def _load_1bit_png_mask(file: str) -> np.ndarray: + with Image.open(file) as pil_im: + mask = (np.array(pil_im.convert("L")) > 0.0).astype(np.float32) + return mask + + +def _load_depth_mask(path: str) -> np.ndarray: + if not path.lower().endswith(".png"): + raise ValueError('unsupported depth mask file name "%s"' % path) + m = _load_1bit_png_mask(path) + return m[None] # fake feature channel + + +def _get_1d_bounds(arr) -> Tuple[int, int]: + nz = np.flatnonzero(arr) + return nz[0], nz[-1] + 1 + + +def _bbox_xywh_to_xyxy( + xywh: torch.Tensor, clamp_size: Optional[int] = None +) -> torch.Tensor: + xyxy = xywh.clone() + if clamp_size is not None: + xyxy[2:] = torch.clamp(xyxy[2:], clamp_size) + xyxy[2:] += xyxy[:2] + return xyxy + + +def _safe_as_tensor(data, dtype): + return torch.tensor(data, dtype=dtype) if data is not None else None + + +# NOTE this cache is per-worker; they are implemented as processes. +# each batch is loaded and collated by a single worker; +# since sequences tend to co-occur within batches, this is useful. 
+@functools.lru_cache(maxsize=256) +# pyre-ignore +def _load_pointcloud(pcl_path: Union[str, Path], max_points: int = 0) -> Pointclouds: + pcl = IO().load_pointcloud(pcl_path) + if max_points > 0: + pcl = pcl.subsample(max_points) + + return pcl From f745dfc941e9c5ed3e10e0a2664236b3124b3770 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 28 Feb 2023 15:39:42 +0000 Subject: [PATCH 02/43] added type hints and deleted chore pyre-ignore --- .../implicitron/dataset/json_index_dataset.py | 45 ++++++++----------- pytorch3d/implicitron/dataset/load_blob.py | 30 ++++++------- 2 files changed, 32 insertions(+), 43 deletions(-) diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py index ac9daf02a..9bec154c3 100644 --- a/pytorch3d/implicitron/dataset/json_index_dataset.py +++ b/pytorch3d/implicitron/dataset/json_index_dataset.py @@ -52,7 +52,6 @@ class FrameAnnotsEntry(TypedDict): subset: Optional[str] - # pyre-ignore frame_annotation: types.FrameAnnotation else: @@ -60,7 +59,6 @@ class FrameAnnotsEntry(TypedDict): @registry.register -# pyre-ignore class JsonIndexDataset(DatasetBase, ReplaceableBase): """ A dataset with annotations in json files like the Common Objects in 3D @@ -131,7 +129,6 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase): frame_annotations_type: ClassVar[ Type[types.FrameAnnotation] - # pyre-ignore ] = types.FrameAnnotation path_manager: Any = None @@ -164,7 +161,7 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase): sort_frames: bool = False eval_batches: Any = None eval_batch_index: Any = None - loader: BlobLoader + blob_loader: BlobLoader # frame_annots: List[FrameAnnotsEntry] = field(init=False) # seq_annots: Dict[str, types.SequenceAnnotation] = field(init=False) @@ -179,21 +176,21 @@ def __post_init__(self) -> None: self._filter_db() # also computes sequence indices self._extract_and_set_eval_batches() - self.loader = BlobLoader( - self.dataset_root, - self.load_images, - self.load_depths, - self.load_depth_masks, - self.load_masks, - self.load_point_clouds, - self.max_points, - self.mask_images, - self.mask_depths, - self.image_height, - self.image_width, - self.box_crop, - self.box_crop_mask_thr, - self.box_crop_context, + self.blob_loader = BlobLoader( + dataset_root = self.dataset_root, + load_images = self.load_images, + load_depths = self.load_depths, + load_depth_masks = self.load_depth_masks, + load_masks = self.load_masks, + load_point_clouds = self.load_point_clouds, + max_points = self.max_points, + mask_images = self.mask_images, + mask_depths = self.mask_depths, + image_height = self.image_height, + image_width = self.image_width, + box_crop = self.box_crop, + box_crop_mask_thr = self.box_crop_mask_thr, + box_crop_context = self.box_crop_context, ) logger.info(str(self)) @@ -415,7 +412,6 @@ def __len__(self) -> int: def _get_frame_type(self, entry: FrameAnnotsEntry) -> Optional[str]: return entry["subset"] - # pyre-ignore def get_all_train_cameras(self) -> CamerasBase: """ Returns the cameras corresponding to all the known frames. 
@@ -431,7 +427,6 @@ def get_all_train_cameras(self) -> CamerasBase: cameras.append(self[frame_idx].camera) return join_cameras_as_batch(cameras) - # pyre-ignore def __getitem__(self, index) -> FrameData: # pyre-ignore[16] if index >= len(self.frame_annots): @@ -456,17 +451,14 @@ def __getitem__(self, index) -> FrameData: else None, ) - # The rest of the fields are optional + # Optional field frame_data.frame_type = self._get_frame_type(self.frame_annots[index]) - - frame_data = self.loader.load(frame_data, entry, point_cloud) - return frame_data + return self.blob_loader.load(frame_data, entry, point_cloud) def _load_frames(self) -> None: logger.info(f"Loading Co3D frames from {self.frame_annotations_file}.") local_file = self._local_path(self.frame_annotations_file) with gzip.open(local_file, "rt", encoding="utf8") as zipfile: - # pyre-ignore frame_annots_list = types.load_dataclass( zipfile, List[self.frame_annotations_type] ) @@ -481,7 +473,6 @@ def _load_sequences(self) -> None: logger.info(f"Loading Co3D sequences from {self.sequence_annotations_file}.") local_file = self._local_path(self.sequence_annotations_file) with gzip.open(local_file, "rt", encoding="utf8") as zipfile: - # pyre-ignore seq_annots = types.load_dataclass(zipfile, List[types.SequenceAnnotation]) if not seq_annots: raise ValueError("Empty sequences file!") diff --git a/pytorch3d/implicitron/dataset/load_blob.py b/pytorch3d/implicitron/dataset/load_blob.py index b10fb1267..905351896 100644 --- a/pytorch3d/implicitron/dataset/load_blob.py +++ b/pytorch3d/implicitron/dataset/load_blob.py @@ -44,7 +44,6 @@ class BlobLoader: box_crop_context: The amount of additional padding added to each dimension of the cropping bounding box, relative to box size. """ - path_manager: Any = None def __init__( @@ -64,20 +63,20 @@ def __init__( box_crop_mask_thr, box_crop_context, ): - self.dataset_root = dataset_root - self.load_images = load_images - self.load_depths = load_depths - self.load_depth_masks = load_depth_masks - self.load_masks = load_masks - self.load_point_clouds = load_point_clouds - self.max_points = max_points - self.mask_images = mask_images - self.mask_depths = mask_depths - self.image_height = image_height - self.image_width = image_width - self.box_crop = box_crop - self.box_crop_mask_thr = box_crop_mask_thr - self.box_crop_context = box_crop_context + self.dataset_root: str = dataset_root + self.load_images: bool = load_images + self.load_depths: bool = load_depths + self.load_depth_masks: bool = load_depth_masks + self.load_masks: bool = load_masks + self.load_point_clouds: bool = load_point_clouds + self.max_points: int = max_points + self.mask_images: bool = mask_images + self.mask_depths: bool = mask_depths + self.image_height: int = image_height + self.image_width: int = image_width + self.box_crop: bool = box_crop + self.box_crop_mask_thr: float = box_crop_mask_thr + self.box_crop_context: float = box_crop_context def load( self, @@ -341,7 +340,6 @@ def _resize_image( align_corners=False if mode == "bilinear" else None, recompute_scale_factor=True, )[0] - # pyre-fixme[19]: Expected 1 positional argument. 
imre_ = torch.zeros(image.shape[0], self.image_height, self.image_width) imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre mask = torch.zeros(1, self.image_height, self.image_width) From c3c5110364ae1d7e42ba63a97223d3410926d587 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 28 Feb 2023 16:16:37 +0000 Subject: [PATCH 03/43] linter --- .../implicitron/dataset/json_index_dataset.py | 36 +++++++++---------- pytorch3d/implicitron/dataset/load_blob.py | 10 ++++-- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py index 9bec154c3..0d5aa1796 100644 --- a/pytorch3d/implicitron/dataset/json_index_dataset.py +++ b/pytorch3d/implicitron/dataset/json_index_dataset.py @@ -15,7 +15,6 @@ import warnings from collections import defaultdict from itertools import islice -from pathlib import Path from typing import ( Any, ClassVar, @@ -30,18 +29,17 @@ Union, ) -import numpy as np import torch from tqdm import tqdm -from pytorch3d.implicitron.tools.config import registry, ReplaceableBase from pytorch3d.implicitron.dataset import types from pytorch3d.implicitron.dataset.dataset_base import DatasetBase, FrameData from pytorch3d.implicitron.dataset.load_blob import BlobLoader from pytorch3d.implicitron.dataset.utils import is_known_frame_scalar + +from pytorch3d.implicitron.tools.config import registry, ReplaceableBase from pytorch3d.renderer.camera_utils import join_cameras_as_batch -from pytorch3d.renderer.cameras import CamerasBase, PerspectiveCameras -from pytorch3d.structures.pointclouds import Pointclouds +from pytorch3d.renderer.cameras import CamerasBase logger = logging.getLogger(__name__) @@ -177,20 +175,20 @@ def __post_init__(self) -> None: self._extract_and_set_eval_batches() self.blob_loader = BlobLoader( - dataset_root = self.dataset_root, - load_images = self.load_images, - load_depths = self.load_depths, - load_depth_masks = self.load_depth_masks, - load_masks = self.load_masks, - load_point_clouds = self.load_point_clouds, - max_points = self.max_points, - mask_images = self.mask_images, - mask_depths = self.mask_depths, - image_height = self.image_height, - image_width = self.image_width, - box_crop = self.box_crop, - box_crop_mask_thr = self.box_crop_mask_thr, - box_crop_context = self.box_crop_context, + dataset_root=self.dataset_root, + load_images=self.load_images, + load_depths=self.load_depths, + load_depth_masks=self.load_depth_masks, + load_masks=self.load_masks, + load_point_clouds=self.load_point_clouds, + max_points=self.max_points, + mask_images=self.mask_images, + mask_depths=self.mask_depths, + image_height=self.image_height, + image_width=self.image_width, + box_crop=self.box_crop, + box_crop_mask_thr=self.box_crop_mask_thr, + box_crop_context=self.box_crop_context, ) logger.info(str(self)) diff --git a/pytorch3d/implicitron/dataset/load_blob.py b/pytorch3d/implicitron/dataset/load_blob.py index 905351896..2d6d2d220 100644 --- a/pytorch3d/implicitron/dataset/load_blob.py +++ b/pytorch3d/implicitron/dataset/load_blob.py @@ -1,15 +1,18 @@ import functools import os import warnings +from pathlib import Path +from typing import Any, Optional, Tuple, Union import numpy as np -from PIL import Image import torch -from typing import Any, Optional, Tuple +from PIL import Image from pytorch3d.implicitron.dataset import types from pytorch3d.implicitron.dataset.dataset_base import FrameData from pytorch3d.io import IO +from pytorch3d.renderer.cameras import 
PerspectiveCameras +from pytorch3d.structures.pointclouds import Pointclouds class BlobLoader: @@ -44,6 +47,7 @@ class BlobLoader: box_crop_context: The amount of additional padding added to each dimension of the cropping bounding box, relative to box size. """ + path_manager: Any = None def __init__( @@ -371,7 +375,7 @@ def _get_bbox_from_mask( masks_for_box = (mask > thr).astype(np.float32) thr -= decrease_quant if thr <= 0.0: - warnings.warn(f"Empty masks_for_bbox (thr={thr}) => using full image.") + warnings.warn(f"Empty masks_for_bbox (thr={thr}) => using full image.", stacklevel=1) x0, x1 = _get_1d_bounds(masks_for_box.sum(axis=-2)) y0, y1 = _get_1d_bounds(masks_for_box.sum(axis=-1)) From 9b431bd5698050bfc5574881a569f2fb9cab5be7 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 28 Feb 2023 16:18:11 +0000 Subject: [PATCH 04/43] linter --- pytorch3d/implicitron/dataset/load_blob.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pytorch3d/implicitron/dataset/load_blob.py b/pytorch3d/implicitron/dataset/load_blob.py index 2d6d2d220..0cad7b4f1 100644 --- a/pytorch3d/implicitron/dataset/load_blob.py +++ b/pytorch3d/implicitron/dataset/load_blob.py @@ -375,7 +375,9 @@ def _get_bbox_from_mask( masks_for_box = (mask > thr).astype(np.float32) thr -= decrease_quant if thr <= 0.0: - warnings.warn(f"Empty masks_for_bbox (thr={thr}) => using full image.", stacklevel=1) + warnings.warn( + f"Empty masks_for_bbox (thr={thr}) => using full image.", stacklevel=1 + ) x0, x1 = _get_1d_bounds(masks_for_box.sum(axis=-2)) y0, y1 = _get_1d_bounds(masks_for_box.sum(axis=-1)) From 627e60fb4cf989c7ce0a75b1cb198cd5f99a027a Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 28 Feb 2023 17:02:36 +0000 Subject: [PATCH 05/43] deleted chore pyre-ignore --- pytorch3d/implicitron/dataset/load_blob.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pytorch3d/implicitron/dataset/load_blob.py b/pytorch3d/implicitron/dataset/load_blob.py index 0cad7b4f1..9193a147d 100644 --- a/pytorch3d/implicitron/dataset/load_blob.py +++ b/pytorch3d/implicitron/dataset/load_blob.py @@ -537,7 +537,6 @@ def _safe_as_tensor(data, dtype): # each batch is loaded and collated by a single worker; # since sequences tend to co-occur within batches, this is useful. 
 @functools.lru_cache(maxsize=256)
-# pyre-ignore
 def _load_pointcloud(pcl_path: Union[str, Path], max_points: int = 0) -> Pointclouds:
     pcl = IO().load_pointcloud(pcl_path)
     if max_points > 0:

From 0aa27a6488afe16dbda6b667a34e802a627f2b77 Mon Sep 17 00:00:00 2001
From: Ildar Salakhiev
Date: Wed, 1 Mar 2023 09:49:02 +0000
Subject: [PATCH 06/43] renamed load_blob to blob_loader

---
 pytorch3d/implicitron/dataset/{load_blob.py => blob_loader.py} | 0
 pytorch3d/implicitron/dataset/json_index_dataset.py            | 2 +-
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename pytorch3d/implicitron/dataset/{load_blob.py => blob_loader.py} (100%)

diff --git a/pytorch3d/implicitron/dataset/load_blob.py b/pytorch3d/implicitron/dataset/blob_loader.py
similarity index 100%
rename from pytorch3d/implicitron/dataset/load_blob.py
rename to pytorch3d/implicitron/dataset/blob_loader.py
diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py
index 0d5aa1796..2ad041bf7 100644
--- a/pytorch3d/implicitron/dataset/json_index_dataset.py
+++ b/pytorch3d/implicitron/dataset/json_index_dataset.py
@@ -34,7 +34,7 @@

 from pytorch3d.implicitron.dataset import types
 from pytorch3d.implicitron.dataset.dataset_base import DatasetBase, FrameData
-from pytorch3d.implicitron.dataset.load_blob import BlobLoader
+from pytorch3d.implicitron.dataset.blob_loader import BlobLoader
 from pytorch3d.implicitron.dataset.utils import is_known_frame_scalar

From 53823cf6d330af23046ec66a5ce52c17a0c038ec Mon Sep 17 00:00:00 2001
From: Ildar Salakhiev
Date: Wed, 1 Mar 2023 09:56:19 +0000
Subject: [PATCH 07/43] sending the whole seq_annotation to BlobLoader

---
 pytorch3d/implicitron/dataset/blob_loader.py        | 6 +++---
 pytorch3d/implicitron/dataset/json_index_dataset.py | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py
index 9193a147d..3c624a2ce 100644
--- a/pytorch3d/implicitron/dataset/blob_loader.py
+++ b/pytorch3d/implicitron/dataset/blob_loader.py
@@ -89,7 +89,7 @@ def load(
         # pyre-ignore
         entry: types.FrameAnnotation,
         # pyre-ignore
-        point_cloud: types.PointCloudAnnotation,
+        seq_annotation: types.SequenceAnnotation,
     ) -> FrameData:
         """Main method for loader."""
         (
@@ -128,8 +128,8 @@ def load(
             clamp_bbox_xyxy,
         )

-        if self.load_point_clouds and point_cloud is not None:
-            pcl_path = self._fix_point_cloud_path(point_cloud.path)
+        if self.load_point_clouds and seq_annotation.point_cloud is not None:
+            pcl_path = self._fix_point_cloud_path(seq_annotation.point_cloud.path)
             frame_data.sequence_point_cloud = _load_pointcloud(
                 self._local_path(pcl_path), max_points=self.max_points
             )
diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py
index 2ad041bf7..0ceb7dec0 100644
--- a/pytorch3d/implicitron/dataset/json_index_dataset.py
+++ b/pytorch3d/implicitron/dataset/json_index_dataset.py
@@ -451,7 +451,7 @@ def __getitem__(self, index) -> FrameData:

         # Optional field
         frame_data.frame_type = self._get_frame_type(self.frame_annots[index])
-        return self.blob_loader.load(frame_data, entry, point_cloud)
+        return self.blob_loader.load(frame_data, entry, self.seq_annots[entry.sequence_name])

From d6f13eb629d6607ebd60bfc1e026027309402fc9 Mon Sep 17 00:00:00 2001
From: Ildar
Salakhiev Date: Wed, 1 Mar 2023 10:00:09 +0000 Subject: [PATCH 08/43] made blob_loader dataclass to avoid boilerplate --- pytorch3d/implicitron/dataset/blob_loader.py | 48 +++++++------------- 1 file changed, 16 insertions(+), 32 deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index 3c624a2ce..29c41837f 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -1,6 +1,7 @@ import functools import os import warnings +from dataclasses import dataclass from pathlib import Path from typing import Any, Optional, Tuple, Union @@ -15,6 +16,7 @@ from pytorch3d.structures.pointclouds import Pointclouds +@dataclass class BlobLoader: """ A loader for correctly (according to setup) loading blobs for FrameData @@ -48,40 +50,22 @@ class BlobLoader: dimension of the cropping bounding box, relative to box size. """ + dataset_root: str + load_images: bool + load_depths: bool + load_depth_masks: bool + load_masks: bool + load_point_clouds: bool + max_points: int + mask_images: bool + mask_depths: bool + image_height: int + image_width: int + box_crop: bool + box_crop_mask_thr: float + box_crop_context: float path_manager: Any = None - def __init__( - self, - dataset_root, - load_images, - load_depths, - load_depth_masks, - load_masks, - load_point_clouds, - max_points, - mask_images, - mask_depths, - image_height, - image_width, - box_crop, - box_crop_mask_thr, - box_crop_context, - ): - self.dataset_root: str = dataset_root - self.load_images: bool = load_images - self.load_depths: bool = load_depths - self.load_depth_masks: bool = load_depth_masks - self.load_masks: bool = load_masks - self.load_point_clouds: bool = load_point_clouds - self.max_points: int = max_points - self.mask_images: bool = mask_images - self.mask_depths: bool = mask_depths - self.image_height: int = image_height - self.image_width: int = image_width - self.box_crop: bool = box_crop - self.box_crop_mask_thr: float = box_crop_mask_thr - self.box_crop_context: float = box_crop_context - def load( self, # pyre-ignore From 86e64f77fb89b10acd51576620aeda709bd0505c Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Wed, 1 Mar 2023 10:02:21 +0000 Subject: [PATCH 09/43] documented, that FrameData modification done inplace --- pytorch3d/implicitron/dataset/blob_loader.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index 29c41837f..48578927d 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -19,7 +19,8 @@ @dataclass class BlobLoader: """ - A loader for correctly (according to setup) loading blobs for FrameData + A loader for correctly (according to setup) loading blobs for FrameData. + Beware that modification done in place Args: dataset_root: The root folder of the dataset; all the paths in jsons are @@ -75,7 +76,9 @@ def load( # pyre-ignore seq_annotation: types.SequenceAnnotation, ) -> FrameData: - """Main method for loader.""" + """Main method for loader. 
FrameData modification is done in place.
+        """
         (
             frame_data.fg_probability,
             frame_data.mask_path,

From 2f1704939fb1795e7ad2e0eca1b18fb30d12fba4 Mon Sep 17 00:00:00 2001
From: Ildar Salakhiev
Date: Wed, 1 Mar 2023 10:19:14 +0000
Subject: [PATCH 10/43] split JsonIndexDataset args into 2 groups:
 Metadata-related and Blob-loading

---
 .../implicitron/dataset/json_index_dataset.py | 37 ++++++++++---------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py
index 0ceb7dec0..671161680 100644
--- a/pytorch3d/implicitron/dataset/json_index_dataset.py
+++ b/pytorch3d/implicitron/dataset/json_index_dataset.py
@@ -62,7 +62,7 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase):
     A dataset with annotations in json files like the Common Objects in 3D
     (CO3D) dataset.
 
-    Args:
+    Metadata-related args:
         frame_annotations_file: A zipped json file containing metadata of the
             frames in the dataset, serialized List[types.FrameAnnotation].
         sequence_annotations_file: A zipped json file containing metadata of the
@@ -80,6 +80,24 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase):
         pick_sequence: A list of sequence names to restrict the dataset to.
         exclude_sequence: A list of the names of the sequences to exclude.
         limit_category_to: Restrict the dataset to the given list of categories.
+        remove_empty_masks: Removes the frames with no active foreground pixels
+            in the segmentation mask after thresholding (see box_crop_mask_thr).
+        n_frames_per_sequence: If > 0, randomly samples #n_frames_per_sequence
+            frames in each sequence uniformly without replacement if it has
+            more frames than that; applied before other frame-level filters.
+        seed: The seed of the random generator sampling #n_frames_per_sequence
+            random frames per sequence.
-        sort_frames: Enable frame annotations sorting to group frames from the
-            same sequences together and order them by timestamps
-        eval_batches: A list of batches that form the evaluation set;
-            list of batch-sized lists of indices corresponding to __getitem__
-            of this class, thus it can be used directly as a batch sampler.
-        eval_batch_index:
-            ( Optional[List[List[Union[Tuple[str, int, str], Tuple[str, int]]]] )
-            A list of batches of frames described as (sequence_name, frame_idx)
-            that can form the evaluation set, `eval_batches` will be set from this.
-
     """
 
     frame_annotations_type: ClassVar[

From 527ec098e44c15f1386b607d34c7b9e760528813 Mon Sep 17 00:00:00 2001
From: Ildar Salakhiev
Date: Wed, 1 Mar 2023 12:08:46 +0000
Subject: [PATCH 11/43] code refactoring to delete stale pyre-ignore comments

---
 pytorch3d/implicitron/dataset/blob_loader.py  | 19 +++---
 .../implicitron/dataset/json_index_dataset.py | 58 ++++++-------------
 pytorch3d/implicitron/dataset/visualize.py    |  1 -
 3 files changed, 29 insertions(+), 49 deletions(-)

diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py
index 48578927d..fce26b255 100644
--- a/pytorch3d/implicitron/dataset/blob_loader.py
+++ b/pytorch3d/implicitron/dataset/blob_loader.py
@@ -1,3 +1,9 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
 import functools
 import os
 import warnings
@@ -60,8 +66,8 @@ class BlobLoader:
     max_points: int
     mask_images: bool
     mask_depths: bool
-    image_height: int
-    image_width: int
+    image_height: Optional[int]
+    image_width: Optional[int]
     box_crop: bool
     box_crop_mask_thr: float
     box_crop_context: float
@@ -69,11 +75,8 @@ def load(
         self,
-        # pyre-ignore
         frame_data: FrameData,
-        # pyre-ignore
        entry: types.FrameAnnotation,
-        # pyre-ignore
         seq_annotation: types.SequenceAnnotation,
     ) -> FrameData:
         """Main method for loader.
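The `Optional[int]` change above is what lets a caller disable resizing entirely; the convention (visible in the `_resize_image` body deleted in patch 12 below) is that a `None` bound means "keep the original resolution, scale 1.0". A minimal standalone sketch of that behaviour, using the hypothetical name `resize_to_fit` rather than the library's private helper:

    from typing import Optional, Tuple

    import torch

    def resize_to_fit(
        image: torch.Tensor, height: Optional[int], width: Optional[int]
    ) -> Tuple[torch.Tensor, float]:
        # image: (C, H, W); returns the resized image and the applied scale
        if height is None or width is None:
            return image, 1.0  # no target size given: skip resizing
        # largest scale that keeps the image inside (height, width)
        scale = min(height / image.shape[-2], width / image.shape[-1])
        resized = torch.nn.functional.interpolate(
            image[None], scale_factor=scale, mode="bilinear", align_corners=False
        )[0]
        return resized, scale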
@@ -242,7 +245,7 @@ def _get_pytorch3d_camera( entry: types.FrameAnnotation, scale: float, clamp_bbox_xyxy: Optional[torch.Tensor], - ) -> PerspectiveCameras: # pyre-ignore + ) -> PerspectiveCameras: entry_viewpoint = entry.viewpoint assert entry_viewpoint is not None # principal point and focal length @@ -331,9 +334,9 @@ def _resize_image( align_corners=False if mode == "bilinear" else None, recompute_scale_factor=True, )[0] - imre_ = torch.zeros(image.shape[0], self.image_height, self.image_width) + imre_ = torch.zeros(image.shape[0], image_height, image_width) imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre - mask = torch.zeros(1, self.image_height, self.image_width) + mask = torch.zeros(1, image_height, image_width) mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0 return imre_, minscale, mask diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py index 671161680..cf63b9b43 100644 --- a/pytorch3d/implicitron/dataset/json_index_dataset.py +++ b/pytorch3d/implicitron/dataset/json_index_dataset.py @@ -14,6 +14,7 @@ import random import warnings from collections import defaultdict +from dataclasses import field from itertools import islice from typing import ( Any, @@ -30,16 +31,16 @@ ) import torch -from tqdm import tqdm from pytorch3d.implicitron.dataset import types -from pytorch3d.implicitron.dataset.dataset_base import DatasetBase, FrameData from pytorch3d.implicitron.dataset.blob_loader import BlobLoader +from pytorch3d.implicitron.dataset.dataset_base import DatasetBase, FrameData from pytorch3d.implicitron.dataset.utils import is_known_frame_scalar from pytorch3d.implicitron.tools.config import registry, ReplaceableBase from pytorch3d.renderer.camera_utils import join_cameras_as_batch from pytorch3d.renderer.cameras import CamerasBase +from tqdm import tqdm logger = logging.getLogger(__name__) @@ -160,13 +161,14 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase): sort_frames: bool = False eval_batches: Any = None eval_batch_index: Any = None - blob_loader: BlobLoader - # frame_annots: List[FrameAnnotsEntry] = field(init=False) - # seq_annots: Dict[str, types.SequenceAnnotation] = field(init=False) + subset_to_image_path: Any = None + # initialised in __post_init__ + blob_loader: BlobLoader = field(init=False) + frame_annots: List[FrameAnnotsEntry] = field(init=False) + seq_annots: Dict[str, types.SequenceAnnotation] = field(init=False) + _seq_to_idx: Dict[str, List[int]] = field(init=False) def __post_init__(self) -> None: - # pyre-fixme[16]: `JsonIndexDataset` has no attribute `subset_to_image_path`. - self.subset_to_image_path = None self._load_frames() self._load_sequences() if self.sort_frames: @@ -206,7 +208,8 @@ def _extract_and_set_eval_batches(self): self.eval_batch_index ) - def join(self, other_datasets: Iterable[DatasetBase]) -> None: + # pyre-ignore + def join(self, other_datasets: Iterable["JsonIndexDataset"]) -> None: """ Join the dataset with other JsonIndexDataset objects. 
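The `field(init=False)` declarations above rely on a standard dataclasses pattern: the attribute is declared for type checkers but excluded from the generated `__init__`, and `__post_init__` fills it in. A minimal self-contained sketch of the pattern (the `Example` class is illustrative, not from the library):

    from dataclasses import dataclass, field

    @dataclass
    class Example:
        x: int
        doubled: int = field(init=False)  # not an __init__ argument

        def __post_init__(self) -> None:
            self.doubled = 2 * self.x  # initialised here instead

    e = Example(x=3)  # 'doubled' cannot be passed to the constructor
    assert e.doubled == 6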
@@ -216,9 +219,7 @@ def join(self, other_datasets: Iterable[DatasetBase]) -> None: """ if not all(isinstance(d, JsonIndexDataset) for d in other_datasets): raise ValueError("This function can only join a list of JsonIndexDataset") - # pyre-ignore[16] self.frame_annots.extend([fa for d in other_datasets for fa in d.frame_annots]) - # pyre-ignore[16] self.seq_annots.update( # https://gist.github.com/treyhunner/f35292e676efa0be1728 functools.reduce( @@ -266,7 +267,7 @@ def seq_frame_index_to_dataset_index( allow_missing_indices: bool = False, remove_missing_indices: bool = False, suppress_missing_index_warning: bool = True, - ) -> List[List[Union[Optional[int], int]]]: + ) -> Union[List[List[Optional[int]]], List[List[int]]]: """ Obtain indices into the dataset object given a list of frame ids. @@ -294,11 +295,9 @@ def seq_frame_index_to_dataset_index( """ _dataset_seq_frame_n_index = { seq: { - # pyre-ignore[16] self.frame_annots[idx]["frame_annotation"].frame_number: idx for idx in seq_idx } - # pyre-ignore[16] for seq, seq_idx in self._seq_to_idx.items() } @@ -321,7 +320,6 @@ def _get_dataset_idx( # Check that the loaded frame path is consistent # with the one stored in self.frame_annots. assert os.path.normpath( - # pyre-ignore[16] self.frame_annots[idx]["frame_annotation"].image.path ) == os.path.normpath( path @@ -338,9 +336,7 @@ def _get_dataset_idx( valid_dataset_idx = [ [b for b in batch if b is not None] for batch in dataset_idx ] - return [ # pyre-ignore[7] - batch for batch in valid_dataset_idx if len(batch) > 0 - ] + return [batch for batch in valid_dataset_idx if len(batch) > 0] return dataset_idx @@ -373,7 +369,7 @@ def subset_from_frame_index( # Deep copy the whole dataset except frame_annots, which are large so we # deep copy only the requested subset of frame_annots. 
- memo = {id(self.frame_annots): None} # pyre-ignore[16] + memo = {id(self.frame_annots): None} dataset_new = copy.deepcopy(self, memo) dataset_new.frame_annots = copy.deepcopy( [self.frame_annots[i] for i in valid_dataset_indices] @@ -401,11 +397,9 @@ def subset_from_frame_index( return dataset_new def __str__(self) -> str: - # pyre-ignore[16] return f"JsonIndexDataset #frames={len(self.frame_annots)}" def __len__(self) -> int: - # pyre-ignore[16] return len(self.frame_annots) def _get_frame_type(self, entry: FrameAnnotsEntry) -> Optional[str]: @@ -417,7 +411,6 @@ def get_all_train_cameras(self) -> CamerasBase: """ logger.info("Loading all train cameras.") cameras = [] - # pyre-ignore[16] for frame_idx, frame_annot in enumerate(tqdm(self.frame_annots)): frame_type = self._get_frame_type(frame_annot) if frame_type is None: @@ -427,12 +420,10 @@ def get_all_train_cameras(self) -> CamerasBase: return join_cameras_as_batch(cameras) def __getitem__(self, index) -> FrameData: - # pyre-ignore[16] if index >= len(self.frame_annots): raise IndexError(f"index {index} out of range {len(self.frame_annots)}") entry = self.frame_annots[index]["frame_annotation"] - # pyre-ignore[16] point_cloud = self.seq_annots[entry.sequence_name].point_cloud frame_data = FrameData( frame_number=_safe_as_tensor(entry.frame_number, torch.long), @@ -452,7 +443,9 @@ def __getitem__(self, index) -> FrameData: # Optional field frame_data.frame_type = self._get_frame_type(self.frame_annots[index]) - return self.blob_loader.load(frame_data, entry, self.seq_annots[entry.sequence_name]) + return self.blob_loader.load( + frame_data, entry, self.seq_annots[entry.sequence_name] + ) def _load_frames(self) -> None: logger.info(f"Loading Co3D frames from {self.frame_annotations_file}.") @@ -463,7 +456,6 @@ def _load_frames(self) -> None: ) if not frame_annots_list: raise ValueError("Empty dataset!") - # pyre-ignore[16] self.frame_annots = [ FrameAnnotsEntry(frame_annotation=a, subset=None) for a in frame_annots_list ] @@ -475,7 +467,6 @@ def _load_sequences(self) -> None: seq_annots = types.load_dataclass(zipfile, List[types.SequenceAnnotation]) if not seq_annots: raise ValueError("Empty sequences file!") - # pyre-ignore[16] self.seq_annots = {entry.sequence_name: entry for entry in seq_annots} def _load_subset_lists(self) -> None: @@ -491,7 +482,6 @@ def _load_subset_lists(self) -> None: for subset, frames in subset_to_seq_frame.items() for _, _, path in frames } - # pyre-ignore[16] for frame in self.frame_annots: frame["subset"] = frame_path_to_subset.get( frame["frame_annotation"].image.path, None @@ -504,7 +494,6 @@ def _load_subset_lists(self) -> None: def _sort_frames(self) -> None: # Sort frames to have them grouped by sequence, ordered by timestamp - # pyre-ignore[16] self.frame_annots = sorted( self.frame_annots, key=lambda f: ( @@ -516,7 +505,6 @@ def _sort_frames(self) -> None: def _filter_db(self) -> None: if self.remove_empty_masks: logger.info("Removing images with empty masks.") - # pyre-ignore[16] old_len = len(self.frame_annots) msg = "remove_empty_masks needs every MaskAnnotation.mass to be set." 
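The `memo = {id(self.frame_annots): None}` line above is the standard trick for a partial deepcopy: pre-seeding the memo dict makes `copy.deepcopy` treat the large member as already copied (to `None`), so only the requested subset is copied afterwards. A minimal sketch with an illustrative `Holder` class:

    import copy

    class Holder:
        def __init__(self) -> None:
            self.big = list(range(1_000_000))  # expensive to deep-copy
            self.name = "holder"

    h = Holder()
    clone = copy.deepcopy(h, {id(h.big): None})  # 'big' is skipped, becomes None
    assert clone.big is None and clone.name == "holder"
    clone.big = copy.deepcopy(h.big[:10])  # replace with the requested subset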
@@ -557,7 +545,6 @@ def positive_mass(frame_annot: types.FrameAnnotation) -> bool:
 
         if len(self.limit_category_to) > 0:
             logger.info(f"Limiting dataset to categories: {self.limit_category_to}")
-            # pyre-ignore[16]
             self.seq_annots = {
                 name: entry
                 for name, entry in self.seq_annots.items()
@@ -595,7 +582,6 @@ def positive_mass(frame_annot: types.FrameAnnotation) -> bool:
         if self.n_frames_per_sequence > 0:
             logger.info(f"Taking max {self.n_frames_per_sequence} per sequence.")
             keep_idx = []
-            # pyre-ignore[16]
             for seq, seq_indices in self._seq_to_idx.items():
                 # infer the seed from the sequence name, this is reproducible
                 # and makes the selection differ for different sequences
@@ -625,20 +611,14 @@ def _invalidate_indexes(self, filter_seq_annots: bool = False) -> None:
         self._invalidate_seq_to_idx()
 
         if filter_seq_annots:
-            # pyre-ignore[16]
             self.seq_annots = {
-                k: v
-                for k, v in self.seq_annots.items()
-                # pyre-ignore[16]
-                if k in self._seq_to_idx
+                k: v for k, v in self.seq_annots.items() if k in self._seq_to_idx
             }
 
     def _invalidate_seq_to_idx(self) -> None:
         seq_to_idx = defaultdict(list)
-        # pyre-ignore[16]
        for idx, entry in enumerate(self.frame_annots):
             seq_to_idx[entry["frame_annotation"].sequence_name].append(idx)
-        # pyre-ignore[16]
         self._seq_to_idx = seq_to_idx
 
     def _local_path(self, path: str) -> str:
@@ -653,7 +633,6 @@ def get_frame_numbers_and_timestamps(
         for idx in idxs:
             if (
                 subset_filter is not None
-                # pyre-fixme[16]: `JsonIndexDataset` has no attribute `frame_annots`.
                 and self.frame_annots[idx]["subset"] not in subset_filter
             ):
                 continue
@@ -666,7 +645,6 @@ def category_to_sequence_names(self) -> Dict[str, List[str]]:
         c2seq = defaultdict(list)
-        # pyre-ignore
         for sequence_name, sa in self.seq_annots.items():
             c2seq[sa.category].append(sequence_name)
         return dict(c2seq)
diff --git a/pytorch3d/implicitron/dataset/visualize.py b/pytorch3d/implicitron/dataset/visualize.py
index 6d0be0362..284e903a0 100644
--- a/pytorch3d/implicitron/dataset/visualize.py
+++ b/pytorch3d/implicitron/dataset/visualize.py
@@ -44,7 +44,6 @@ def get_implicitron_sequence_pointcloud(
     sequence_entries = [
         ei
         for ei in sequence_entries
-        # pyre-ignore[16]
         if dataset.frame_annots[ei]["frame_annotation"].sequence_name
         == sequence_name
     ]

From 24b731b853b54f741a2f9377118e36d14821fa7c Mon Sep 17 00:00:00 2001
From: Ildar Salakhiev
Date: Mon, 6 Mar 2023 12:47:23 +0000
Subject: [PATCH 12/43] deleted leftover function

---
 pytorch3d/implicitron/dataset/blob_loader.py | 28 --------------------
 1 file changed, 28 deletions(-)

diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py
index fce26b255..035e99a83 100644
--- a/pytorch3d/implicitron/dataset/blob_loader.py
+++ b/pytorch3d/implicitron/dataset/blob_loader.py
@@ -434,34 +434,6 @@ def _bbox_xyxy_to_xywh(xyxy: torch.Tensor) -> torch.Tensor:
     return xywh
 
 
-def _resize_image(
-    self, image, mode="bilinear"
-) -> Tuple[torch.Tensor, float, torch.Tensor]:
-    image_height, image_width = self.image_height, self.image_width
-    if image_height is None or image_width is None:
-        # skip the resizing
-        imre_ = torch.from_numpy(image)
-        return imre_, 1.0, torch.ones_like(imre_[:1])
-    # takes numpy array, returns pytorch tensor
-    minscale = min(
-        image_height / image.shape[-2],
-        image_width / image.shape[-1],
-    )
-    imre = torch.nn.functional.interpolate(
-        torch.from_numpy(image)[None],
-        scale_factor=minscale,
-        mode=mode,
-        align_corners=False if mode == "bilinear" else None,
-        recompute_scale_factor=True,
-    )[0]
-    # pyre-fixme[19]: Expected 1 positional argument.
-    imre_ = torch.zeros(image.shape[0], self.image_height, self.image_width)
-    imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre
-    mask = torch.zeros(1, self.image_height, self.image_width)
-    mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0
-    return imre_, minscale, mask
-
-
 def _load_depth(path, scale_adjustment) -> np.ndarray:
     if not path.lower().endswith(".png"):
         raise ValueError('unsupported depth file name "%s"' % path)

From f484a12501b7d13027fe98707c1be8ece3546153 Mon Sep 17 00:00:00 2001
From: Ildar Salakhiev
Date: Mon, 6 Mar 2023 12:47:51 +0000
Subject: [PATCH 13/43] BlobLoader tests boilerplate

---
 tests/implicitron/test_bbox.py        |  2 +-
 tests/implicitron/test_blob_loader.py | 89 +++++++++++++++++++++++++++
 2 files changed, 90 insertions(+), 1 deletion(-)
 create mode 100644 tests/implicitron/test_blob_loader.py

diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py
index 999dfc924..7d214d857 100644
--- a/tests/implicitron/test_bbox.py
+++ b/tests/implicitron/test_bbox.py
@@ -9,7 +9,7 @@
 import numpy as np
 import torch
-from pytorch3d.implicitron.dataset.json_index_dataset import (
+from pytorch3d.implicitron.dataset.blob_loader import (
     _bbox_xywh_to_xyxy,
     _bbox_xyxy_to_xywh,
     _get_bbox_from_mask,
 )
diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py
new file mode 100644
index 000000000..0e6bf6936
--- /dev/null
+++ b/tests/implicitron/test_blob_loader.py
@@ -0,0 +1,89 @@
+import contextlib
+import unittest
+
+import numpy as np
+
+import torch
+from pytorch3d.implicitron.dataset.blob_loader import (
+    _bbox_xywh_to_xyxy,
+    _bbox_xyxy_to_xywh,
+    _get_bbox_from_mask,
+)
+from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset
+from pytorch3d.implicitron.dataset.blob_loader import BlobLoader
+from tests.common_testing import TestCaseMixin
+from pytorch3d.implicitron.tools.config import expand_args_fields
+from pytorch3d.implicitron.tools.config import get_default_args
+
+
+class TestBlobLoader(TestCaseMixin, unittest.TestCase):
+    def setUp(self):
+        torch.manual_seed(42)
+        self.blob_loader = BlobLoader()
+
+        category = "skateboard"
+        stack = contextlib.ExitStack()
+        dataset_root, path_manager = stack.enter_context(get_skateboard_data())
+        self.addCleanup(stack.close)
+        frame_file = os.path.join(dataset_root, category, "frame_annotations.jgz")
+        sequence_file = os.path.join(dataset_root, category, "sequence_annotations.jgz")
+        self.image_size = 256
+
+        expand_args_fields(JsonIndexDataset)
+
+        self.datasets = JsonIndexDataset(
+            frame_annotations_file=frame_file,
+            sequence_annotations_file=sequence_file,
+            dataset_root=dataset_root,
+            image_height=self.image_size,
+            image_width=self.image_size,
+            box_crop=True,
+            load_point_clouds=True,
+            path_manager=path_manager,
+        )
+
+    def test_BlobLoader_args(self):
+        # test that BlobLoader works with get_default_args
+        get_default_args(BlobLoader)
+
+    def test_load_crop_fg_probability(self):
+        pass
+
+    def test_load_crop_images(self):
+        pass
+
+    def test_load_mask_depth(self):
+        pass
+
+    def test_fix_point_cloud_path(self):
+        pass
+
+    def test_resize_image(self):
+        pass
+
+    def test_crop_around_box(self):
+        pass
+
+    def test_clamp_box_to_image_bounds_and_round(self):
+        pass
+
+    def test_get_clamp_bbox(self):
+        pass
+
+    def test_load_depth(self):
+        pass
+
+    def test_load_16big_png_depth(self):
+        pass
+
+    def test_rescale_bbox(self):
+        pass
+
+    def test_load_1bit_png_mask(self):
+ pass + + def test_load_depth_mask(self): + pass + + def test_get_1d_bounds(self): + pass From b8674eaa4c6645bcceae089dcc2d12dee730f657 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 7 Mar 2023 13:11:45 +0000 Subject: [PATCH 14/43] tests WIP (not tested) --- tests/implicitron/test_bbox.py | 43 +++++++++ tests/implicitron/test_blob_loader.py | 124 +++++++++++++++++++------- 2 files changed, 136 insertions(+), 31 deletions(-) diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index 7d214d857..ddbcd6bd1 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -13,6 +13,11 @@ _bbox_xywh_to_xyxy, _bbox_xyxy_to_xywh, _get_bbox_from_mask, + _crop_around_box, + _clamp_box_to_image_bounds_and_round, + _get_clamp_bbox, + _rescale_bbox, + _get_1d_bounds, ) from tests.common_testing import TestCaseMixin @@ -76,3 +81,41 @@ def test_mask_to_bbox(self): expected_bbox_xywh = [2, 1, 2, 1] bbox_xywh = _get_bbox_from_mask(mask, 0.5) self.assertClose(bbox_xywh, expected_bbox_xywh) + + def test_crop_around_box(self): + bbox = (0, 1, 2, 2) # (x_min, y_min, x_max, y_max) + image = torch.LongTensor( + [ + [0, 0, 10, 20], + [10, 20, 5, 1], + [10, 20, 1, 1], + [5, 4, 0, 1], + ] + ) + cropped = _crop_around_box(image, bbox) + self.assertClose(cropped, image[0:2, 1:2]) + + def test_clamp_box_to_image_bounds_and_round(self): + bbox = torch.LongTensor([0, 1, 10, 12]) + image_size = (5, 6) + clamped_bbox = _clamp_box_to_image_bounds_and_round(bbox) + self.assertClose(clamped_bbox == [0, 1, 5, 6]) + + def test_get_clamp_bbox(self): + bbox_xywh = torch.LongTensor([1, 1, 4, 5]) + clamped_bbox_xyxy = _get_clamp_bbox(bbox, box_crop_context=2) + # size multiplied by 2 and added coordinates + self.assertClose(clamped_bbox_xyxy == torch.LongTensor([0, 1, 9, 11])) + + def test_rescale_bbox(self): + bbox = torch.LongTensor([0, 1, 3, 4]) + original_resolution = (4, 4) # + new_resolution = (8, 8) + rescaled_bbox = _rescale_bbox(bbox, original_resolution, new_resolution) + self.assertClose(bbox * 2 == rescaled_bbox) + + def test_get_1d_bounds(self): + array = [0, 1, 2] + bounds = _get_1d_bounds(array) + # make nonzero 1d bounds of image + assert bounds == [1, 2] diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 0e6bf6936..da3326421 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -5,9 +5,12 @@ import torch from pytorch3d.implicitron.dataset.blob_loader import ( - _bbox_xywh_to_xyxy, - _bbox_xyxy_to_xywh, - _get_bbox_from_mask, + _load_image, + _load_mask, + _load_depth, + _load_16big_png_depth, + _load_1bit_png_mask, + _load_depth_mask, ) from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset from pytorch3d.implicitron.dataset.blob_loader import BlobLoader @@ -41,49 +44,108 @@ def setUp(self): load_point_clouds=True, path_manager=path_manager, ) + self.entry = self.datasets.frame_annots[index]["frame_annotation"] def test_BlobLoader_args(self): # test that BlobLoader works with get_default_args get_default_args(BlobLoader) - def test_load_crop_fg_probability(self): - pass - - def test_load_crop_images(self): - pass + def test_load_pipeline(self): + ( + fg_probability, + mask_path, + bbox_xywh, + clamp_bbox_xyxy, + crop_bbox_xywh, + ) = self.datasets.loader._load_crop_fg_probability(entry) + + assert fg_probability + assert mask_path + assert bbox_xywh + assert clamp_bbox_xyxy + assert crop_bbox_xywh + ( + image_rgb, + image_path, + mask_crop, + scale, 
+ ) = self.dataset.loader._load_crop_images( + self.entry, fg_probability, clamp_bbox_xyxy, + ) + assert image_rgb + assert image_path + assert mask_crop, + assert scale, + ( + depth_map, + depth_path, + depth_mask, + ) = self.dataset.loader._load_mask_depth( + self.entry, clamp_bbox_xyxy, fg_probability, + ) + assert depth_map + assert depth_path + assert depth_mask - def test_load_mask_depth(self): - pass + camera = self.dataset.loader._get_pytorch3d_camera( + self.entry, scale, clamp_bbox_xyxy, + ) + assert camera def test_fix_point_cloud_path(self): - pass + """Some files in Co3Dv2 have an accidental absolute path stored.""" + original_path = 'some_file_path' + modified_path = self.dataset.loader._fix_point_cloud_path(original_path) + assert original_path in modified_path + assert self.dataset.loader.dataset_root in modified_path def test_resize_image(self): - pass - - def test_crop_around_box(self): - pass - - def test_clamp_box_to_image_bounds_and_round(self): - pass - - def test_get_clamp_bbox(self): - pass + image = None + image_rgb, scale, mask_crop = self.dataset.loader._resize_image(image) + assert image_rgb.shape == (self.dataset.loader.width, self.dataset.loader.height) + assert scale == 1 + assert masc_crop.shape == (self.dataset.loader.width, self.dataset.loader.height) + + def test_load_image(self): + image = _load_image(self.entry.image.path) + assert image.dtype == np.float32 + assert torch.max(image) <= 1.0 + assert torch.min(image) >= 0.0 + + def test_load_mask(self): + mask = _load_mask(self.entry.mask.path) + assert mask.dtype == np.float32 + assert torch.max(mask) <= 1.0 + assert torch.min(mask) >= 0.0 def test_load_depth(self): - pass + entry_depth = self.entry.depth + # path = os.path.join(self.dataset_root, entry_depth.path) + path = entry_depth.path + depth_map = _load_depth(path, entry_depth.scale_adjustment) + assert depth_map.dtype == np.float32 + assert depth_map.shape def test_load_16big_png_depth(self): - pass - - def test_rescale_bbox(self): - pass + entry_depth = self.entry.depth + # path = os.path.join(self.dataset_root, entry_depth.path) + path = entry_depth.path + depth_map = _load_16big_png_depth(path) + assert depth_map.dtype == np.float32 + assert depth_map.shape def test_load_1bit_png_mask(self): - pass + entry_depth = self.entry.depth + # mask_path = os.path.join(self.dataset_root, entry_depth.mask_path) + mask_path = entry_depth.mask_path + mask = _load_16big_png_depth(mask_path) + assert mask.dtype == np.float32 + assert mask.shape def test_load_depth_mask(self): - pass - - def test_get_1d_bounds(self): - pass + entry_depth = self.entry.depth + # mask_path = os.path.join(self.dataset_root, entry_depth.mask_path) + mask_path = entry_depth.mask_path + mask = _load_depth_mask(mask_path) + assert mask.dtype == np.float32 + assert mask.shape From faeffcf3aa61716640fca15fe25e260fd524e953 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 09:58:42 +0000 Subject: [PATCH 15/43] tests typos and errors WIP --- tests/implicitron/test_bbox.py | 12 ++++++------ tests/implicitron/test_blob_loader.py | 7 ++++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index ddbcd6bd1..1e351d049 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -83,7 +83,7 @@ def test_mask_to_bbox(self): self.assertClose(bbox_xywh, expected_bbox_xywh) def test_crop_around_box(self): - bbox = (0, 1, 2, 2) # (x_min, y_min, x_max, y_max) + bbox = 
torxh.LongTensor([0, 1, 2, 2]) # (x_min, y_min, x_max, y_max) image = torch.LongTensor( [ [0, 0, 10, 20], @@ -98,24 +98,24 @@ def test_crop_around_box(self): def test_clamp_box_to_image_bounds_and_round(self): bbox = torch.LongTensor([0, 1, 10, 12]) image_size = (5, 6) - clamped_bbox = _clamp_box_to_image_bounds_and_round(bbox) + clamped_bbox = _clamp_box_to_image_bounds_and_round(bbox, image_size) self.assertClose(clamped_bbox == [0, 1, 5, 6]) def test_get_clamp_bbox(self): bbox_xywh = torch.LongTensor([1, 1, 4, 5]) - clamped_bbox_xyxy = _get_clamp_bbox(bbox, box_crop_context=2) + clamped_bbox_xyxy = _get_clamp_bbox(bbox_xywh, box_crop_context=2) # size multiplied by 2 and added coordinates self.assertClose(clamped_bbox_xyxy == torch.LongTensor([0, 1, 9, 11])) def test_rescale_bbox(self): bbox = torch.LongTensor([0, 1, 3, 4]) - original_resolution = (4, 4) # + original_resolution = (4, 4) new_resolution = (8, 8) rescaled_bbox = _rescale_bbox(bbox, original_resolution, new_resolution) - self.assertClose(bbox * 2 == rescaled_bbox) + self.assertClose(bbox * 2, rescaled_bbox) def test_get_1d_bounds(self): array = [0, 1, 2] bounds = _get_1d_bounds(array) # make nonzero 1d bounds of image - assert bounds == [1, 2] + assert bounds == [1, 3] diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index da3326421..692ecbd62 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -18,11 +18,12 @@ from pytorch3d.implicitron.tools.config import expand_args_fields from pytorch3d.implicitron.tools.config import get_default_args +from tests.implicitron.common_resources import get_skateboard_data + class TestBlobLoader(TestCaseMixin, unittest.TestCase): def setUp(self): torch.manual_seed(42) - self.blob_loader = BlobLoader() category = "skateboard" stack = contextlib.ExitStack() @@ -74,8 +75,8 @@ def test_load_pipeline(self): ) assert image_rgb assert image_path - assert mask_crop, - assert scale, + assert mask_crop + assert scale ( depth_map, depth_path, From bc24e29d7640773e0892288b919b3e1f851ec37d Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 10:23:19 +0000 Subject: [PATCH 16/43] tests typos and errors WIP --- tests/implicitron/test_bbox.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index 1e351d049..5381e709e 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -83,7 +83,7 @@ def test_mask_to_bbox(self): self.assertClose(bbox_xywh, expected_bbox_xywh) def test_crop_around_box(self): - bbox = torxh.LongTensor([0, 1, 2, 2]) # (x_min, y_min, x_max, y_max) + bbox = torch.LongTensor([0, 1, 2, 2]) # (x_min, y_min, x_max, y_max) image = torch.LongTensor( [ [0, 0, 10, 20], @@ -95,27 +95,31 @@ def test_crop_around_box(self): cropped = _crop_around_box(image, bbox) self.assertClose(cropped, image[0:2, 1:2]) + + def test_clamp_box_to_image_bounds_and_round(self): bbox = torch.LongTensor([0, 1, 10, 12]) image_size = (5, 6) clamped_bbox = _clamp_box_to_image_bounds_and_round(bbox, image_size) - self.assertClose(clamped_bbox == [0, 1, 5, 6]) + self.assertClose(clamped_bbox, [0, 1, 5, 6]) def test_get_clamp_bbox(self): bbox_xywh = torch.LongTensor([1, 1, 4, 5]) clamped_bbox_xyxy = _get_clamp_bbox(bbox_xywh, box_crop_context=2) # size multiplied by 2 and added coordinates - self.assertClose(clamped_bbox_xyxy == torch.LongTensor([0, 1, 9, 11])) + self.assertClose(clamped_bbox_xyxy, 
torch.LongTensor([0, 1, 9, 11])) def test_rescale_bbox(self): bbox = torch.LongTensor([0, 1, 3, 4]) original_resolution = (4, 4) new_resolution = (8, 8) rescaled_bbox = _rescale_bbox(bbox, original_resolution, new_resolution) + print(rescaled_bbox) self.assertClose(bbox * 2, rescaled_bbox) def test_get_1d_bounds(self): array = [0, 1, 2] bounds = _get_1d_bounds(array) # make nonzero 1d bounds of image + print(bounds) assert bounds == [1, 3] From e9c59693ed78dfb036db3e056724b88252f6fbe7 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 11:03:16 +0000 Subject: [PATCH 17/43] solved error and typos for test_bbox --- tests/implicitron/test_bbox.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index 5381e709e..89b624199 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -83,7 +83,7 @@ def test_mask_to_bbox(self): self.assertClose(bbox_xywh, expected_bbox_xywh) def test_crop_around_box(self): - bbox = torch.LongTensor([0, 1, 2, 2]) # (x_min, y_min, x_max, y_max) + bbox = torch.LongTensor([0, 1, 2, 3]) # (x_min, y_min, x_max, y_max) image = torch.LongTensor( [ [0, 0, 10, 20], @@ -93,33 +93,30 @@ def test_crop_around_box(self): ] ) cropped = _crop_around_box(image, bbox) - self.assertClose(cropped, image[0:2, 1:2]) - - + self.assertClose(cropped, image[1:3, 0:2]) def test_clamp_box_to_image_bounds_and_round(self): bbox = torch.LongTensor([0, 1, 10, 12]) image_size = (5, 6) + expected_clamped_bbox = torch.LongTensor([0, 1, image_size[1], image_size[0]]) clamped_bbox = _clamp_box_to_image_bounds_and_round(bbox, image_size) - self.assertClose(clamped_bbox, [0, 1, 5, 6]) + self.assertClose(clamped_bbox, expected_clamped_bbox) def test_get_clamp_bbox(self): bbox_xywh = torch.LongTensor([1, 1, 4, 5]) clamped_bbox_xyxy = _get_clamp_bbox(bbox_xywh, box_crop_context=2) # size multiplied by 2 and added coordinates - self.assertClose(clamped_bbox_xyxy, torch.LongTensor([0, 1, 9, 11])) + self.assertClose(clamped_bbox_xyxy, torch.Tensor([-3, -4, 9, 11])) def test_rescale_bbox(self): - bbox = torch.LongTensor([0, 1, 3, 4]) + bbox = torch.Tensor([0.0, 1.0, 3.0, 4.0]) original_resolution = (4, 4) - new_resolution = (8, 8) + new_resolution = (8, 8) # twice bigger rescaled_bbox = _rescale_bbox(bbox, original_resolution, new_resolution) - print(rescaled_bbox) self.assertClose(bbox * 2, rescaled_bbox) def test_get_1d_bounds(self): array = [0, 1, 2] bounds = _get_1d_bounds(array) # make nonzero 1d bounds of image - print(bounds) - assert bounds == [1, 3] + self.assertClose(bounds, [1, 3]) From 44cfcfb9f243c16f6153617166eb28461705f1cc Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 13:28:54 +0000 Subject: [PATCH 18/43] updating test_blob_loader WIP --- tests/implicitron/test_blob_loader.py | 78 +++++++++++++++------------ 1 file changed, 43 insertions(+), 35 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 692ecbd62..d54754d88 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -1,3 +1,5 @@ +import os +import math import contextlib import unittest @@ -14,6 +16,7 @@ ) from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset from pytorch3d.implicitron.dataset.blob_loader import BlobLoader +from pytorch3d.renderer.cameras import PerspectiveCameras from tests.common_testing import TestCaseMixin from 
pytorch3d.implicitron.tools.config import expand_args_fields from pytorch3d.implicitron.tools.config import get_default_args @@ -27,7 +30,7 @@ def setUp(self): category = "skateboard" stack = contextlib.ExitStack() - dataset_root, path_manager = stack.enter_context(get_skateboard_data()) + self.dataset_root, self.path_manager = stack.enter_context(get_skateboard_data()) self.addCleanup(stack.close) frame_file = os.path.join(dataset_root, category, "frame_annotations.jgz") sequence_file = os.path.join(dataset_root, category, "sequence_annotations.jgz") @@ -35,17 +38,18 @@ def setUp(self): expand_args_fields(JsonIndexDataset) - self.datasets = JsonIndexDataset( + self.dataset = JsonIndexDataset( frame_annotations_file=frame_file, sequence_annotations_file=sequence_file, - dataset_root=dataset_root, + dataset_root=self.dataset_root, image_height=self.image_size, image_width=self.image_size, box_crop=True, load_point_clouds=True, - path_manager=path_manager, + path_manager=self.path_manager, ) - self.entry = self.datasets.frame_annots[index]["frame_annotation"] + index = 7000 + self.entry = self.dataset.frame_annots[index]["frame_annotation"] def test_BlobLoader_args(self): # test that BlobLoader works with get_default_args @@ -58,57 +62,66 @@ def test_load_pipeline(self): bbox_xywh, clamp_bbox_xyxy, crop_bbox_xywh, - ) = self.datasets.loader._load_crop_fg_probability(entry) + ) = self.dataset.blob_loader._load_crop_fg_probability(self.entry) - assert fg_probability + assert torch.is_tensor(fg_probability) assert mask_path assert bbox_xywh - assert clamp_bbox_xyxy - assert crop_bbox_xywh + assert torch.is_tensor(clamp_bbox_xyxy) + assert torch.is_tensor(crop_bbox_xywh) ( image_rgb, image_path, mask_crop, scale, - ) = self.dataset.loader._load_crop_images( + ) = self.dataset.blob_loader._load_crop_images( self.entry, fg_probability, clamp_bbox_xyxy, ) - assert image_rgb + assert torch.is_tensor(image_rgb) assert image_path - assert mask_crop + assert torch.is_tensor(mask_crop) assert scale ( depth_map, depth_path, depth_mask, - ) = self.dataset.loader._load_mask_depth( + ) = self.dataset.blob_loader._load_mask_depth( self.entry, clamp_bbox_xyxy, fg_probability, ) - assert depth_map - assert depth_path - assert depth_mask + assert torch.is_tensor(depth_map) + assert torch.is_tensor(depth_path) + assert torch.is_tensor(depth_mask) - camera = self.dataset.loader._get_pytorch3d_camera( + camera = self.dataset.blob_loader._get_pytorch3d_camera( self.entry, scale, clamp_bbox_xyxy, ) - assert camera + assert type(camera) == PerspectiveCameras def test_fix_point_cloud_path(self): """Some files in Co3Dv2 have an accidental absolute path stored.""" original_path = 'some_file_path' - modified_path = self.dataset.loader._fix_point_cloud_path(original_path) + modified_path = self.dataset.blob_loader._fix_point_cloud_path(original_path) assert original_path in modified_path - assert self.dataset.loader.dataset_root in modified_path + assert self.dataset.blob_loader.dataset_root in modified_path def test_resize_image(self): - image = None - image_rgb, scale, mask_crop = self.dataset.loader._resize_image(image) - assert image_rgb.shape == (self.dataset.loader.width, self.dataset.loader.height) - assert scale == 1 - assert masc_crop.shape == (self.dataset.loader.width, self.dataset.loader.height) + path = os.path.join(self.dataset_root, self.entry.image.path) + local_path = self.path_manager.get_local_path(path) + image = _load_image(local_path) + image_rgb, scale, mask_crop = 
self.dataset.blob_loader._resize_image(image) + + original_shape = image.shape[-2:] + expected_shape = (self.dataset.blob_loader.image_width, self.dataset.blob_loader.image_height) + expected_scale = expected_shape[0] / original_shape[0] + + assert scale == expected_scale + assert image_rgb.shape[-2:] == expected_shape + assert mask_crop.shape[-2:] == expected_shape def test_load_image(self): - image = _load_image(self.entry.image.path) + path = os.path.join(self.dataset_root, self.entry.image.path) + local_path = self.path_manager.get_local_path(path) + image = _load_image(local_path) assert image.dtype == np.float32 assert torch.max(image) <= 1.0 assert torch.min(image) >= 0.0 @@ -120,32 +133,27 @@ def test_load_mask(self): assert torch.min(mask) >= 0.0 def test_load_depth(self): - entry_depth = self.entry.depth - # path = os.path.join(self.dataset_root, entry_depth.path) + path = os.path.join(self.dataset_root, entry_depth.path) path = entry_depth.path depth_map = _load_depth(path, entry_depth.scale_adjustment) assert depth_map.dtype == np.float32 assert depth_map.shape def test_load_16big_png_depth(self): - entry_depth = self.entry.depth - # path = os.path.join(self.dataset_root, entry_depth.path) - path = entry_depth.path + path = os.path.join(self.dataset_root, self.entry.depth.path) depth_map = _load_16big_png_depth(path) assert depth_map.dtype == np.float32 assert depth_map.shape def test_load_1bit_png_mask(self): - entry_depth = self.entry.depth - # mask_path = os.path.join(self.dataset_root, entry_depth.mask_path) + mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) mask_path = entry_depth.mask_path mask = _load_16big_png_depth(mask_path) assert mask.dtype == np.float32 assert mask.shape def test_load_depth_mask(self): - entry_depth = self.entry.depth - # mask_path = os.path.join(self.dataset_root, entry_depth.mask_path) + mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) mask_path = entry_depth.mask_path mask = _load_depth_mask(mask_path) assert mask.dtype == np.float32 From 11def0a8b452a1479d63fe9ba665f2adc6687553 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 14:50:11 +0000 Subject: [PATCH 19/43] blob loader tests ready for review --- tests/implicitron/test_bbox.py | 9 +- tests/implicitron/test_blob_loader.py | 119 ++++++++++++++++---------- 2 files changed, 81 insertions(+), 47 deletions(-) diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index 89b624199..8dffd751d 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -12,13 +12,14 @@ from pytorch3d.implicitron.dataset.blob_loader import ( _bbox_xywh_to_xyxy, _bbox_xyxy_to_xywh, - _get_bbox_from_mask, - _crop_around_box, _clamp_box_to_image_bounds_and_round, + _crop_around_box, + _get_1d_bounds, + _get_bbox_from_mask, _get_clamp_bbox, _rescale_bbox, - _get_1d_bounds, ) + from tests.common_testing import TestCaseMixin @@ -83,7 +84,7 @@ def test_mask_to_bbox(self): self.assertClose(bbox_xywh, expected_bbox_xywh) def test_crop_around_box(self): - bbox = torch.LongTensor([0, 1, 2, 3]) # (x_min, y_min, x_max, y_max) + bbox = torch.LongTensor([0, 1, 2, 3]) # (x_min, y_min, x_max, y_max) image = torch.LongTensor( [ [0, 0, 10, 20], diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index d54754d88..461b2109c 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -1,25 +1,24 @@ -import os -import math import contextlib +import os 
import unittest import numpy as np import torch from pytorch3d.implicitron.dataset.blob_loader import ( - _load_image, - _load_mask, - _load_depth, _load_16big_png_depth, _load_1bit_png_mask, + _load_depth, _load_depth_mask, + _load_image, + _load_mask, + BlobLoader, ) from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset -from pytorch3d.implicitron.dataset.blob_loader import BlobLoader +from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args from pytorch3d.renderer.cameras import PerspectiveCameras + from tests.common_testing import TestCaseMixin -from pytorch3d.implicitron.tools.config import expand_args_fields -from pytorch3d.implicitron.tools.config import get_default_args from tests.implicitron.common_resources import get_skateboard_data @@ -30,23 +29,28 @@ def setUp(self): category = "skateboard" stack = contextlib.ExitStack() - self.dataset_root, self.path_manager = stack.enter_context(get_skateboard_data()) + self.dataset_root, self.path_manager = stack.enter_context( + get_skateboard_data() + ) self.addCleanup(stack.close) - frame_file = os.path.join(dataset_root, category, "frame_annotations.jgz") - sequence_file = os.path.join(dataset_root, category, "sequence_annotations.jgz") - self.image_size = 256 + frame_file = os.path.join(self.dataset_root, category, "frame_annotations.jgz") + sequence_file = os.path.join( + self.dataset_root, category, "sequence_annotations.jgz" + ) + self.image_height = 768 + self.image_width = 512 expand_args_fields(JsonIndexDataset) self.dataset = JsonIndexDataset( - frame_annotations_file=frame_file, - sequence_annotations_file=sequence_file, - dataset_root=self.dataset_root, - image_height=self.image_size, - image_width=self.image_size, - box_crop=True, - load_point_clouds=True, - path_manager=self.path_manager, + frame_annotations_file=frame_file, + sequence_annotations_file=sequence_file, + dataset_root=self.dataset_root, + image_height=self.image_height, + image_width=self.image_width, + box_crop=True, + load_point_clouds=True, + path_manager=self.path_manager, ) index = 7000 self.entry = self.dataset.frame_annots[index]["frame_annotation"] @@ -64,42 +68,68 @@ def test_load_pipeline(self): crop_bbox_xywh, ) = self.dataset.blob_loader._load_crop_fg_probability(self.entry) - assert torch.is_tensor(fg_probability) assert mask_path - assert bbox_xywh + assert torch.is_tensor(fg_probability) + assert torch.is_tensor(bbox_xywh) assert torch.is_tensor(clamp_bbox_xyxy) assert torch.is_tensor(crop_bbox_xywh) + # assert bboxes shape + assert fg_probability.shape == torch.Shape( + [1, self.image_height, self.image_width] + ) + assert bbox_xywh.shape == torch.Shape([4]) + assert clamp_bbox_xyxy == torch.Shape([4]) + assert crop_bbox_xywh.shape == torch.Shape([4]) ( image_rgb, image_path, mask_crop, scale, ) = self.dataset.blob_loader._load_crop_images( - self.entry, fg_probability, clamp_bbox_xyxy, + self.entry, + fg_probability, + clamp_bbox_xyxy, ) assert torch.is_tensor(image_rgb) assert image_path assert torch.is_tensor(mask_crop) assert scale + # assert image and mask shapes + assert image_rgb.shape == torch.Shape([3, self.image_height, self.image_width]) + assert mask_crop.shape == torch.Shape( + [1, self.image_height, self.image_width], + ) + ( depth_map, depth_path, depth_mask, ) = self.dataset.blob_loader._load_mask_depth( - self.entry, clamp_bbox_xyxy, fg_probability, + self.entry, + clamp_bbox_xyxy, + fg_probability, ) assert torch.is_tensor(depth_map) - assert torch.is_tensor(depth_path) + 
assert depth_path assert torch.is_tensor(depth_mask) + # assert image and mask shapes + assert depth_map.shape == torch.Shape( + [1, self.image_height, self.image_width], + ) + assert depth_mask.shape == torch.Shape( + [1, self.image_height, self.image_width], + ) camera = self.dataset.blob_loader._get_pytorch3d_camera( - self.entry, scale, clamp_bbox_xyxy, - ) + self.entry, + scale, + clamp_bbox_xyxy, + ) assert type(camera) == PerspectiveCameras def test_fix_point_cloud_path(self): """Some files in Co3Dv2 have an accidental absolute path stored.""" - original_path = 'some_file_path' + original_path = "some_file_path" modified_path = self.dataset.blob_loader._fix_point_cloud_path(original_path) assert original_path in modified_path assert self.dataset.blob_loader.dataset_root in modified_path @@ -111,8 +141,13 @@ def test_resize_image(self): image_rgb, scale, mask_crop = self.dataset.blob_loader._resize_image(image) original_shape = image.shape[-2:] - expected_shape = (self.dataset.blob_loader.image_width, self.dataset.blob_loader.image_height) - expected_scale = expected_shape[0] / original_shape[0] + expected_shape = ( + self.image_height, + self.image_width, + ) + expected_scale = min( + expected_shape[0] / original_shape[0], expected_shape[1] / original_shape[1] + ) assert scale == expected_scale assert image_rgb.shape[-2:] == expected_shape @@ -123,19 +158,19 @@ def test_load_image(self): local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) assert image.dtype == np.float32 - assert torch.max(image) <= 1.0 - assert torch.min(image) >= 0.0 + assert np.max(image) <= 1.0 + assert np.min(image) >= 0.0 def test_load_mask(self): - mask = _load_mask(self.entry.mask.path) + path = os.path.join(self.dataset_root, self.entry.mask.path) + mask = _load_mask(path) assert mask.dtype == np.float32 - assert torch.max(mask) <= 1.0 - assert torch.min(mask) >= 0.0 + assert np.max(mask) <= 1.0 + assert np.min(mask) >= 0.0 def test_load_depth(self): - path = os.path.join(self.dataset_root, entry_depth.path) - path = entry_depth.path - depth_map = _load_depth(path, entry_depth.scale_adjustment) + path = os.path.join(self.dataset_root, self.entry.depth.path) + depth_map = _load_depth(path, self.entry.depth.scale_adjustment) assert depth_map.dtype == np.float32 assert depth_map.shape @@ -147,14 +182,12 @@ def test_load_16big_png_depth(self): def test_load_1bit_png_mask(self): mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) - mask_path = entry_depth.mask_path - mask = _load_16big_png_depth(mask_path) + mask = _load_1bit_png_mask(mask_path) assert mask.dtype == np.float32 - assert mask.shape + assert len(mask.shape) == 3 def test_load_depth_mask(self): mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) - mask_path = entry_depth.mask_path mask = _load_depth_mask(mask_path) assert mask.dtype == np.float32 - assert mask.shape + assert len(mask.shape) == 3 From bc52382a7991c69107645c0a91e5ea6dd7511f25 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 14:54:25 +0000 Subject: [PATCH 20/43] typo --- tests/implicitron/test_blob_loader.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 461b2109c..cb2976011 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -74,12 +74,12 @@ def test_load_pipeline(self): assert torch.is_tensor(clamp_bbox_xyxy) assert 
torch.is_tensor(crop_bbox_xywh) # assert bboxes shape - assert fg_probability.shape == torch.Shape( + assert fg_probability.shape == torch.Size( [1, self.image_height, self.image_width] ) - assert bbox_xywh.shape == torch.Shape([4]) - assert clamp_bbox_xyxy == torch.Shape([4]) - assert crop_bbox_xywh.shape == torch.Shape([4]) + assert bbox_xywh.shape == torch.Size([4]) + assert clamp_bbox_xyxy == torch.Size([4]) + assert crop_bbox_xywh.shape == torch.Size([4]) ( image_rgb, image_path, @@ -95,8 +95,8 @@ def test_load_pipeline(self): assert torch.is_tensor(mask_crop) assert scale # assert image and mask shapes - assert image_rgb.shape == torch.Shape([3, self.image_height, self.image_width]) - assert mask_crop.shape == torch.Shape( + assert image_rgb.shape == torch.Size([3, self.image_height, self.image_width]) + assert mask_crop.shape == torch.Size( [1, self.image_height, self.image_width], ) @@ -113,10 +113,10 @@ def test_load_pipeline(self): assert depth_path assert torch.is_tensor(depth_mask) # assert image and mask shapes - assert depth_map.shape == torch.Shape( + assert depth_map.shape == torch.Size( [1, self.image_height, self.image_width], ) - assert depth_mask.shape == torch.Shape( + assert depth_mask.shape == torch.Size( [1, self.image_height, self.image_width], ) From 01493775ea0d2c55069fc6348ce80aaa56cbf104 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 14:58:43 +0000 Subject: [PATCH 21/43] typo --- tests/implicitron/test_blob_loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index cb2976011..5f694b897 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -78,7 +78,7 @@ def test_load_pipeline(self): [1, self.image_height, self.image_width] ) assert bbox_xywh.shape == torch.Size([4]) - assert clamp_bbox_xyxy == torch.Size([4]) + assert clamp_bbox_xyxy.shape == torch.Size([4]) assert crop_bbox_xywh.shape == torch.Size([4]) ( image_rgb, @@ -184,7 +184,7 @@ def test_load_1bit_png_mask(self): mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) mask = _load_1bit_png_mask(mask_path) assert mask.dtype == np.float32 - assert len(mask.shape) == 3 + assert len(mask.shape) == 2 def test_load_depth_mask(self): mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) From 3bcbd018cd04941a1541d58c724cd266803ae768 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 15:23:39 +0000 Subject: [PATCH 22/43] linter --- tests/implicitron/test_blob_loader.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 5f694b897..96d8fac60 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -96,9 +96,7 @@ def test_load_pipeline(self): assert scale # assert image and mask shapes assert image_rgb.shape == torch.Size([3, self.image_height, self.image_width]) - assert mask_crop.shape == torch.Size( - [1, self.image_height, self.image_width], - ) + assert mask_crop.shape == torch.Size([1, self.image_height, self.image_width]) ( depth_map, @@ -113,12 +111,8 @@ def test_load_pipeline(self): assert depth_path assert torch.is_tensor(depth_mask) # assert image and mask shapes - assert depth_map.shape == torch.Size( - [1, self.image_height, self.image_width], - ) - assert depth_mask.shape == torch.Size( - [1, self.image_height, self.image_width], 
- ) + assert depth_map.shape == torch.Size([1, self.image_height, self.image_width]) + assert depth_mask.shape == torch.Size([1, self.image_height, self.image_width]) camera = self.dataset.blob_loader._get_pytorch3d_camera( self.entry, @@ -178,7 +172,7 @@ def test_load_16big_png_depth(self): path = os.path.join(self.dataset_root, self.entry.depth.path) depth_map = _load_16big_png_depth(path) assert depth_map.dtype == np.float32 - assert depth_map.shape + assert len(depth_map.shape) == 2 def test_load_1bit_png_mask(self): mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) From 269cffa9ca8327cabb90104407df1c285e3405e3 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Thu, 9 Mar 2023 15:35:02 +0000 Subject: [PATCH 23/43] all entry tests run thru all frames --- tests/implicitron/test_blob_loader.py | 68 +++++++++++++++------------ 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 96d8fac60..619586f04 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -52,21 +52,38 @@ def setUp(self): load_point_clouds=True, path_manager=self.path_manager, ) - index = 7000 - self.entry = self.dataset.frame_annots[index]["frame_annotation"] def test_BlobLoader_args(self): # test that BlobLoader works with get_default_args get_default_args(BlobLoader) - def test_load_pipeline(self): + def test_fix_point_cloud_path(self): + """Some files in Co3Dv2 have an accidental absolute path stored.""" + original_path = "some_file_path" + modified_path = self.dataset.blob_loader._fix_point_cloud_path(original_path) + assert original_path in modified_path + assert self.dataset.blob_loader.dataset_root in modified_path + + def test_entry_loading_functions(self): + for index in range(len(self.dataset.frame_annots)): + entry = self.dataset.frame_annots[index]["frame_annotation"] + self.load_test(entry) + self._resize_image_test(entry) + self._load_image_test(entry) + self._load_mask_test(entry) + self._load_depth_test(entry) + self._load_16big_png_depth_test(entry) + self._load_1bit_png_mask_test(entry) + self._load_depth_mask_test(entry) + + def load_test(self, entry): ( fg_probability, mask_path, bbox_xywh, clamp_bbox_xyxy, crop_bbox_xywh, - ) = self.dataset.blob_loader._load_crop_fg_probability(self.entry) + ) = self.dataset.blob_loader._load_crop_fg_probability(entry) assert mask_path assert torch.is_tensor(fg_probability) @@ -86,7 +103,7 @@ def test_load_pipeline(self): mask_crop, scale, ) = self.dataset.blob_loader._load_crop_images( - self.entry, + entry, fg_probability, clamp_bbox_xyxy, ) @@ -103,7 +120,7 @@ def test_load_pipeline(self): depth_path, depth_mask, ) = self.dataset.blob_loader._load_mask_depth( - self.entry, + entry, clamp_bbox_xyxy, fg_probability, ) @@ -115,21 +132,14 @@ def test_load_pipeline(self): assert depth_mask.shape == torch.Size([1, self.image_height, self.image_width]) camera = self.dataset.blob_loader._get_pytorch3d_camera( - self.entry, + entry, scale, clamp_bbox_xyxy, ) assert type(camera) == PerspectiveCameras - def test_fix_point_cloud_path(self): - """Some files in Co3Dv2 have an accidental absolute path stored.""" - original_path = "some_file_path" - modified_path = self.dataset.blob_loader._fix_point_cloud_path(original_path) - assert original_path in modified_path - assert self.dataset.blob_loader.dataset_root in modified_path - - def test_resize_image(self): - path = os.path.join(self.dataset_root, self.entry.image.path) + 
def _resize_image_test(self, entry): + path = os.path.join(self.dataset_root, entry.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) image_rgb, scale, mask_crop = self.dataset.blob_loader._resize_image(image) @@ -147,41 +157,41 @@ def test_resize_image(self): assert image_rgb.shape[-2:] == expected_shape assert mask_crop.shape[-2:] == expected_shape - def test_load_image(self): - path = os.path.join(self.dataset_root, self.entry.image.path) + def _load_image_test(self, entry): + path = os.path.join(self.dataset_root, entry.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) assert image.dtype == np.float32 assert np.max(image) <= 1.0 assert np.min(image) >= 0.0 - def test_load_mask(self): - path = os.path.join(self.dataset_root, self.entry.mask.path) + def _load_mask_test(self, entry): + path = os.path.join(self.dataset_root, entry.mask.path) mask = _load_mask(path) assert mask.dtype == np.float32 assert np.max(mask) <= 1.0 assert np.min(mask) >= 0.0 - def test_load_depth(self): - path = os.path.join(self.dataset_root, self.entry.depth.path) - depth_map = _load_depth(path, self.entry.depth.scale_adjustment) + def _load_depth_test(self, entry): + path = os.path.join(self.dataset_root, entry.depth.path) + depth_map = _load_depth(path, entry.depth.scale_adjustment) assert depth_map.dtype == np.float32 assert depth_map.shape - def test_load_16big_png_depth(self): - path = os.path.join(self.dataset_root, self.entry.depth.path) + def _load_16big_png_depth_test(self, entry): + path = os.path.join(self.dataset_root, entry.depth.path) depth_map = _load_16big_png_depth(path) assert depth_map.dtype == np.float32 assert len(depth_map.shape) == 2 - def test_load_1bit_png_mask(self): - mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) + def _load_1bit_png_mask_test(self, entry): + mask_path = os.path.join(self.dataset_root, entry.depth.mask_path) mask = _load_1bit_png_mask(mask_path) assert mask.dtype == np.float32 assert len(mask.shape) == 2 - def test_load_depth_mask(self): - mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) + def _load_depth_mask_test(self, entry): + mask_path = os.path.join(self.dataset_root, entry.depth.mask_path) mask = _load_depth_mask(mask_path) assert mask.dtype == np.float32 assert len(mask.shape) == 3 From f930d71488ed978b7ca71525567e4f94ed721fc6 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 09:38:08 +0000 Subject: [PATCH 24/43] assert .. == .. to self.assertEqual(.., ..) 
---
 tests/implicitron/test_blob_loader.py | 46 +++++++++++++--------------
 1 file changed, 22 insertions(+), 24 deletions(-)

diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py
index 619586f04..059244c67 100644
--- a/tests/implicitron/test_blob_loader.py
+++ b/tests/implicitron/test_blob_loader.py
@@ -91,12 +91,10 @@ def load_test(self, entry):
         assert torch.is_tensor(clamp_bbox_xyxy)
         assert torch.is_tensor(crop_bbox_xywh)
         # assert bboxes shape
-        assert fg_probability.shape == torch.Size(
-            [1, self.image_height, self.image_width]
-        )
-        assert bbox_xywh.shape == torch.Size([4])
-        assert clamp_bbox_xyxy.shape == torch.Size([4])
-        assert crop_bbox_xywh.shape == torch.Size([4])
+        self.assertEqual(fg_probability.shape, torch.Size([1, self.image_height, self.image_width]))
+        self.assertEqual(bbox_xywh.shape, torch.Size([4]))
+        self.assertEqual(clamp_bbox_xyxy.shape, torch.Size([4]))
+        self.assertEqual(crop_bbox_xywh.shape, torch.Size([4]))
         (
             image_rgb,
             image_path,
@@ -112,8 +110,8 @@ def load_test(self, entry):
         assert torch.is_tensor(mask_crop)
         assert scale
         # assert image and mask shapes
-        assert image_rgb.shape == torch.Size([3, self.image_height, self.image_width])
-        assert mask_crop.shape == torch.Size([1, self.image_height, self.image_width])
+        self.assertEqual(image_rgb.shape, torch.Size([3, self.image_height, self.image_width]))
+        self.assertEqual(mask_crop.shape, torch.Size([1, self.image_height, self.image_width]))
 
         (
             depth_map,
@@ -128,15 +126,15 @@ def load_test(self, entry):
         assert depth_path
         assert torch.is_tensor(depth_mask)
         # assert image and mask shapes
-        assert depth_map.shape == torch.Size([1, self.image_height, self.image_width])
-        assert depth_mask.shape == torch.Size([1, self.image_height, self.image_width])
+        self.assertEqual(depth_map.shape, torch.Size([1, self.image_height, self.image_width]))
+        self.assertEqual(depth_mask.shape, torch.Size([1, self.image_height, self.image_width]))
 
         camera = self.dataset.blob_loader._get_pytorch3d_camera(
             entry,
             scale,
            clamp_bbox_xyxy,
         )
-        assert type(camera) == PerspectiveCameras
+        self.assertEqual(type(camera), PerspectiveCameras)
 
     def _resize_image_test(self, entry):
         path = os.path.join(self.dataset_root, entry.image.path)
         local_path = self.path_manager.get_local_path(path)
         image = _load_image(local_path)
         image_rgb, scale, mask_crop = self.dataset.blob_loader._resize_image(image)
 
         original_shape = image.shape[-2:]
         expected_shape = (
             self.image_height,
             self.image_width,
         )
         expected_scale = min(
             expected_shape[0] / original_shape[0], expected_shape[1] / original_shape[1]
         )
-        assert scale == expected_scale
-        assert image_rgb.shape[-2:] == expected_shape
-        assert mask_crop.shape[-2:] == expected_shape
+        self.assertEqual(scale, expected_scale)
+        self.assertEqual(image_rgb.shape[-2:], expected_shape)
+        self.assertEqual(mask_crop.shape[-2:], expected_shape)
 
     def _load_image_test(self, entry):
         path = os.path.join(self.dataset_root, entry.image.path)
         local_path = self.path_manager.get_local_path(path)
         image = _load_image(local_path)
-        assert image.dtype == np.float32
+        self.assertEqual(image.dtype, np.float32)
         assert np.max(image) <= 1.0
         assert np.min(image) >= 0.0
 
     def _load_mask_test(self, entry):
         path = os.path.join(self.dataset_root, entry.mask.path)
         mask = _load_mask(path)
-        assert mask.dtype == np.float32
+        self.assertEqual(mask.dtype, np.float32)
         assert np.max(mask) <= 1.0
         assert np.min(mask) >= 0.0
 
     def _load_depth_test(self, entry):
         path = os.path.join(self.dataset_root, entry.depth.path)
         depth_map = _load_depth(path, entry.depth.scale_adjustment)
-        assert depth_map.dtype == np.float32
-        assert depth_map.shape
+        self.assertEqual(depth_map.dtype, np.float32)
+        self.assertEqual(len(depth_map.shape), 2)
 
     def 
_load_16big_png_depth_test(self, entry): path = os.path.join(self.dataset_root, entry.depth.path) depth_map = _load_16big_png_depth(path) - assert depth_map.dtype == np.float32 - assert len(depth_map.shape) == 2 + self.assertEqual(depth_map.dtype, np.float32) + self.assertEqual(len(depth_map.shape), 2) def _load_1bit_png_mask_test(self, entry): mask_path = os.path.join(self.dataset_root, entry.depth.mask_path) mask = _load_1bit_png_mask(mask_path) - assert mask.dtype == np.float32 - assert len(mask.shape) == 2 + self.assertEqual(mask.dtype, np.float32) + self.assertEqual(len(mask.shape), 2) def _load_depth_mask_test(self, entry): mask_path = os.path.join(self.dataset_root, entry.depth.mask_path) mask = _load_depth_mask(mask_path) - assert mask.dtype == np.float32 - assert len(mask.shape) == 3 + self.assertEqual(mask.dtype, np.float32) + self.assertEqual(len(mask.shape), 3) From dc7a70280eed0715ddf8ab04267f883e7a4de8e4 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 09:42:24 +0000 Subject: [PATCH 25/43] testing only on 1 frame --- tests/implicitron/test_blob_loader.py | 63 ++++++++++----------------- 1 file changed, 23 insertions(+), 40 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 059244c67..a03e91537 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -52,6 +52,8 @@ def setUp(self): load_point_clouds=True, path_manager=self.path_manager, ) + index = 7000 + self.entry = self.dataset.frame_annots[index]["frame_annotation"] def test_BlobLoader_args(self): # test that BlobLoader works with get_default_args @@ -64,26 +66,14 @@ def test_fix_point_cloud_path(self): assert original_path in modified_path assert self.dataset.blob_loader.dataset_root in modified_path - def test_entry_loading_functions(self): - for index in range(len(self.dataset.frame_annots)): - entry = self.dataset.frame_annots[index]["frame_annotation"] - self.load_test(entry) - self._resize_image_test(entry) - self._load_image_test(entry) - self._load_mask_test(entry) - self._load_depth_test(entry) - self._load_16big_png_depth_test(entry) - self._load_1bit_png_mask_test(entry) - self._load_depth_mask_test(entry) - - def load_test(self, entry): + def test_load(self): ( fg_probability, mask_path, bbox_xywh, clamp_bbox_xyxy, crop_bbox_xywh, - ) = self.dataset.blob_loader._load_crop_fg_probability(entry) + ) = self.dataset.blob_loader._load_crop_fg_probability(self.entry) assert mask_path assert torch.is_tensor(fg_probability) @@ -96,15 +86,8 @@ def load_test(self, entry): self.assertEqual(clamp_bbox_xyxy.shape, torch.Size([4])) self.assertEqual(crop_bbox_xywh.shape, torch.Size([4])) ( - image_rgb, - image_path, - mask_crop, - scale, - ) = self.dataset.blob_loader._load_crop_images( - entry, - fg_probability, - clamp_bbox_xyxy, - ) + image_rgb, image_path, mask_crop, scale, + ) = self.dataset.blob_loader._load_crop_images(self.entry, fg_probability, clamp_bbox_xyxy) assert torch.is_tensor(image_rgb) assert image_path assert torch.is_tensor(mask_crop) @@ -118,7 +101,7 @@ def load_test(self, entry): depth_path, depth_mask, ) = self.dataset.blob_loader._load_mask_depth( - entry, + self.entry, clamp_bbox_xyxy, fg_probability, ) @@ -130,14 +113,14 @@ def load_test(self, entry): self.assertEqual(depth_mask.shape, torch.Size([1, self.image_height, self.image_width])) camera = self.dataset.blob_loader._get_pytorch3d_camera( - entry, + self.entry, scale, clamp_bbox_xyxy, ) self.assertEqual(type(camera), 
PerspectiveCameras) - def _resize_image_test(self, entry): - path = os.path.join(self.dataset_root, entry.image.path) + def test_resize_image(self): + path = os.path.join(self.dataset_root, self.entry.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) image_rgb, scale, mask_crop = self.dataset.blob_loader._resize_image(image) @@ -155,41 +138,41 @@ def _resize_image_test(self, entry): self.assertEqual(image_rgb.shape[-2:], expected_shape) self.assertEqual(mask_crop.shape[-2:], expected_shape) - def _load_image_test(self, entry): - path = os.path.join(self.dataset_root, entry.image.path) + def test_load_image(self): + path = os.path.join(self.dataset_root, self.entry.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) self.assertEqual(image.dtype, np.float32) assert np.max(image) <= 1.0 assert np.min(image) >= 0.0 - def _load_mask_test(self, entry): - path = os.path.join(self.dataset_root, entry.mask.path) + def test_load_mask(self): + path = os.path.join(self.dataset_root, self.entry.mask.path) mask = _load_mask(path) self.assertEqual(mask.dtype, np.float32) assert np.max(mask) <= 1.0 assert np.min(mask) >= 0.0 - def _load_depth_test(self, entry): - path = os.path.join(self.dataset_root, entry.depth.path) - depth_map = _load_depth(path, entry.depth.scale_adjustment) + def test_load_depth(self): + path = os.path.join(self.dataset_root, self.entry.depth.path) + depth_map = _load_depth(path, self.entry.depth.scale_adjustment) self.assertEqual(depth_map.dtype, np.float32) self.assertEqual(len(depth_map.shape), 2) - def _load_16big_png_depth_test(self, entry): - path = os.path.join(self.dataset_root, entry.depth.path) + def test_load_16big_png_depth(self): + path = os.path.join(self.dataset_root, self.entry.depth.path) depth_map = _load_16big_png_depth(path) self.assertEqual(depth_map.dtype, np.float32) self.assertEqual(len(depth_map.shape), 2) - def _load_1bit_png_mask_test(self, entry): - mask_path = os.path.join(self.dataset_root, entry.depth.mask_path) + def test_load_1bit_png_mask(self): + mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) mask = _load_1bit_png_mask(mask_path) self.assertEqual(mask.dtype, np.float32) self.assertEqual(len(mask.shape), 2) - def _load_depth_mask_test(self, entry): - mask_path = os.path.join(self.dataset_root, entry.depth.mask_path) + def test_load_depth_mask(self): + mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) mask = _load_depth_mask(mask_path) self.assertEqual(mask.dtype, np.float32) self.assertEqual(len(mask.shape), 3) From fcd8d8b45947c40370a2f93254da9e9169357a38 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 09:57:39 +0000 Subject: [PATCH 26/43] instead of loading whole dataset, loading only single frame annots --- tests/implicitron/test_blob_loader.py | 39 +++++++++++++-------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index a03e91537..b878dc0d0 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -14,6 +14,7 @@ _load_mask, BlobLoader, ) +from pytorch3d.implicitron.dataset import types from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args from pytorch3d.renderer.cameras import PerspectiveCameras @@ -40,20 +41,16 @@ def setUp(self): 
self.image_height = 768 self.image_width = 512 - expand_args_fields(JsonIndexDataset) - - self.dataset = JsonIndexDataset( - frame_annotations_file=frame_file, - sequence_annotations_file=sequence_file, - dataset_root=self.dataset_root, - image_height=self.image_height, - image_width=self.image_width, - box_crop=True, - load_point_clouds=True, - path_manager=self.path_manager, - ) - index = 7000 - self.entry = self.dataset.frame_annots[index]["frame_annotation"] + expand_args_fields(BlobLoader) + self.blob_loader = BlobLoader() + + # loading single frame annotation of dataset (see JsonIndexDataset._load_frames()) + local_file = self.path_manager.get_local_path(frame_file) + with gzip.open(local_file, "rt", encoding="utf8") as zipfile: + frame_annots_list = types.load_dataclass(zipfile, List[self.frame_annotations_type]) + + index = 0 + self.entry = FrameAnnotsEntry(frame_annotation=frame_annots_list[index], subset=None) def test_BlobLoader_args(self): # test that BlobLoader works with get_default_args @@ -62,9 +59,9 @@ def test_BlobLoader_args(self): def test_fix_point_cloud_path(self): """Some files in Co3Dv2 have an accidental absolute path stored.""" original_path = "some_file_path" - modified_path = self.dataset.blob_loader._fix_point_cloud_path(original_path) + modified_path = self.blob_loader._fix_point_cloud_path(original_path) assert original_path in modified_path - assert self.dataset.blob_loader.dataset_root in modified_path + assert self.blob_loader.dataset_root in modified_path def test_load(self): ( @@ -73,7 +70,7 @@ def test_load(self): bbox_xywh, clamp_bbox_xyxy, crop_bbox_xywh, - ) = self.dataset.blob_loader._load_crop_fg_probability(self.entry) + ) = self.blob_loader._load_crop_fg_probability(self.entry) assert mask_path assert torch.is_tensor(fg_probability) @@ -87,7 +84,7 @@ def test_load(self): self.assertEqual(crop_bbox_xywh.shape, torch.Size([4])) ( image_rgb, image_path, mask_crop, scale, - ) = self.dataset.blob_loader._load_crop_images(self.entry, fg_probability, clamp_bbox_xyxy) + ) = self.blob_loader._load_crop_images(self.entry, fg_probability, clamp_bbox_xyxy) assert torch.is_tensor(image_rgb) assert image_path assert torch.is_tensor(mask_crop) @@ -100,7 +97,7 @@ def test_load(self): depth_map, depth_path, depth_mask, - ) = self.dataset.blob_loader._load_mask_depth( + ) = self.blob_loader._load_mask_depth( self.entry, clamp_bbox_xyxy, fg_probability, @@ -112,7 +109,7 @@ def test_load(self): self.assertEqual(depth_map.shape, torch.Size([1, self.image_height, self.image_width])) self.assertEqual(depth_mask.shape, torch.Size([1, self.image_height, self.image_width])) - camera = self.dataset.blob_loader._get_pytorch3d_camera( + camera = self.blob_loader._get_pytorch3d_camera( self.entry, scale, clamp_bbox_xyxy, @@ -123,7 +120,7 @@ def test_resize_image(self): path = os.path.join(self.dataset_root, self.entry.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) - image_rgb, scale, mask_crop = self.dataset.blob_loader._resize_image(image) + image_rgb, scale, mask_crop = self.blob_loader._resize_image(image) original_shape = image.shape[-2:] expected_shape = ( From c3bd722507a8bca9eb9dfa0828ee3e32fe005bb0 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 10:15:38 +0000 Subject: [PATCH 27/43] added default values to BlobLoader to ease initialisation --- pytorch3d/implicitron/dataset/blob_loader.py | 28 ++++++++++---------- tests/implicitron/test_blob_loader.py | 2 +- 2 files changed, 15 insertions(+), 15 
deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index 035e99a83..362d1c459 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -57,20 +57,20 @@ class BlobLoader: dimension of the cropping bounding box, relative to box size. """ - dataset_root: str - load_images: bool - load_depths: bool - load_depth_masks: bool - load_masks: bool - load_point_clouds: bool - max_points: int - mask_images: bool - mask_depths: bool - image_height: Optional[int] - image_width: Optional[int] - box_crop: bool - box_crop_mask_thr: float - box_crop_context: float + dataset_root: str = "" + load_images: bool = True + load_depths: bool = True + load_depth_masks: bool = True + load_masks: bool = True + load_point_clouds: bool = False + max_points: int = 0 + mask_images: bool = False + mask_depths: bool = False + image_height: Optional[int] = 800 + image_width: Optional[int] = 800 + box_crop: bool = True + box_crop_mask_thr: float = 0.4 + box_crop_context: float = 0.3 path_manager: Any = None def load( diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index b878dc0d0..cdbb2d9c5 100644 @@ -91,7 +91,7 @@ def test_load(self): assert scale # assert image and mask shapes self.assertEqual(image_rgb.shape, torch.Size([3, self.image_height, self.image_width])) - self.assertEqual(mask_crop.shape, torch.Size([1, self.image_height, self.image_width]) + self.assertEqual(mask_crop.shape, torch.Size([1, self.image_height, self.image_width])) ( depth_map, From cb34c0134465bc8e2b5e463e5f123afa224920bf Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 10:59:18 +0000 Subject: [PATCH 28/43] making tests run on a single loaded frame --- tests/implicitron/test_blob_loader.py | 89 +++++++++++++++------------ 1 file changed, 50 insertions(+), 39 deletions(-) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index cdbb2d9c5..209830bbe 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -1,10 +1,13 @@ import contextlib +import gzip import os import unittest +from typing import List import numpy as np - import torch + +from pytorch3d.implicitron.dataset import types from pytorch3d.implicitron.dataset.blob_loader import ( _load_16big_png_depth, _load_1bit_png_mask, @@ -14,13 +17,10 @@ _load_mask, BlobLoader, ) -from pytorch3d.implicitron.dataset import types -from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args from pytorch3d.renderer.cameras import PerspectiveCameras from tests.common_testing import TestCaseMixin - from tests.implicitron.common_resources import get_skateboard_data @@ -34,23 +34,24 @@ def setUp(self): get_skateboard_data() ) self.addCleanup(stack.close) - frame_file = os.path.join(self.dataset_root, category, "frame_annotations.jgz") - sequence_file = os.path.join( - self.dataset_root, category, "sequence_annotations.jgz" - ) self.image_height = 768 self.image_width = 512 - expand_args_fields(BlobLoader) - self.blob_loader = BlobLoader() + self.blob_loader = BlobLoader( + image_height=self.image_height, + image_width=self.image_width, + dataset_root=self.dataset_root, + path_manager=self.path_manager, + ) # loading single frame annotation of dataset (see
JsonIndexDataset._load_frames()) + frame_file = os.path.join(self.dataset_root, category, "frame_annotations.jgz") local_file = self.path_manager.get_local_path(frame_file) with gzip.open(local_file, "rt", encoding="utf8") as zipfile: - frame_annots_list = types.load_dataclass(zipfile, List[self.frame_annotations_type]) - - index = 0 - self.entry = FrameAnnotsEntry(frame_annotation=frame_annots_list[index], subset=None) + frame_annots_list = types.load_dataclass( + zipfile, List[types.FrameAnnotation] + ) + self.frame_annotation = frame_annots_list[0] def test_BlobLoader_args(self): # test that BlobLoader works with get_default_args @@ -70,7 +71,7 @@ def test_load(self): bbox_xywh, clamp_bbox_xyxy, crop_bbox_xywh, - ) = self.blob_loader._load_crop_fg_probability(self.entry) + ) = self.blob_loader._load_crop_fg_probability(self.frame_annotation) assert mask_path assert torch.is_tensor(fg_probability) @@ -78,27 +79,29 @@ def test_load(self): assert torch.is_tensor(clamp_bbox_xyxy) assert torch.is_tensor(crop_bbox_xywh) # assert bboxes shape - self.assertEqual(fg_probability.shape, torch.Size([1, self.image_height, self.image_width])) + self.assertEqual( + fg_probability.shape, torch.Size([1, self.image_height, self.image_width]) + ) self.assertEqual(bbox_xywh.shape, torch.Size([4])) self.assertEqual(clamp_bbox_xyxy.shape, torch.Size([4])) self.assertEqual(crop_bbox_xywh.shape, torch.Size([4])) - ( - image_rgb, image_path, mask_crop, scale, - ) = self.blob_loader._load_crop_images(self.entry, fg_probability, clamp_bbox_xyxy) + (image_rgb, image_path, mask_crop, scale,) = self.blob_loader._load_crop_images( + self.frame_annotation, fg_probability, clamp_bbox_xyxy + ) assert torch.is_tensor(image_rgb) assert image_path assert torch.is_tensor(mask_crop) assert scale # assert image and mask shapes - self.assertEqual(image_rgb.shape, torch.Size([3, self.image_height, self.image_width])) - self.assertEqual(mask_crop.shape, torch.Size([1, self.image_height, self.image_width])) + self.assertEqual( + image_rgb.shape, torch.Size([3, self.image_height, self.image_width]) + ) + self.assertEqual( + mask_crop.shape, torch.Size([1, self.image_height, self.image_width]) + ) - ( - depth_map, - depth_path, - depth_mask, - ) = self.blob_loader._load_mask_depth( - self.entry, + (depth_map, depth_path, depth_mask,) = self.blob_loader._load_mask_depth( + self.frame_annotation, clamp_bbox_xyxy, fg_probability, ) @@ -106,18 +109,22 @@ def test_load(self): assert depth_path assert torch.is_tensor(depth_mask) # assert image and mask shapes - self.assertEqual(depth_map.shape, torch.Size([1, self.image_height, self.image_width])) - self.assertEqual(depth_mask.shape, torch.Size([1, self.image_height, self.image_width])) + self.assertEqual( + depth_map.shape, torch.Size([1, self.image_height, self.image_width]) + ) + self.assertEqual( + depth_mask.shape, torch.Size([1, self.image_height, self.image_width]) + ) camera = self.blob_loader._get_pytorch3d_camera( - self.entry, + self.frame_annotation, scale, clamp_bbox_xyxy, ) self.assertEqual(type(camera), PerspectiveCameras) def test_resize_image(self): - path = os.path.join(self.dataset_root, self.entry.image.path) + path = os.path.join(self.dataset_root, self.frame_annotation.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) image_rgb, scale, mask_crop = self.blob_loader._resize_image(image) @@ -136,7 +143,7 @@ def test_resize_image(self): self.assertEqual(mask_crop.shape[-2:], expected_shape) def test_load_image(self): - path 
= os.path.join(self.dataset_root, self.entry.image.path) + path = os.path.join(self.dataset_root, self.frame_annotation.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) self.assertEqual(image.dtype, np.float32) @@ -144,32 +151,36 @@ def test_load_image(self): assert np.min(image) >= 0.0 def test_load_mask(self): - path = os.path.join(self.dataset_root, self.entry.mask.path) + path = os.path.join(self.dataset_root, self.frame_annotation.mask.path) mask = _load_mask(path) self.assertEqual(mask.dtype, np.float32) assert np.max(mask) <= 1.0 assert np.min(mask) >= 0.0 def test_load_depth(self): - path = os.path.join(self.dataset_root, self.entry.depth.path) - depth_map = _load_depth(path, self.entry.depth.scale_adjustment) + path = os.path.join(self.dataset_root, self.frame_annotation.depth.path) + depth_map = _load_depth(path, self.frame_annotation.depth.scale_adjustment) self.assertEqual(depth_map.dtype, np.float32) - self.assertEqual(len(depth_map.shape), 2) + self.assertEqual(len(depth_map.shape), 3) def test_load_16big_png_depth(self): - path = os.path.join(self.dataset_root, self.entry.depth.path) + path = os.path.join(self.dataset_root, self.frame_annotation.depth.path) depth_map = _load_16big_png_depth(path) self.assertEqual(depth_map.dtype, np.float32) self.assertEqual(len(depth_map.shape), 2) def test_load_1bit_png_mask(self): - mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) + mask_path = os.path.join( + self.dataset_root, self.frame_annotation.depth.mask_path + ) mask = _load_1bit_png_mask(mask_path) self.assertEqual(mask.dtype, np.float32) self.assertEqual(len(mask.shape), 2) def test_load_depth_mask(self): - mask_path = os.path.join(self.dataset_root, self.entry.depth.mask_path) + mask_path = os.path.join( + self.dataset_root, self.frame_annotation.depth.mask_path + ) mask = _load_depth_mask(mask_path) self.assertEqual(mask.dtype, np.float32) self.assertEqual(len(mask.shape), 3) From 04b7d1591cf38c9957c19dd2ba3da53aeb023715 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 15:29:08 +0000 Subject: [PATCH 29/43] made _resize_image separate function (will ease use in pixar replay) --- pytorch3d/implicitron/dataset/blob_loader.py | 75 ++++++++++++-------- tests/implicitron/test_blob_loader.py | 5 +- 2 files changed, 50 insertions(+), 30 deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index 362d1c459..bedbc070e 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -165,7 +165,12 @@ def _load_crop_fg_probability( mask = _crop_around_box(mask, clamp_bbox_xyxy, full_path) - fg_probability, _, _ = self._resize_image(mask, mode="nearest") + fg_probability, _, _ = _resize_image( + mask, + image_height=self.image_height, + image_width=self.image_width, + mode="nearest", + ) return fg_probability, full_path, bbox_xywh, clamp_bbox_xyxy, crop_box_xywh @@ -188,7 +193,9 @@ def _load_crop_images( assert clamp_bbox_xyxy is not None image_rgb = _crop_around_box(image_rgb, clamp_bbox_xyxy, path) - image_rgb, scale, mask_crop = self._resize_image(image_rgb) + image_rgb, scale, mask_crop = _resize_image( + image_rgb, image_height=self.image_height, image_width=self.image_width + ) if self.mask_images: assert fg_probability is not None @@ -214,7 +221,12 @@ def _load_mask_depth( ) depth_map = _crop_around_box(depth_map, depth_bbox_xyxy, path) - depth_map, _, _ = 
self._resize_image(depth_map, mode="nearest") + depth_map, _, _ = _resize_image( + depth_map, + image_height=self.image_height, + image_width=self.image_width, + mode="nearest", + ) if self.mask_depths: assert fg_probability is not None @@ -234,7 +246,12 @@ def _load_mask_depth( depth_mask, depth_mask_bbox_xyxy, mask_path ) - depth_mask, _, _ = self._resize_image(depth_mask, mode="nearest") + depth_mask, _, _ = _resize_image( + depth_mask, + image_height=self.image_height, + image_width=self.image_width, + mode="nearest", + ) else: depth_mask = torch.ones_like(depth_map) @@ -314,31 +331,31 @@ def _local_path(self, path: str) -> str: return path return self.path_manager.get_local_path(path) - def _resize_image( - self, image, mode="bilinear" - ) -> Tuple[torch.Tensor, float, torch.Tensor]: - image_height, image_width = self.image_height, self.image_width - if image_height is None or image_width is None: - # skip the resizing - imre_ = torch.from_numpy(image) - return imre_, 1.0, torch.ones_like(imre_[:1]) - # takes numpy array, returns pytorch tensor - minscale = min( - image_height / image.shape[-2], - image_width / image.shape[-1], - ) - imre = torch.nn.functional.interpolate( - torch.from_numpy(image)[None], - scale_factor=minscale, - mode=mode, - align_corners=False if mode == "bilinear" else None, - recompute_scale_factor=True, - )[0] - imre_ = torch.zeros(image.shape[0], image_height, image_width) - imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre - mask = torch.zeros(1, image_height, image_width) - mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0 - return imre_, minscale, mask + +def _resize_image( + self, image, image_height, image_width, mode="bilinear" +) -> Tuple[torch.Tensor, float, torch.Tensor]: + if image_height is None or image_width is None: + # skip the resizing + imre_ = torch.from_numpy(image) + return imre_, 1.0, torch.ones_like(imre_[:1]) + # takes numpy array, returns pytorch tensor + minscale = min( + image_height / image.shape[-2], + image_width / image.shape[-1], + ) + imre = torch.nn.functional.interpolate( + torch.from_numpy(image)[None], + scale_factor=minscale, + mode=mode, + align_corners=False if mode == "bilinear" else None, + recompute_scale_factor=True, + )[0] + imre_ = torch.zeros(image.shape[0], image_height, image_width) + imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre + mask = torch.zeros(1, image_height, image_width) + mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0 + return imre_, minscale, mask def _load_image(path) -> np.ndarray: diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 209830bbe..5d432ba69 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -15,6 +15,7 @@ _load_depth_mask, _load_image, _load_mask, + _resize_image, BlobLoader, ) from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args from pytorch3d.renderer.cameras import PerspectiveCameras @@ -127,7 +128,9 @@ def test_resize_image(self): path = os.path.join(self.dataset_root, self.frame_annotation.image.path) local_path = self.path_manager.get_local_path(path) image = _load_image(local_path) - image_rgb, scale, mask_crop = self.blob_loader._resize_image(image) + image_rgb, scale, mask_crop = _resize_image( + image, image_height=self.image_height, image_width=self.image_width + ) original_shape = image.shape[-2:] expected_shape = ( From 76f45aa27dc5c99c576e9242b480267ceeb55db6 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 15:31:22 +0000 Subject: [PATCH 30/43] typo in function arguments
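The previous patch (PATCH 29) moved _resize_image out of BlobLoader to module level but left the `self` parameter in the signature, so every positional argument was shifted by one. An illustrative sketch of the failure mode this fixes (the 768/512 values are borrowed from the test setup; the snippet is not part of the diff):

    # with the stray `self` in place, the positional `image` argument
    # binds to `self` and the `image` parameter is left unfilled,
    # so this call raises a TypeError:
    image_rgb, scale, mask_crop = _resize_image(
        image, image_height=768, image_width=512
    )
    # dropping `self` from the signature makes the call work as intended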
--- pytorch3d/implicitron/dataset/blob_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index bedbc070e..ce15f116a 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -333,7 +333,7 @@ def _local_path(self, path: str) -> str: def _resize_image( - self, image, image_height, image_width, mode="bilinear" + image, image_height, image_width, mode="bilinear" ) -> Tuple[torch.Tensor, float, torch.Tensor]: if image_height is None or image_width is None: # skip the resizing From e5d3a2b08c3e0c1ab2496ef55d9e3a012428ea7f Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 15:44:12 +0000 Subject: [PATCH 31/43] moved tests for _resize_image to test_bbox --- tests/implicitron/test_bbox.py | 18 ++++++++++++++++++ tests/implicitron/test_blob_loader.py | 24 +----------------------- 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index 8dffd751d..4f518dfee 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -18,6 +18,7 @@ _get_bbox_from_mask, _get_clamp_bbox, _rescale_bbox, + _resize_image, ) from tests.common_testing import TestCaseMixin @@ -121,3 +122,20 @@ def test_get_1d_bounds(self): bounds = _get_1d_bounds(array) # make nonzero 1d bounds of image self.assertClose(bounds, [1, 3]) + + def test_resize_image(self): + image = torch.rand(3, 300, 500) # rgb image 300x500 + expected_shape = (150, 250) + + resized_image, scale, mask_crop = _resize_image( + image, image_height=expected_shape[0], image_width=expected_shape[1] + ) + + original_shape = image.shape[-2:] + expected_scale = min( + expected_shape[0] / original_shape[0], expected_shape[1] / original_shape[1] + ) + + self.assertEqual(scale, expected_scale) + self.assertEqual(resized_image.shape[-2:], expected_shape) + self.assertEqual(mask_crop.shape[-2:], expected_shape) diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 5d432ba69..5634854e9 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -15,10 +15,9 @@ _load_depth_mask, _load_image, _load_mask, - _resize_image, BlobLoader, ) -from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args +from pytorch3d.implicitron.tools.config import get_default_args from pytorch3d.renderer.cameras import PerspectiveCameras from tests.common_testing import TestCaseMixin @@ -124,27 +123,6 @@ def test_load(self): ) self.assertEqual(type(camera), PerspectiveCameras) - def test_resize_image(self): - path = os.path.join(self.dataset_root, self.frame_annotation.image.path) - local_path = self.path_manager.get_local_path(path) - image = _load_image(local_path) - image_rgb, scale, mask_crop = _resize_image( - image, image_height=self.image_height, image_width=self.image_width - ) - - original_shape = image.shape[-2:] - expected_shape = ( - self.image_height, - self.image_width, - ) - expected_scale = min( - expected_shape[0] / original_shape[0], expected_shape[1] / original_shape[1] - ) - - self.assertEqual(scale, expected_scale) - self.assertEqual(image_rgb.shape[-2:], expected_shape) - self.assertEqual(mask_crop.shape[-2:], expected_shape) - def test_load_image(self): path = os.path.join(self.dataset_root, self.frame_annotation.image.path) local_path = self.path_manager.get_local_path(path) From 
1ba1a3a6896ee947a7791df704c39ca436eff377 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Fri, 10 Mar 2023 15:46:17 +0000 Subject: [PATCH 32/43] np array instead of tensor to resize_image --- tests/implicitron/test_bbox.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index 4f518dfee..48a8421bb 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -124,7 +124,7 @@ def test_get_1d_bounds(self): self.assertClose(bounds, [1, 3]) def test_resize_image(self): - image = torch.rand(3, 300, 500) # rgb image 300x500 + image = np.random.rand(3, 300, 500) # rgb image 300x500 expected_shape = (150, 250) resized_image, scale, mask_crop = _resize_image( From cd9aa5ccd0bb3899cee14058277d99b7fc2daffa Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Mon, 13 Mar 2023 10:59:25 +0000 Subject: [PATCH 33/43] setting up default scale value to correct one --- pytorch3d/implicitron/dataset/blob_loader.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index ce15f116a..2d77e6c08 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -90,7 +90,10 @@ def load( frame_data.crop_bbox_xywh, ) = self._load_crop_fg_probability(entry) - scale = 1.0 + scale = min( + self.image_height / entry.image.size[0], + self.image_width / entry.image.size[1], + ) if self.load_images and entry.image is not None: # original image size frame_data.image_size_hw = _safe_as_tensor(entry.image.size, torch.long) From ce9fd400d1ca3a2b7b767cda31ddc530d68eee1a Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 14 Mar 2023 10:48:26 +0000 Subject: [PATCH 34/43] renamed function to load_ to make the inplace modification more obvious --- pytorch3d/implicitron/dataset/blob_loader.py | 3 +-- pytorch3d/implicitron/dataset/json_index_dataset.py | 3 ++- tests/implicitron/test_blob_loader.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index 2d77e6c08..6d0dc7fa4 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -73,7 +73,7 @@ class BlobLoader: box_crop_context: float = 0.3 path_manager: Any = None - def load( + def load_( self, frame_data: FrameData, entry: types.FrameAnnotation, @@ -127,7 +127,6 @@ def load( self._local_path(pcl_path), max_points=self.max_points ) frame_data.sequence_point_cloud_path = pcl_path - return frame_data def _load_crop_fg_probability( self, entry: types.FrameAnnotation diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py index cf63b9b43..636630680 100644 --- a/pytorch3d/implicitron/dataset/json_index_dataset.py +++ b/pytorch3d/implicitron/dataset/json_index_dataset.py @@ -443,9 +443,10 @@ def __getitem__(self, index) -> FrameData: # Optional field frame_data.frame_type = self._get_frame_type(self.frame_annots[index]) - return self.blob_loader.load( + self.blob_loader.load_( frame_data, entry, self.seq_annots[entry.sequence_name] ) + return frame_data def _load_frames(self) -> None: logger.info(f"Loading Co3D frames from {self.frame_annotations_file}.") diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index 5634854e9..fd8d8fd81 100644 ---
a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -64,7 +64,7 @@ def test_fix_point_cloud_path(self): assert original_path in modified_path assert self.blob_loader.dataset_root in modified_path - def test_load(self): + def test_load_(self): ( fg_probability, mask_path, From f217eb1fcc2f3ee561aacab161488b2142aafb9a Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 14 Mar 2023 17:55:25 +0000 Subject: [PATCH 35/43] moved crop_by_bbox to FrameData as method --- pytorch3d/implicitron/dataset/blob_loader.py | 181 ++---------------- pytorch3d/implicitron/dataset/dataset_base.py | 32 ++++ pytorch3d/implicitron/dataset/utils.py | 101 ++++++++++ tests/implicitron/test_bbox.py | 5 +- 4 files changed, 152 insertions(+), 167 deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index 6d0dc7fa4..ce59c542d 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -20,6 +20,9 @@ from pytorch3d.io import IO from pytorch3d.renderer.cameras import PerspectiveCameras from pytorch3d.structures.pointclouds import Pointclouds +from pytorch3d.implicitron.dataset.utils import ( + _get_bbox_from_mask, +) @dataclass @@ -85,9 +88,7 @@ def load_( ( frame_data.fg_probability, frame_data.mask_path, - frame_data.bbox_xywh, - clamp_bbox_xyxy, - frame_data.crop_bbox_xywh, + bbox_xywh, ) = self._load_crop_fg_probability(entry) scale = min( @@ -103,23 +104,17 @@ def load_( frame_data.image_path, frame_data.mask_crop, scale, - ) = self._load_crop_images( - entry, frame_data.fg_probability, clamp_bbox_xyxy - ) + ) = self._load_crop_images(entry, frame_data.fg_probability) if self.load_depths and entry.depth is not None: ( frame_data.depth_map, frame_data.depth_path, frame_data.depth_mask, - ) = self._load_mask_depth(entry, clamp_bbox_xyxy, frame_data.fg_probability) + ) = self._load_mask_depth(entry, frame_data.fg_probability) if entry.viewpoint is not None: - frame_data.camera = self._get_pytorch3d_camera( - entry, - scale, - clamp_bbox_xyxy, - ) + frame_data.camera = self._get_pytorch3d_camera(entry, scale) if self.load_point_clouds and seq_annotation.point_cloud is not None: pcl_path = self._fix_point_cloud_path(seq_annotation.point_cloud.path) @@ -128,45 +123,28 @@ def load_( ) frame_data.sequence_point_cloud_path = pcl_path + if self.box_crop: + frame_data.crop_by_bbox(bbox_xywh, self.box_crop_context, ) + + return frame_data + def _load_crop_fg_probability( self, entry: types.FrameAnnotation - ) -> Tuple[ - Optional[torch.Tensor], - Optional[str], - Optional[torch.Tensor], - Optional[torch.Tensor], - Optional[torch.Tensor], - ]: + ) -> Tuple[Optional[torch.Tensor],Optional[str],Optional[torch.Tensor]]: fg_probability = None full_path = None bbox_xywh = None - clamp_bbox_xyxy = None - crop_box_xywh = None - if (self.load_masks or self.box_crop) and entry.mask is not None: + if (self.load_masks) and entry.mask is not None: full_path = os.path.join(self.dataset_root, entry.mask.path) mask = _load_mask(self._local_path(full_path)) + bbox_xywh = torch.tensor(_get_bbox_from_mask(self.mask, self.box_crop_mask_thr)) if mask.shape[-2:] != entry.image.size: raise ValueError( f"bad mask size: {mask.shape[-2:]} vs {entry.image.size}!" 
) - bbox_xywh = torch.tensor(_get_bbox_from_mask(mask, self.box_crop_mask_thr)) - - if self.box_crop: - clamp_bbox_xyxy = _clamp_box_to_image_bounds_and_round( - _get_clamp_bbox( - bbox_xywh, - image_path=entry.image.path, - box_crop_context=self.box_crop_context, - ), - image_size_hw=tuple(mask.shape[-2:]), - ) - crop_box_xywh = _bbox_xyxy_to_xywh(clamp_bbox_xyxy) - - mask = _crop_around_box(mask, clamp_bbox_xyxy, full_path) - fg_probability, _, _ = _resize_image( mask, image_height=self.image_height, @@ -174,13 +152,12 @@ def _load_crop_fg_probability( mode="nearest", ) - return fg_probability, full_path, bbox_xywh, clamp_bbox_xyxy, crop_box_xywh + return fg_probability, full_path, bbox_xywh def _load_crop_images( self, entry: types.FrameAnnotation, fg_probability: Optional[torch.Tensor], - clamp_bbox_xyxy: Optional[torch.Tensor], ) -> Tuple[torch.Tensor, str, torch.Tensor, float]: assert self.dataset_root is not None and entry.image is not None path = os.path.join(self.dataset_root, entry.image.path) @@ -191,10 +168,6 @@ def _load_crop_images( f"bad image size: {image_rgb.shape[-2:]} vs {entry.image.size}!" ) - if self.box_crop: - assert clamp_bbox_xyxy is not None - image_rgb = _crop_around_box(image_rgb, clamp_bbox_xyxy, path) - image_rgb, scale, mask_crop = _resize_image( image_rgb, image_height=self.image_height, image_width=self.image_width ) @@ -208,7 +181,6 @@ def _load_crop_images( def _load_mask_depth( self, entry: types.FrameAnnotation, - clamp_bbox_xyxy: Optional[torch.Tensor], fg_probability: Optional[torch.Tensor], ) -> Tuple[torch.Tensor, str, torch.Tensor]: entry_depth = entry.depth @@ -216,13 +188,6 @@ def _load_mask_depth( path = os.path.join(self.dataset_root, entry_depth.path) depth_map = _load_depth(self._local_path(path), entry_depth.scale_adjustment) - if self.box_crop: - assert clamp_bbox_xyxy is not None - depth_bbox_xyxy = _rescale_bbox( - clamp_bbox_xyxy, entry.image.size, depth_map.shape[-2:] - ) - depth_map = _crop_around_box(depth_map, depth_bbox_xyxy, path) - depth_map, _, _ = _resize_image( depth_map, image_height=self.image_height, @@ -239,15 +204,6 @@ def _load_mask_depth( mask_path = os.path.join(self.dataset_root, entry_depth.mask_path) depth_mask = _load_depth_mask(self._local_path(mask_path)) - if self.box_crop: - assert clamp_bbox_xyxy is not None - depth_mask_bbox_xyxy = _rescale_bbox( - clamp_bbox_xyxy, entry.image.size, depth_mask.shape[-2:] - ) - depth_mask = _crop_around_box( - depth_mask, depth_mask_bbox_xyxy, mask_path - ) - depth_mask, _, _ = _resize_image( depth_mask, image_height=self.image_height, @@ -263,7 +219,6 @@ def _get_pytorch3d_camera( self, entry: types.FrameAnnotation, scale: float, - clamp_bbox_xyxy: Optional[torch.Tensor], ) -> PerspectiveCameras: entry_viewpoint = entry.viewpoint assert entry_viewpoint is not None @@ -290,9 +245,6 @@ def _get_pytorch3d_camera( # principal point and focal length in pixels principal_point_px = half_image_size_wh_orig - principal_point * rescale focal_length_px = focal_length * rescale - if self.box_crop: - assert clamp_bbox_xyxy is not None - principal_point_px -= clamp_bbox_xyxy[:2] # now, convert from pixels to PyTorch3D v0.5+ NDC convention if self.image_height is None or self.image_width is None: @@ -375,84 +327,6 @@ def _load_mask(path) -> np.ndarray: return mask[None] # fake feature channel -def _get_bbox_from_mask( - mask, thr, decrease_quant: float = 0.05 -) -> Tuple[int, int, int, int]: - # bbox in xywh - masks_for_box = np.zeros_like(mask) - while masks_for_box.sum() <= 1.0: - 
masks_for_box = (mask > thr).astype(np.float32) - thr -= decrease_quant - if thr <= 0.0: - warnings.warn( - f"Empty masks_for_bbox (thr={thr}) => using full image.", stacklevel=1 - ) - - x0, x1 = _get_1d_bounds(masks_for_box.sum(axis=-2)) - y0, y1 = _get_1d_bounds(masks_for_box.sum(axis=-1)) - - return x0, y0, x1 - x0, y1 - y0 - - -def _crop_around_box(tensor, bbox, impath: str = ""): - # bbox is xyxy, where the upper bound is corrected with +1 - bbox = _clamp_box_to_image_bounds_and_round( - bbox, - image_size_hw=tensor.shape[-2:], - ) - tensor = tensor[..., bbox[1] : bbox[3], bbox[0] : bbox[2]] - assert all(c > 0 for c in tensor.shape), f"squashed image {impath}" - return tensor - - -def _clamp_box_to_image_bounds_and_round( - bbox_xyxy: torch.Tensor, - image_size_hw: Tuple[int, int], -) -> torch.LongTensor: - bbox_xyxy = bbox_xyxy.clone() - bbox_xyxy[[0, 2]] = torch.clamp(bbox_xyxy[[0, 2]], 0, image_size_hw[-1]) - bbox_xyxy[[1, 3]] = torch.clamp(bbox_xyxy[[1, 3]], 0, image_size_hw[-2]) - if not isinstance(bbox_xyxy, torch.LongTensor): - bbox_xyxy = bbox_xyxy.round().long() - return bbox_xyxy # pyre-ignore [7] - - -def _get_clamp_bbox( - bbox: torch.Tensor, - box_crop_context: float = 0.0, - image_path: str = "", -) -> torch.Tensor: - # box_crop_context: rate of expansion for bbox - # returns possibly expanded bbox xyxy as float - - bbox = bbox.clone() # do not edit bbox in place - - # increase box size - if box_crop_context > 0.0: - c = box_crop_context - bbox = bbox.float() - bbox[0] -= bbox[2] * c / 2 - bbox[1] -= bbox[3] * c / 2 - bbox[2] += bbox[2] * c - bbox[3] += bbox[3] * c - - if (bbox[2:] <= 1.0).any(): - raise ValueError( - f"squashed image {image_path}!! The bounding box contains no pixels." - ) - - bbox[2:] = torch.clamp(bbox[2:], 2) # set min height, width to 2 along both axes - bbox_xyxy = _bbox_xywh_to_xyxy(bbox, clamp_size=2) - - return bbox_xyxy - - -def _bbox_xyxy_to_xywh(xyxy: torch.Tensor) -> torch.Tensor: - wh = xyxy[2:] - xyxy[:2] - xywh = torch.cat([xyxy[:2], wh]) - return xywh - - def _load_depth(path, scale_adjustment) -> np.ndarray: if not path.lower().endswith(".png"): raise ValueError('unsupported depth file name "%s"' % path) @@ -474,14 +348,6 @@ def _load_16big_png_depth(depth_png) -> np.ndarray: return depth -def _rescale_bbox(bbox: torch.Tensor, orig_res, new_res) -> torch.Tensor: - assert bbox is not None - assert np.prod(orig_res) > 1e-8 - # average ratio of dimensions - rel_size = (new_res[0] / orig_res[0] + new_res[1] / orig_res[1]) / 2.0 - return bbox * rel_size - - def _load_1bit_png_mask(file: str) -> np.ndarray: with Image.open(file) as pil_im: mask = (np.array(pil_im.convert("L")) > 0.0).astype(np.float32) @@ -495,21 +361,6 @@ def _load_depth_mask(path: str) -> np.ndarray: return m[None] # fake feature channel -def _get_1d_bounds(arr) -> Tuple[int, int]: - nz = np.flatnonzero(arr) - return nz[0], nz[-1] + 1 - - -def _bbox_xywh_to_xyxy( - xywh: torch.Tensor, clamp_size: Optional[int] = None -) -> torch.Tensor: - xyxy = xywh.clone() - if clamp_size is not None: - xyxy[2:] = torch.clamp(xyxy[2:], clamp_size) - xyxy[2:] += xyxy[:2] - return xyxy - - def _safe_as_tensor(data, dtype): return torch.tensor(data, dtype=dtype) if data is not None else None diff --git a/pytorch3d/implicitron/dataset/dataset_base.py b/pytorch3d/implicitron/dataset/dataset_base.py index 283ef3dcd..322d1889b 100644 --- a/pytorch3d/implicitron/dataset/dataset_base.py +++ b/pytorch3d/implicitron/dataset/dataset_base.py @@ -26,6 +26,13 @@ from pytorch3d.renderer.camera_utils 
import join_cameras_as_batch from pytorch3d.renderer.cameras import CamerasBase, PerspectiveCameras from pytorch3d.structures.pointclouds import join_pointclouds_as_batch, Pointclouds +from pytorch3d.implicitron.dataset.utils import ( + _crop_around_box, + _clamp_box_to_image_bounds_and_round, + _bbox_xyxy_to_xywh, + _get_clamp_bbox, + _rescale_bbox, +) @dataclass @@ -144,6 +151,31 @@ def __getitem__(self, key): def __len__(self): return len(fields(self)) + def crop_by_bbox(self, bbox_xywh, box_crop_context): + clamp_bbox_xyxy = _clamp_box_to_image_bounds_and_round( + _get_clamp_bbox( + bbox_xywh, + image_path=self.image.path, + box_crop_context=box_crop_context, + ), + image_size_hw=tuple(self.fg_probability.shape[-2:]), + ) + self.crop_bbox_xywh = _bbox_xyxy_to_xywh(clamp_bbox_xyxy) + + self.fg_probability = _crop_around_box( + self.fg_probability, clamp_bbox_xyxy, self.mask_path + ) + self.image_rgb = _crop_around_box(self.image_rgb, clamp_bbox_xyxy, self.image.path) + + depth_bbox_xyxy = _rescale_bbox(clamp_bbox_xyxy, entry.image.size, self.depth_map.shape[-2:]) + self.depth_map = _crop_around_box(self.depth_map, depth_bbox_xyxy, self.depth_path) + + depth_mask_bbox_xyxy = _rescale_bbox(clamp_bbox_xyxy, entry.image.size, self.depth_mask.shape[-2:]) + self.depth_mask = _crop_around_box(self.depth_mask, depth_mask_bbox_xyxy, self.mask_path) + + + principal_point_px -= clamp_bbox_xyxy[:2] + @classmethod def collate(cls, batch): """ diff --git a/pytorch3d/implicitron/dataset/utils.py b/pytorch3d/implicitron/dataset/utils.py index 05252aff1..b2ac99f36 100644 --- a/pytorch3d/implicitron/dataset/utils.py +++ b/pytorch3d/implicitron/dataset/utils.py @@ -52,3 +52,104 @@ def is_train_frame( dtype=torch.bool, device=device, ) + + +def _get_bbox_from_mask( + mask, thr, decrease_quant: float = 0.05 + ) -> Tuple[int, int, int, int]: + # bbox in xywh + masks_for_box = np.zeros_like(mask) + while masks_for_box.sum() <= 1.0: + masks_for_box = (mask > thr).astype(np.float32) + thr -= decrease_quant + if thr <= 0.0: + warnings.warn( + f"Empty masks_for_bbox (thr={thr}) => using full image.", stacklevel=1 + ) + + x0, x1 = _get_1d_bounds(masks_for_box.sum(axis=-2)) + y0, y1 = _get_1d_bounds(masks_for_box.sum(axis=-1)) + + return x0, y0, x1 - x0, y1 - y0 + + +def _crop_around_box(tensor, bbox, impath: str = ""): + # bbox is xyxy, where the upper bound is corrected with +1 + bbox = _clamp_box_to_image_bounds_and_round( + bbox, + image_size_hw=tensor.shape[-2:], + ) + tensor = tensor[..., bbox[1] : bbox[3], bbox[0] : bbox[2]] + assert all(c > 0 for c in tensor.shape), f"squashed image {impath}" + return tensor + + +def _clamp_box_to_image_bounds_and_round( + bbox_xyxy: torch.Tensor, + image_size_hw: Tuple[int, int], +) -> torch.LongTensor: + bbox_xyxy = bbox_xyxy.clone() + bbox_xyxy[[0, 2]] = torch.clamp(bbox_xyxy[[0, 2]], 0, image_size_hw[-1]) + bbox_xyxy[[1, 3]] = torch.clamp(bbox_xyxy[[1, 3]], 0, image_size_hw[-2]) + if not isinstance(bbox_xyxy, torch.LongTensor): + bbox_xyxy = bbox_xyxy.round().long() + return bbox_xyxy # pyre-ignore [7] + + +def _bbox_xyxy_to_xywh(xyxy: torch.Tensor) -> torch.Tensor: + wh = xyxy[2:] - xyxy[:2] + xywh = torch.cat([xyxy[:2], wh]) + return xywh + + +def _get_clamp_bbox( + bbox: torch.Tensor, + box_crop_context: float = 0.0, + image_path: str = "", +) -> torch.Tensor: + # box_crop_context: rate of expansion for bbox + # returns possibly expanded bbox xyxy as float + + bbox = bbox.clone() # do not edit bbox in place + + # increase box size + if box_crop_context > 0.0: + c = 
box_crop_context + bbox = bbox.float() + bbox[0] -= bbox[2] * c / 2 + bbox[1] -= bbox[3] * c / 2 + bbox[2] += bbox[2] * c + bbox[3] += bbox[3] * c + + if (bbox[2:] <= 1.0).any(): + raise ValueError( + f"squashed image {image_path}!! The bounding box contains no pixels." + ) + + bbox[2:] = torch.clamp(bbox[2:], 2) # set min height, width to 2 along both axes + bbox_xyxy = _bbox_xywh_to_xyxy(bbox, clamp_size=2) + + return bbox_xyxy + + +def _rescale_bbox(bbox: torch.Tensor, orig_res, new_res) -> torch.Tensor: + assert bbox is not None + assert np.prod(orig_res) > 1e-8 + # average ratio of dimensions + rel_size = (new_res[0] / orig_res[0] + new_res[1] / orig_res[1]) / 2.0 + return bbox * rel_size + + +def _bbox_xywh_to_xyxy( + xywh: torch.Tensor, clamp_size: Optional[int] = None +) -> torch.Tensor: + xyxy = xywh.clone() + if clamp_size is not None: + xyxy[2:] = torch.clamp(xyxy[2:], clamp_size) + xyxy[2:] += xyxy[:2] + return xyxy + + +def _get_1d_bounds(arr) -> Tuple[int, int]: + nz = np.flatnonzero(arr) + return nz[0], nz[-1] + 1 diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index 48a8421bb..16199ad1e 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -9,7 +9,9 @@ import numpy as np import torch -from pytorch3d.implicitron.dataset.blob_loader import ( +from pytorch3d.implicitron.dataset.blob_loader import _resize_image + +from pytorch3d.implicitron.dataset.utils import ( _bbox_xywh_to_xyxy, _bbox_xyxy_to_xywh, _clamp_box_to_image_bounds_and_round, @@ -18,7 +20,6 @@ _get_bbox_from_mask, _get_clamp_bbox, _rescale_bbox, - _resize_image, ) from tests.common_testing import TestCaseMixin From 664d35d66de59e815f3feec581d6ad80bc0bdea0 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 14 Mar 2023 18:16:40 +0000 Subject: [PATCH 36/43] tests fix, typos, linter --- pytorch3d/implicitron/dataset/blob_loader.py | 11 +++--- pytorch3d/implicitron/dataset/utils.py | 37 +++++++++++--------- tests/implicitron/test_blob_loader.py | 17 ++++----- 3 files changed, 30 insertions(+), 35 deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index ce59c542d..fa3a5ac29 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -6,7 +6,6 @@ import functools import os -import warnings from dataclasses import dataclass from pathlib import Path from typing import Any, Optional, Tuple, Union @@ -17,12 +16,10 @@ from pytorch3d.implicitron.dataset import types from pytorch3d.implicitron.dataset.dataset_base import FrameData +from pytorch3d.implicitron.dataset.utils import _get_bbox_from_mask from pytorch3d.io import IO from pytorch3d.renderer.cameras import PerspectiveCameras from pytorch3d.structures.pointclouds import Pointclouds -from pytorch3d.implicitron.dataset.utils import ( - _get_bbox_from_mask, -) @dataclass @@ -124,13 +121,13 @@ def load_( frame_data.sequence_point_cloud_path = pcl_path if self.box_crop: - frame_data.crop_by_bbox(bbox_xywh, self.box_crop_context, ) + frame_data.crop_by_bbox(bbox_xywh, self.box_crop_context) return frame_data def _load_crop_fg_probability( self, entry: types.FrameAnnotation - ) -> Tuple[Optional[torch.Tensor],Optional[str],Optional[torch.Tensor]]: + ) -> Tuple[Optional[torch.Tensor], Optional[str], Optional[torch.Tensor]]: fg_probability = None full_path = None bbox_xywh = None @@ -138,7 +135,7 @@ def _load_crop_fg_probability( if (self.load_masks) and entry.mask is not None: full_path = 
os.path.join(self.dataset_root, entry.mask.path) mask = _load_mask(self._local_path(full_path)) - bbox_xywh = torch.tensor(_get_bbox_from_mask(self.mask, self.box_crop_mask_thr)) + bbox_xywh = torch.tensor(_get_bbox_from_mask(mask, self.box_crop_mask_thr)) if mask.shape[-2:] != entry.image.size: raise ValueError( diff --git a/pytorch3d/implicitron/dataset/utils.py b/pytorch3d/implicitron/dataset/utils.py index b2ac99f36..6e9af933d 100644 --- a/pytorch3d/implicitron/dataset/utils.py +++ b/pytorch3d/implicitron/dataset/utils.py @@ -5,7 +5,10 @@ # LICENSE file in the root directory of this source tree. -from typing import List, Optional +import warnings +from typing import List, Optional, Tuple + +import numpy as np import torch @@ -55,22 +58,22 @@ def is_train_frame( def _get_bbox_from_mask( - mask, thr, decrease_quant: float = 0.05 - ) -> Tuple[int, int, int, int]: - # bbox in xywh - masks_for_box = np.zeros_like(mask) - while masks_for_box.sum() <= 1.0: - masks_for_box = (mask > thr).astype(np.float32) - thr -= decrease_quant - if thr <= 0.0: - warnings.warn( - f"Empty masks_for_bbox (thr={thr}) => using full image.", stacklevel=1 - ) - - x0, x1 = _get_1d_bounds(masks_for_box.sum(axis=-2)) - y0, y1 = _get_1d_bounds(masks_for_box.sum(axis=-1)) - - return x0, y0, x1 - x0, y1 - y0 + mask, thr, decrease_quant: float = 0.05 +) -> Tuple[int, int, int, int]: + # bbox in xywh + masks_for_box = np.zeros_like(mask) + while masks_for_box.sum() <= 1.0: + masks_for_box = (mask > thr).astype(np.float32) + thr -= decrease_quant + if thr <= 0.0: + warnings.warn( + f"Empty masks_for_bbox (thr={thr}) => using full image.", stacklevel=1 + ) + + x0, x1 = _get_1d_bounds(masks_for_box.sum(axis=-2)) + y0, y1 = _get_1d_bounds(masks_for_box.sum(axis=-1)) + + return x0, y0, x1 - x0, y1 - y0 def _crop_around_box(tensor, bbox, impath: str = ""): diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index fd8d8fd81..d2a612d48 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -69,25 +69,22 @@ def test_load_(self): fg_probability, mask_path, bbox_xywh, - clamp_bbox_xyxy, - crop_bbox_xywh, ) = self.blob_loader._load_crop_fg_probability(self.frame_annotation) assert mask_path assert torch.is_tensor(fg_probability) assert torch.is_tensor(bbox_xywh) - assert torch.is_tensor(clamp_bbox_xyxy) - assert torch.is_tensor(crop_bbox_xywh) # assert bboxes shape self.assertEqual( fg_probability.shape, torch.Size([1, self.image_height, self.image_width]) ) self.assertEqual(bbox_xywh.shape, torch.Size([4])) - self.assertEqual(clamp_bbox_xyxy.shape, torch.Size([4])) - self.assertEqual(crop_bbox_xywh.shape, torch.Size([4])) - (image_rgb, image_path, mask_crop, scale,) = self.blob_loader._load_crop_images( - self.frame_annotation, fg_probability, clamp_bbox_xyxy - ) + ( + image_rgb, + image_path, + mask_crop, + scale, + ) = self.blob_loader._load_crop_images(self.frame_annotation, fg_probability) assert torch.is_tensor(image_rgb) assert image_path assert torch.is_tensor(mask_crop) @@ -102,7 +99,6 @@ def test_load_(self): (depth_map, depth_path, depth_mask,) = self.blob_loader._load_mask_depth( self.frame_annotation, - clamp_bbox_xyxy, fg_probability, ) assert torch.is_tensor(depth_map) @@ -119,7 +115,6 @@ def test_load_(self): camera = self.blob_loader._get_pytorch3d_camera( self.frame_annotation, scale, - clamp_bbox_xyxy, ) self.assertEqual(type(camera), PerspectiveCameras) From 5c249db0a0160cf9c1b4043634a4e0a495cff6e1 Mon Sep 17 00:00:00 2001 From: 
Ildar Salakhiev Date: Tue, 14 Mar 2023 18:25:50 +0000 Subject: [PATCH 37/43] renamed crop to crop_ to show inplace modification --- pytorch3d/implicitron/dataset/blob_loader.py | 4 ++-- pytorch3d/implicitron/dataset/dataset_base.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index fa3a5ac29..19417a639 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -85,7 +85,7 @@ def load_( ( frame_data.fg_probability, frame_data.mask_path, - bbox_xywh, + frame_data.bbox_xywh, ) = self._load_crop_fg_probability(entry) scale = min( @@ -121,7 +121,7 @@ def load_( frame_data.sequence_point_cloud_path = pcl_path if self.box_crop: - frame_data.crop_by_bbox(bbox_xywh, self.box_crop_context) + frame_data.crop_by_bbox_(self.box_crop_context) return frame_data diff --git a/pytorch3d/implicitron/dataset/dataset_base.py b/pytorch3d/implicitron/dataset/dataset_base.py index 322d1889b..7ddc9e122 100644 --- a/pytorch3d/implicitron/dataset/dataset_base.py +++ b/pytorch3d/implicitron/dataset/dataset_base.py @@ -151,10 +151,10 @@ def __getitem__(self, key): def __len__(self): return len(fields(self)) - def crop_by_bbox(self, bbox_xywh, box_crop_context): + def crop_by_bbox_(self, box_crop_context): clamp_bbox_xyxy = _clamp_box_to_image_bounds_and_round( _get_clamp_bbox( - bbox_xywh, + self.bbox_xywh, image_path=self.image.path, box_crop_context=box_crop_context, ), From 530b9a42d1ebfde8afa92dc3bded73f18d6e0a25 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Tue, 14 Mar 2023 18:33:44 +0000 Subject: [PATCH 38/43] shifting camera according to bbox --- pytorch3d/implicitron/dataset/dataset_base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pytorch3d/implicitron/dataset/dataset_base.py b/pytorch3d/implicitron/dataset/dataset_base.py index 7ddc9e122..2c1bb7527 100644 --- a/pytorch3d/implicitron/dataset/dataset_base.py +++ b/pytorch3d/implicitron/dataset/dataset_base.py @@ -173,8 +173,7 @@ def crop_by_bbox_(self, box_crop_context): depth_mask_bbox_xyxy = _rescale_bbox(clamp_bbox_xyxy, entry.image.size, self.depth_mask.shape[-2:]) self.depth_mask = _crop_around_box(self.depth_mask, depth_mask_bbox_xyxy, self.mask_path) - - principal_point_px -= clamp_bbox_xyxy[:2] + self.camera.principal_point_px -= clamp_bbox_xyxy[:2] @classmethod def collate(cls, batch): From e5500f329d3016740af50f4dc420b07d1383942a Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Wed, 15 Mar 2023 18:22:18 +0000 Subject: [PATCH 39/43] delegated resize_image to FrameData, made bbox_xywh an optional external parameter for load_, linter, fbcode tests --- pytorch3d/implicitron/dataset/blob_loader.py | 130 ++++++----------- pytorch3d/implicitron/dataset/dataset_base.py | 118 +++++++++++++--- .../implicitron/dataset/json_index_dataset.py | 44 ++++-- pytorch3d/implicitron/dataset/utils.py | 29 ++++ pytorch3d/implicitron/dataset/visualize.py | 1 + tests/implicitron/test_bbox.py | 2 +- tests/implicitron/test_blob_loader.py | 131 +++++++++++++----- 7 files changed, 307 insertions(+), 148 deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index 19417a639..9ccf53b2f 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -38,23 +38,23 @@ class BlobLoader: load_masks: Enable loading frame foreground masks.
load_point_clouds: Enable loading sequence-level point clouds. max_points: Cap on the number of loaded points in the point cloud; - if reached, they are randomly sampled without replacement. + if reached, they are randomly sampled without replacement. mask_images: Whether to mask the images with the loaded foreground masks; - 0 value is used for background. + 0 value is used for background. mask_depths: Whether to mask the depth maps with the loaded foreground masks; 0 value is used for background. image_height: The height of the returned images, masks, and depth maps; - aspect ratio is preserved during cropping/resizing. + aspect ratio is preserved during cropping/resizing. image_width: The width of the returned images, masks, and depth maps; aspect ratio is preserved during cropping/resizing. box_crop: Enable cropping of the image around the bounding box inferred - from the foreground region of the loaded segmentation mask; masks - and depth maps are cropped accordingly; cameras are corrected. + from the foreground region of the loaded segmentation mask; masks + and depth maps are cropped accordingly; cameras are corrected. box_crop_mask_thr: The threshold used to separate pixels into foreground - and background based on the foreground_probability mask; if no value - is greater than this threshold, the loader lowers it and repeats. + and background based on the foreground_probability mask; if no value + is greater than this threshold, the loader lowers it and repeats. box_crop_context: The amount of additional padding added to each - dimension of the cropping bounding box, relative to box size. + dimension of the cropping bounding box, relative to box size. """ dataset_root: str = "" @@ -78,20 +78,18 @@ def load_( frame_data: FrameData, entry: types.FrameAnnotation, seq_annotation: types.SequenceAnnotation, + bbox_xywh: Optional[torch.Tensor] = None, ) -> FrameData: """Main method for loader. 
FrameData modification is done in-place.
+        If bbox_xywh is not provided, the bbox is calculated from the mask.
         """
         (
             frame_data.fg_probability,
             frame_data.mask_path,
             frame_data.bbox_xywh,
-        ) = self._load_crop_fg_probability(entry)
+        ) = self._load_fg_probability(entry, bbox_xywh)

-        scale = min(
-            self.image_height / entry.image.size[0],
-            self.image_width / entry.image.size[1],
-        )
         if self.load_images and entry.image is not None:
             # original image size
             frame_data.image_size_hw = _safe_as_tensor(entry.image.size, torch.long)

@@ -99,9 +97,7 @@ def load_(
             (
                 frame_data.image_rgb,
                 frame_data.image_path,
-                frame_data.mask_crop,
-                scale,
-            ) = self._load_crop_images(entry, frame_data.fg_probability)
+            ) = self._load_images(entry, frame_data.fg_probability)

         if self.load_depths and entry.depth is not None:
             (
@@ -110,9 +106,6 @@ def load_(
                 frame_data.depth_mask,
             ) = self._load_mask_depth(entry, frame_data.fg_probability)

-        if entry.viewpoint is not None:
-            frame_data.camera = self._get_pytorch3d_camera(entry, scale)
-
         if self.load_point_clouds and seq_annotation.point_cloud is not None:
             pcl_path = self._fix_point_cloud_path(seq_annotation.point_cloud.path)
             frame_data.sequence_point_cloud = _load_pointcloud(
@@ -120,42 +113,50 @@ def load_(
             )
             frame_data.sequence_point_cloud_path = pcl_path

+        clamp_bbox_xyxy = None
         if self.box_crop:
-            frame_data.crop_by_bbox_(self.box_crop_context)
+            clamp_bbox_xyxy = frame_data.crop_by_bbox_(self.box_crop_context)
+
+        scale = 1.0
+
+        if self.image_height is not None and self.image_width is not None:
+            scale = frame_data.resize_frame_(self.image_height, self.image_width)
+        # creating camera, taking into account bbox and resize scale
+        if entry.viewpoint is not None:
+            frame_data.camera = self._get_pytorch3d_camera(
+                entry, scale, clamp_bbox_xyxy
+            )

         return frame_data

-    def _load_crop_fg_probability(
-        self, entry: types.FrameAnnotation
+    def _load_fg_probability(
+        self,
+        entry: types.FrameAnnotation,
+        bbox_xywh: Optional[torch.Tensor],
     ) -> Tuple[Optional[torch.Tensor], Optional[str], Optional[torch.Tensor]]:
         fg_probability = None
         full_path = None
-        bbox_xywh = None

         if (self.load_masks) and entry.mask is not None:
             full_path = os.path.join(self.dataset_root, entry.mask.path)
-            mask = _load_mask(self._local_path(full_path))
-            bbox_xywh = torch.tensor(_get_bbox_from_mask(mask, self.box_crop_mask_thr))
-
-            if mask.shape[-2:] != entry.image.size:
+            fg_probability = _load_mask(self._local_path(full_path))
+            # we can use the provided bbox_xywh or calculate it from the mask
+            if bbox_xywh is None:
+                bbox_xywh = torch.tensor(
+                    _get_bbox_from_mask(fg_probability, self.box_crop_mask_thr)
+                )
+            if fg_probability.shape[-2:] != entry.image.size:
                 raise ValueError(
-                    f"bad mask size: {mask.shape[-2:]} vs {entry.image.size}!"
+                    f"bad mask size: {fg_probability.shape[-2:]} vs {entry.image.size}!"
) - fg_probability, _, _ = _resize_image( - mask, - image_height=self.image_height, - image_width=self.image_width, - mode="nearest", - ) - - return fg_probability, full_path, bbox_xywh + return torch.tensor(fg_probability), full_path, bbox_xywh - def _load_crop_images( + def _load_images( self, entry: types.FrameAnnotation, fg_probability: Optional[torch.Tensor], - ) -> Tuple[torch.Tensor, str, torch.Tensor, float]: + ) -> Tuple[torch.Tensor, str]: assert self.dataset_root is not None and entry.image is not None path = os.path.join(self.dataset_root, entry.image.path) image_rgb = _load_image(self._local_path(path)) @@ -165,15 +166,11 @@ def _load_crop_images( f"bad image size: {image_rgb.shape[-2:]} vs {entry.image.size}!" ) - image_rgb, scale, mask_crop = _resize_image( - image_rgb, image_height=self.image_height, image_width=self.image_width - ) - if self.mask_images: assert fg_probability is not None image_rgb *= fg_probability - return image_rgb, path, mask_crop, scale + return image_rgb, path def _load_mask_depth( self, @@ -185,13 +182,6 @@ def _load_mask_depth( path = os.path.join(self.dataset_root, entry_depth.path) depth_map = _load_depth(self._local_path(path), entry_depth.scale_adjustment) - depth_map, _, _ = _resize_image( - depth_map, - image_height=self.image_height, - image_width=self.image_width, - mode="nearest", - ) - if self.mask_depths: assert fg_probability is not None depth_map *= fg_probability @@ -200,22 +190,16 @@ def _load_mask_depth( assert entry_depth.mask_path is not None mask_path = os.path.join(self.dataset_root, entry_depth.mask_path) depth_mask = _load_depth_mask(self._local_path(mask_path)) - - depth_mask, _, _ = _resize_image( - depth_mask, - image_height=self.image_height, - image_width=self.image_width, - mode="nearest", - ) else: depth_mask = torch.ones_like(depth_map) - return depth_map, path, depth_mask + return torch.tensor(depth_map), path, torch.tensor(depth_mask) def _get_pytorch3d_camera( self, entry: types.FrameAnnotation, scale: float, + clamp_bbox_xyxy: Optional[torch.Tensor], ) -> PerspectiveCameras: entry_viewpoint = entry.viewpoint assert entry_viewpoint is not None @@ -243,6 +227,10 @@ def _get_pytorch3d_camera( principal_point_px = half_image_size_wh_orig - principal_point * rescale focal_length_px = focal_length * rescale + # changing principal_point according to bbox_crop + if clamp_bbox_xyxy is not None: + principal_point_px -= clamp_bbox_xyxy[:2] + # now, convert from pixels to PyTorch3D v0.5+ NDC convention if self.image_height is None or self.image_width is None: out_size = list(reversed(entry.image.size)) @@ -283,32 +271,6 @@ def _local_path(self, path: str) -> str: return self.path_manager.get_local_path(path) -def _resize_image( - image, image_height, image_width, mode="bilinear" -) -> Tuple[torch.Tensor, float, torch.Tensor]: - if image_height is None or image_width is None: - # skip the resizing - imre_ = torch.from_numpy(image) - return imre_, 1.0, torch.ones_like(imre_[:1]) - # takes numpy array, returns pytorch tensor - minscale = min( - image_height / image.shape[-2], - image_width / image.shape[-1], - ) - imre = torch.nn.functional.interpolate( - torch.from_numpy(image)[None], - scale_factor=minscale, - mode=mode, - align_corners=False if mode == "bilinear" else None, - recompute_scale_factor=True, - )[0] - imre_ = torch.zeros(image.shape[0], image_height, image_width) - imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre - mask = torch.zeros(1, image_height, image_width) - mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] 
= 1.0
-    return imre_, minscale, mask
-
-
 def _load_image(path) -> np.ndarray:
     with Image.open(path) as pil_im:
         im = np.array(pil_im.convert("RGB"))
diff --git a/pytorch3d/implicitron/dataset/dataset_base.py b/pytorch3d/implicitron/dataset/dataset_base.py
index 2c1bb7527..cbc871a1e 100644
--- a/pytorch3d/implicitron/dataset/dataset_base.py
+++ b/pytorch3d/implicitron/dataset/dataset_base.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.

+import warnings
 from collections import defaultdict
 from dataclasses import dataclass, field, fields
 from typing import (
@@ -23,16 +24,17 @@
 import numpy as np
 import torch

-from pytorch3d.renderer.camera_utils import join_cameras_as_batch
-from pytorch3d.renderer.cameras import CamerasBase, PerspectiveCameras
-from pytorch3d.structures.pointclouds import join_pointclouds_as_batch, Pointclouds
 from pytorch3d.implicitron.dataset.utils import (
-    _crop_around_box,
-    _clamp_box_to_image_bounds_and_round,
     _bbox_xyxy_to_xywh,
+    _clamp_box_to_image_bounds_and_round,
+    _crop_around_box,
     _get_clamp_bbox,
     _rescale_bbox,
+    _resize_image,
 )
+from pytorch3d.renderer.camera_utils import join_cameras_as_batch
+from pytorch3d.renderer.cameras import CamerasBase, PerspectiveCameras
+from pytorch3d.structures.pointclouds import join_pointclouds_as_batch, Pointclouds


 @dataclass
@@ -97,6 +99,7 @@ class FrameData(Mapping[str, Any]):
         frame_type: The type of the loaded frame specified in `subset_lists_file`,
             if provided.
         meta: A dict for storing additional frame information.
+        cropped: Flag to avoid cropping the same FrameData twice
     """

     frame_number: Optional[torch.LongTensor]
@@ -123,6 +126,7 @@ class FrameData(Mapping[str, Any]):
     sequence_point_cloud_idx: Optional[torch.Tensor] = None
     frame_type: Union[str, List[str], None] = None  # known | unseen
     meta: dict = field(default_factory=lambda: {})
+    cropped: bool = False

     def to(self, *args, **kwargs):
         new_params = {}
@@ -151,29 +155,105 @@ def __getitem__(self, key):
     def __len__(self):
         return len(fields(self))

-    def crop_by_bbox_(self, box_crop_context):
+    def crop_by_bbox_(self, box_crop_context) -> Optional[torch.Tensor]:
+        if self.cropped:
+            warnings.warn(
+                "You called cropping on the same frame twice, "
+                f"sequence_name: {self.sequence_name}; skipping cropping"
+            )
+            return None
+
+        if (
+            self.bbox_xywh is None
+            or self.fg_probability is None
+            or self.mask_path is None
+            or self.image_path is None
+        ):
+            warnings.warn(
+                "You called cropping without loading frame data; "
+                "please call blob_loader.load_ first, skipping cropping"
+            )
+            return None
+
+        bbox_xyxy = _get_clamp_bbox(
+            self.bbox_xywh,
+            # pyre-ignore
+            image_path=self.image_path,
+            box_crop_context=box_crop_context,
+        )
         clamp_bbox_xyxy = _clamp_box_to_image_bounds_and_round(
-            _get_clamp_bbox(
-                self.bbox_xywh,
-                image_path=self.image.path,
-                box_crop_context=box_crop_context,
-            ),
-            image_size_hw=tuple(self.fg_probability.shape[-2:]),
+            bbox_xyxy,
+            # pyre-ignore
+            image_size_hw=tuple(self.image_size_hw),
         )
         self.crop_bbox_xywh = _bbox_xyxy_to_xywh(clamp_bbox_xyxy)

         self.fg_probability = _crop_around_box(
-            self.fg_probability, clamp_bbox_xyxy, self.mask_path
+            self.fg_probability,
+            clamp_bbox_xyxy,
+            # pyre-ignore
+            self.mask_path,
+        )
+        self.image_rgb = _crop_around_box(
+            self.image_rgb,
+            clamp_bbox_xyxy,
+            # pyre-ignore
+            self.image_path,
         )
-        self.image_rgb = _crop_around_box(self.image_rgb, clamp_bbox_xyxy, self.image.path)

-        depth_bbox_xyxy = 
_rescale_bbox(clamp_bbox_xyxy, entry.image.size, self.depth_map.shape[-2:]) - self.depth_map = _crop_around_box(self.depth_map, depth_bbox_xyxy, self.depth_path) + if self.depth_map is not None: + self.depth_map = _crop_around_box( + self.depth_map, + clamp_bbox_xyxy, + # pyre-ignore + self.depth_path, + ) + if self.depth_mask is not None: + self.depth_mask = _crop_around_box( + self.depth_mask, + clamp_bbox_xyxy, + # pyre-ignore + self.mask_path, + ) + self.cropped = True + return clamp_bbox_xyxy + + def resize_frame_(self, image_height, image_width) -> float: + if self.bbox_xywh is not None: + self.bbox_xywh = _rescale_bbox( + self.bbox_xywh, + np.array(self.image_size_hw), + # pyre-ignore + self.image_rgb.shape[-2:], + ) + + self.image_rgb, scale, self.mask_crop = _resize_image( + self.image_rgb, image_height=image_height, image_width=image_width + ) - depth_mask_bbox_xyxy = _rescale_bbox(clamp_bbox_xyxy, entry.image.size, self.depth_mask.shape[-2:]) - self.depth_mask = _crop_around_box(self.depth_mask, depth_mask_bbox_xyxy, self.mask_path) + self.fg_probability, _, _ = _resize_image( + self.fg_probability, + image_height=image_height, + image_width=image_width, + mode="nearest", + ) - self.camera.principal_point_px -= clamp_bbox_xyxy[:2] + if self.depth_map is not None: + self.depth_map, _, _ = _resize_image( + self.depth_map, + image_height=image_height, + image_width=image_width, + mode="nearest", + ) + + if self.depth_mask is not None: + self.depth_mask, _, _ = _resize_image( + self.depth_mask, + image_height=image_height, + image_width=image_width, + mode="nearest", + ) + return scale @classmethod def collate(cls, batch): diff --git a/pytorch3d/implicitron/dataset/json_index_dataset.py b/pytorch3d/implicitron/dataset/json_index_dataset.py index 636630680..5f9b2685a 100644 --- a/pytorch3d/implicitron/dataset/json_index_dataset.py +++ b/pytorch3d/implicitron/dataset/json_index_dataset.py @@ -14,7 +14,6 @@ import random import warnings from collections import defaultdict -from dataclasses import field from itertools import islice from typing import ( Any, @@ -161,12 +160,12 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase): sort_frames: bool = False eval_batches: Any = None eval_batch_index: Any = None - subset_to_image_path: Any = None # initialised in __post_init__ - blob_loader: BlobLoader = field(init=False) - frame_annots: List[FrameAnnotsEntry] = field(init=False) - seq_annots: Dict[str, types.SequenceAnnotation] = field(init=False) - _seq_to_idx: Dict[str, List[int]] = field(init=False) + # commented because of OmegaConf (for tests to pass) + # blob_loader: BlobLoader = field(init=False) + # frame_annots: List[FrameAnnotsEntry] = field(init=False) + # seq_annots: Dict[str, types.SequenceAnnotation] = field(init=False) + # _seq_to_idx: Dict[str, List[int]] = field(init=False) def __post_init__(self) -> None: self._load_frames() @@ -177,6 +176,7 @@ def __post_init__(self) -> None: self._filter_db() # also computes sequence indices self._extract_and_set_eval_batches() + # pyre-ignore self.blob_loader = BlobLoader( dataset_root=self.dataset_root, load_images=self.load_images, @@ -219,7 +219,9 @@ def join(self, other_datasets: Iterable["JsonIndexDataset"]) -> None: """ if not all(isinstance(d, JsonIndexDataset) for d in other_datasets): raise ValueError("This function can only join a list of JsonIndexDataset") + # pyre-ignore self.frame_annots.extend([fa for d in other_datasets for fa in d.frame_annots]) + # pyre-ignore self.seq_annots.update( # 
https://gist.github.com/treyhunner/f35292e676efa0be1728 functools.reduce( @@ -295,9 +297,11 @@ def seq_frame_index_to_dataset_index( """ _dataset_seq_frame_n_index = { seq: { + # pyre-ignore self.frame_annots[idx]["frame_annotation"].frame_number: idx for idx in seq_idx } + # pyre-ignore for seq, seq_idx in self._seq_to_idx.items() } @@ -320,6 +324,7 @@ def _get_dataset_idx( # Check that the loaded frame path is consistent # with the one stored in self.frame_annots. assert os.path.normpath( + # pyre-ignore self.frame_annots[idx]["frame_annotation"].image.path ) == os.path.normpath( path @@ -369,6 +374,7 @@ def subset_from_frame_index( # Deep copy the whole dataset except frame_annots, which are large so we # deep copy only the requested subset of frame_annots. + # pyre-ignore memo = {id(self.frame_annots): None} dataset_new = copy.deepcopy(self, memo) dataset_new.frame_annots = copy.deepcopy( @@ -397,9 +403,11 @@ def subset_from_frame_index( return dataset_new def __str__(self) -> str: + # pyre-ignore return f"JsonIndexDataset #frames={len(self.frame_annots)}" def __len__(self) -> int: + # pyre-ignore return len(self.frame_annots) def _get_frame_type(self, entry: FrameAnnotsEntry) -> Optional[str]: @@ -411,6 +419,7 @@ def get_all_train_cameras(self) -> CamerasBase: """ logger.info("Loading all train cameras.") cameras = [] + # pyre-ignore for frame_idx, frame_annot in enumerate(tqdm(self.frame_annots)): frame_type = self._get_frame_type(frame_annot) if frame_type is None: @@ -420,10 +429,12 @@ def get_all_train_cameras(self) -> CamerasBase: return join_cameras_as_batch(cameras) def __getitem__(self, index) -> FrameData: + # pyre-ignore if index >= len(self.frame_annots): raise IndexError(f"index {index} out of range {len(self.frame_annots)}") entry = self.frame_annots[index]["frame_annotation"] + # pyre-ignore point_cloud = self.seq_annots[entry.sequence_name].point_cloud frame_data = FrameData( frame_number=_safe_as_tensor(entry.frame_number, torch.long), @@ -443,9 +454,8 @@ def __getitem__(self, index) -> FrameData: # Optional field frame_data.frame_type = self._get_frame_type(self.frame_annots[index]) - self.blob_loader.load_( - frame_data, entry, self.seq_annots[entry.sequence_name] - ) + # pyre-ignore + self.blob_loader.load_(frame_data, entry, self.seq_annots[entry.sequence_name]) return frame_data def _load_frames(self) -> None: @@ -457,6 +467,7 @@ def _load_frames(self) -> None: ) if not frame_annots_list: raise ValueError("Empty dataset!") + # pyre-ignore self.frame_annots = [ FrameAnnotsEntry(frame_annotation=a, subset=None) for a in frame_annots_list ] @@ -468,6 +479,7 @@ def _load_sequences(self) -> None: seq_annots = types.load_dataclass(zipfile, List[types.SequenceAnnotation]) if not seq_annots: raise ValueError("Empty sequences file!") + # pyre-ignore self.seq_annots = {entry.sequence_name: entry for entry in seq_annots} def _load_subset_lists(self) -> None: @@ -483,6 +495,7 @@ def _load_subset_lists(self) -> None: for subset, frames in subset_to_seq_frame.items() for _, _, path in frames } + # pyre-ignore for frame in self.frame_annots: frame["subset"] = frame_path_to_subset.get( frame["frame_annotation"].image.path, None @@ -495,6 +508,7 @@ def _load_subset_lists(self) -> None: def _sort_frames(self) -> None: # Sort frames to have them grouped by sequence, ordered by timestamp + # pyre-ignore self.frame_annots = sorted( self.frame_annots, key=lambda f: ( @@ -506,6 +520,7 @@ def _sort_frames(self) -> None: def _filter_db(self) -> None: if self.remove_empty_masks: 
logger.info("Removing images with empty masks.") + # pyre-ignore old_len = len(self.frame_annots) msg = "remove_empty_masks needs every MaskAnnotation.mass to be set." @@ -546,6 +561,7 @@ def positive_mass(frame_annot: types.FrameAnnotation) -> bool: if len(self.limit_category_to) > 0: logger.info(f"Limiting dataset to categories: {self.limit_category_to}") + # pyre-ignore self.seq_annots = { name: entry for name, entry in self.seq_annots.items() @@ -583,6 +599,7 @@ def positive_mass(frame_annot: types.FrameAnnotation) -> bool: if self.n_frames_per_sequence > 0: logger.info(f"Taking max {self.n_frames_per_sequence} per sequence.") keep_idx = [] + # pyre-ignore for seq, seq_indices in self._seq_to_idx.items(): # infer the seed from the sequence name, this is reproducible # and makes the selection differ for different sequences @@ -612,14 +629,19 @@ def _invalidate_indexes(self, filter_seq_annots: bool = False) -> None: self._invalidate_seq_to_idx() if filter_seq_annots: + # pyre-ignore self.seq_annots = { - k: v for k, v in self.seq_annots.items() if k in self._seq_to_idx + k: v + for k, v in self.seq_annots.items() + if k in self._seq_to_idx # pyre-ignore } def _invalidate_seq_to_idx(self) -> None: seq_to_idx = defaultdict(list) + # pyre-ignore for idx, entry in enumerate(self.frame_annots): seq_to_idx[entry["frame_annotation"].sequence_name].append(idx) + # pyre-ignore self._seq_to_idx = seq_to_idx def _local_path(self, path: str) -> str: @@ -634,6 +656,7 @@ def get_frame_numbers_and_timestamps( for idx in idxs: if ( subset_filter is not None + # pyre-ignore and self.frame_annots[idx]["subset"] not in subset_filter ): continue @@ -646,6 +669,7 @@ def get_frame_numbers_and_timestamps( def category_to_sequence_names(self) -> Dict[str, List[str]]: c2seq = defaultdict(list) + # pyre-ignore for sequence_name, sa in self.seq_annots.items(): c2seq[sa.category].append(sequence_name) return dict(c2seq) diff --git a/pytorch3d/implicitron/dataset/utils.py b/pytorch3d/implicitron/dataset/utils.py index 6e9af933d..aca0507dd 100644 --- a/pytorch3d/implicitron/dataset/utils.py +++ b/pytorch3d/implicitron/dataset/utils.py @@ -156,3 +156,32 @@ def _bbox_xywh_to_xyxy( def _get_1d_bounds(arr) -> Tuple[int, int]: nz = np.flatnonzero(arr) return nz[0], nz[-1] + 1 + + +def _resize_image( + image, image_height, image_width, mode="bilinear" +) -> Tuple[torch.Tensor, float, torch.Tensor]: + + if type(image) == np.ndarray: + image = torch.from_numpy(image) + + if image_height is None or image_width is None: + # skip the resizing + return image, 1.0, torch.ones_like(image[:1]) + # takes numpy array or tensor, returns pytorch tensor + minscale = min( + image_height / image.shape[-2], + image_width / image.shape[-1], + ) + imre = torch.nn.functional.interpolate( + image[None], + scale_factor=minscale, + mode=mode, + align_corners=False if mode == "bilinear" else None, + recompute_scale_factor=True, + )[0] + imre_ = torch.zeros(image.shape[0], image_height, image_width) + imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre + mask = torch.zeros(1, image_height, image_width) + mask[:, 0 : imre.shape[1], 0 : imre.shape[2]] = 1.0 + return imre_, minscale, mask diff --git a/pytorch3d/implicitron/dataset/visualize.py b/pytorch3d/implicitron/dataset/visualize.py index 284e903a0..6d0be0362 100644 --- a/pytorch3d/implicitron/dataset/visualize.py +++ b/pytorch3d/implicitron/dataset/visualize.py @@ -44,6 +44,7 @@ def get_implicitron_sequence_pointcloud( sequence_entries = [ ei for ei in sequence_entries + # pyre-ignore[16] if 
dataset.frame_annots[ei]["frame_annotation"].sequence_name == sequence_name ] diff --git a/tests/implicitron/test_bbox.py b/tests/implicitron/test_bbox.py index 16199ad1e..3c45ee793 100644 --- a/tests/implicitron/test_bbox.py +++ b/tests/implicitron/test_bbox.py @@ -9,7 +9,6 @@ import numpy as np import torch -from pytorch3d.implicitron.dataset.blob_loader import _resize_image from pytorch3d.implicitron.dataset.utils import ( _bbox_xywh_to_xyxy, @@ -20,6 +19,7 @@ _get_bbox_from_mask, _get_clamp_bbox, _rescale_bbox, + _resize_image, ) from tests.common_testing import TestCaseMixin diff --git a/tests/implicitron/test_blob_loader.py b/tests/implicitron/test_blob_loader.py index d2a612d48..ef18d6258 100644 --- a/tests/implicitron/test_blob_loader.py +++ b/tests/implicitron/test_blob_loader.py @@ -1,3 +1,9 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + import contextlib import gzip import os @@ -15,8 +21,10 @@ _load_depth_mask, _load_image, _load_mask, + _safe_as_tensor, BlobLoader, ) +from pytorch3d.implicitron.dataset.dataset_base import FrameData from pytorch3d.implicitron.tools.config import get_default_args from pytorch3d.renderer.cameras import PerspectiveCameras @@ -53,6 +61,37 @@ def setUp(self): ) self.frame_annotation = frame_annots_list[0] + sequence_annotations_file = os.path.join( + self.dataset_root, category, "sequence_annotations.jgz" + ) + local_file = self.path_manager.get_local_path(sequence_annotations_file) + with gzip.open(local_file, "rt", encoding="utf8") as zipfile: + seq_annots_list = types.load_dataclass( + zipfile, List[types.SequenceAnnotation] + ) + seq_annots = {entry.sequence_name: entry for entry in seq_annots_list} + self.seq_annotation = seq_annots[self.frame_annotation.sequence_name] + + point_cloud = self.seq_annotation.point_cloud + self.frame_data = FrameData( + frame_number=_safe_as_tensor( + self.frame_annotation.frame_number, torch.long + ), + frame_timestamp=_safe_as_tensor( + self.frame_annotation.frame_timestamp, torch.float + ), + sequence_name=self.frame_annotation.sequence_name, + sequence_category=self.seq_annotation.category, + camera_quality_score=_safe_as_tensor( + self.seq_annotation.viewpoint_quality_score, torch.float + ), + point_cloud_quality_score=_safe_as_tensor( + point_cloud.quality_score, torch.float + ) + if point_cloud is not None + else None, + ) + def test_BlobLoader_args(self): # test that BlobLoader works with get_default_args get_default_args(BlobLoader) @@ -65,58 +104,82 @@ def test_fix_point_cloud_path(self): assert self.blob_loader.dataset_root in modified_path def test_load_(self): + bbox_xywh = None + self.frame_data.image_size_hw = _safe_as_tensor( + self.frame_annotation.image.size, torch.long + ) ( - fg_probability, - mask_path, - bbox_xywh, - ) = self.blob_loader._load_crop_fg_probability(self.frame_annotation) - - assert mask_path - assert torch.is_tensor(fg_probability) - assert torch.is_tensor(bbox_xywh) + self.frame_data.fg_probability, + self.frame_data.mask_path, + self.frame_data.bbox_xywh, + ) = self.blob_loader._load_fg_probability(self.frame_annotation, bbox_xywh) + + assert self.frame_data.mask_path + assert torch.is_tensor(self.frame_data.fg_probability) + assert torch.is_tensor(self.frame_data.bbox_xywh) # assert bboxes shape - self.assertEqual( - fg_probability.shape, torch.Size([1, self.image_height, self.image_width]) + 
self.assertEqual(self.frame_data.bbox_xywh.shape, torch.Size([4])) + ( + self.frame_data.image_rgb, + self.frame_data.image_path, + ) = self.blob_loader._load_images( + self.frame_annotation, self.frame_data.fg_probability ) - self.assertEqual(bbox_xywh.shape, torch.Size([4])) + self.assertEqual(type(self.frame_data.image_rgb), np.ndarray) + assert self.frame_data.image_path + ( - image_rgb, - image_path, - mask_crop, - scale, - ) = self.blob_loader._load_crop_images(self.frame_annotation, fg_probability) - assert torch.is_tensor(image_rgb) - assert image_path - assert torch.is_tensor(mask_crop) + self.frame_data.depth_map, + depth_path, + self.frame_data.depth_mask, + ) = self.blob_loader._load_mask_depth( + self.frame_annotation, + self.frame_data.fg_probability, + ) + assert torch.is_tensor(self.frame_data.depth_map) + assert depth_path + assert torch.is_tensor(self.frame_data.depth_mask) + + clamp_bbox_xyxy = None + if self.blob_loader.box_crop: + clamp_bbox_xyxy = self.frame_data.crop_by_bbox_( + self.blob_loader.box_crop_context + ) + + # assert image and mask shapes after resize + scale = self.frame_data.resize_frame_(self.image_height, self.image_width) assert scale - # assert image and mask shapes self.assertEqual( - image_rgb.shape, torch.Size([3, self.image_height, self.image_width]) + self.frame_data.mask_crop.shape, + torch.Size([1, self.image_height, self.image_width]), ) self.assertEqual( - mask_crop.shape, torch.Size([1, self.image_height, self.image_width]) + self.frame_data.image_rgb.shape, + torch.Size([3, self.image_height, self.image_width]), ) - - (depth_map, depth_path, depth_mask,) = self.blob_loader._load_mask_depth( - self.frame_annotation, - fg_probability, + self.assertEqual( + self.frame_data.mask_crop.shape, + torch.Size([1, self.image_height, self.image_width]), + ) + self.assertEqual( + self.frame_data.fg_probability.shape, + torch.Size([1, self.image_height, self.image_width]), ) - assert torch.is_tensor(depth_map) - assert depth_path - assert torch.is_tensor(depth_mask) - # assert image and mask shapes self.assertEqual( - depth_map.shape, torch.Size([1, self.image_height, self.image_width]) + self.frame_data.depth_map.shape, + torch.Size([1, self.image_height, self.image_width]), ) self.assertEqual( - depth_mask.shape, torch.Size([1, self.image_height, self.image_width]) + self.frame_data.depth_mask.shape, + torch.Size([1, self.image_height, self.image_width]), ) - camera = self.blob_loader._get_pytorch3d_camera( + self.frame_data.camera = self.blob_loader._get_pytorch3d_camera( self.frame_annotation, scale, + clamp_bbox_xyxy, ) - self.assertEqual(type(camera), PerspectiveCameras) + self.assertEqual(type(self.frame_data.camera), PerspectiveCameras) def test_load_image(self): path = os.path.join(self.dataset_root, self.frame_annotation.image.path) From 0fc3253d029ccf1551f0439b7c787fbb4d76f8bd Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Wed, 15 Mar 2023 18:52:32 +0000 Subject: [PATCH 40/43] using safe_as_tensor for fg_probability --- pytorch3d/implicitron/dataset/blob_loader.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index 9ccf53b2f..13eecdf79 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -142,15 +142,17 @@ def _load_fg_probability( fg_probability = _load_mask(self._local_path(full_path)) # we can use provided bbox_xywh or calculate it based on mask if 
bbox_xywh is None: - bbox_xywh = torch.tensor( - _get_bbox_from_mask(fg_probability, self.box_crop_mask_thr) - ) + bbox_xywh = _get_bbox_from_mask(fg_probability, self.box_crop_mask_thr) if fg_probability.shape[-2:] != entry.image.size: raise ValueError( f"bad mask size: {fg_probability.shape[-2:]} vs {entry.image.size}!" ) - return torch.tensor(fg_probability), full_path, bbox_xywh + return ( + _safe_as_tensor(fg_probability, torch.float), + full_path, + _safe_as_tensor(bbox_xywh, torch.long), + ) def _load_images( self, From 7c8d89daa2b3908a72f847f73edee704780d3f63 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Wed, 15 Mar 2023 18:58:34 +0000 Subject: [PATCH 41/43] made resizing only for loaded objects --- pytorch3d/implicitron/dataset/dataset_base.py | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/pytorch3d/implicitron/dataset/dataset_base.py b/pytorch3d/implicitron/dataset/dataset_base.py index cbc871a1e..d567fb0b3 100644 --- a/pytorch3d/implicitron/dataset/dataset_base.py +++ b/pytorch3d/implicitron/dataset/dataset_base.py @@ -226,17 +226,18 @@ def resize_frame_(self, image_height, image_width) -> float: # pyre-ignore self.image_rgb.shape[-2:], ) + if self.image_rgb is not None: + self.image_rgb, scale, self.mask_crop = _resize_image( + self.image_rgb, image_height=image_height, image_width=image_width + ) - self.image_rgb, scale, self.mask_crop = _resize_image( - self.image_rgb, image_height=image_height, image_width=image_width - ) - - self.fg_probability, _, _ = _resize_image( - self.fg_probability, - image_height=image_height, - image_width=image_width, - mode="nearest", - ) + if self.fg_probability is not None: + self.fg_probability, _, _ = _resize_image( + self.fg_probability, + image_height=image_height, + image_width=image_width, + mode="nearest", + ) if self.depth_map is not None: self.depth_map, _, _ = _resize_image( From 3027cd7e5f2b615fead37f3338cfe587e84cc9db Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Wed, 15 Mar 2023 19:05:26 +0000 Subject: [PATCH 42/43] fixing scale --- pytorch3d/implicitron/dataset/dataset_base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pytorch3d/implicitron/dataset/dataset_base.py b/pytorch3d/implicitron/dataset/dataset_base.py index d567fb0b3..1684251fb 100644 --- a/pytorch3d/implicitron/dataset/dataset_base.py +++ b/pytorch3d/implicitron/dataset/dataset_base.py @@ -226,6 +226,8 @@ def resize_frame_(self, image_height, image_width) -> float: # pyre-ignore self.image_rgb.shape[-2:], ) + + scale = 1.0 if self.image_rgb is not None: self.image_rgb, scale, self.mask_crop = _resize_image( self.image_rgb, image_height=image_height, image_width=image_width @@ -237,6 +239,7 @@ def resize_frame_(self, image_height, image_width) -> float: image_height=image_height, image_width=image_width, mode="nearest", + ) if self.depth_map is not None: From 7d570c179d94b28c00e3c0c749da6d7150e8d7e7 Mon Sep 17 00:00:00 2001 From: Ildar Salakhiev Date: Wed, 15 Mar 2023 19:21:46 +0000 Subject: [PATCH 43/43] fixing scale again.. 
--- pytorch3d/implicitron/dataset/blob_loader.py | 15 +++++++++++++-- pytorch3d/implicitron/dataset/dataset_base.py | 5 ++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/pytorch3d/implicitron/dataset/blob_loader.py b/pytorch3d/implicitron/dataset/blob_loader.py index 13eecdf79..83f39c78e 100644 --- a/pytorch3d/implicitron/dataset/blob_loader.py +++ b/pytorch3d/implicitron/dataset/blob_loader.py @@ -117,10 +117,21 @@ def load_( if self.box_crop: clamp_bbox_xyxy = frame_data.crop_by_bbox_(self.box_crop_context) - scale = 1.0 + scale = ( + min( + self.image_height / entry.image.size[0], + # pyre-ignore + self.image_width / entry.image.size[1], + ) + if self.image_height is not None and self.image_width is not None + else 1.0 + ) if self.image_height is not None and self.image_width is not None: - scale = frame_data.resize_frame_(self.image_height, self.image_width) + optional_scale = frame_data.resize_frame_( + self.image_height, self.image_width + ) + scale = optional_scale or scale # creating camera taking to account bbox and resize scale if entry.viewpoint is not None: diff --git a/pytorch3d/implicitron/dataset/dataset_base.py b/pytorch3d/implicitron/dataset/dataset_base.py index 1684251fb..7c4268fb9 100644 --- a/pytorch3d/implicitron/dataset/dataset_base.py +++ b/pytorch3d/implicitron/dataset/dataset_base.py @@ -218,7 +218,7 @@ def crop_by_bbox_(self, box_crop_context) -> Optional[torch.Tensor]: self.cropped = True return clamp_bbox_xyxy - def resize_frame_(self, image_height, image_width) -> float: + def resize_frame_(self, image_height, image_width) -> Optional[float]: if self.bbox_xywh is not None: self.bbox_xywh = _rescale_bbox( self.bbox_xywh, @@ -227,7 +227,7 @@ def resize_frame_(self, image_height, image_width) -> float: self.image_rgb.shape[-2:], ) - scale = 1.0 + scale = None if self.image_rgb is not None: self.image_rgb, scale, self.mask_crop = _resize_image( self.image_rgb, image_height=image_height, image_width=image_width @@ -239,7 +239,6 @@ def resize_frame_(self, image_height, image_width) -> float: image_height=image_height, image_width=image_width, mode="nearest", - ) if self.depth_map is not None:
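
After the final patch, the division of labor is: `BlobLoader.load_` reads the blobs and mutates the `FrameData` in place, `FrameData.crop_by_bbox_` and `FrameData.resize_frame_` own the geometry changes (returning `clamp_bbox_xyxy` and the rescaling factor respectively, or `None` when there is nothing to do), and `_get_pytorch3d_camera` folds the crop offset and resize scale into the camera. The sketch below illustrates that flow end to end; it is not part of the patch series, and the dataset root, category, and output size are illustrative assumptions:

    # Illustrative usage sketch only; paths, category, and sizes are assumptions.
    from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset

    dataset = JsonIndexDataset(
        frame_annotations_file="DATASET_ROOT/teddybear/frame_annotations.jgz",
        sequence_annotations_file="DATASET_ROOT/teddybear/sequence_annotations.jgz",
        dataset_root="DATASET_ROOT",
        image_height=800,
        image_width=800,
        box_crop=True,
        load_point_clouds=False,
    )

    # __getitem__ builds the FrameData shell from the json annotations, then
    # blob_loader.load_ mutates it in place:
    #   1. _load_fg_probability: mask + bbox_xywh (computed from the mask
    #      unless a bbox_xywh tensor is passed in),
    #   2. _load_images / _load_mask_depth: full-resolution blobs,
    #   3. frame_data.crop_by_bbox_(box_crop_context): in-place crop, returns
    #      clamp_bbox_xyxy (or None if already cropped / nothing loaded),
    #   4. frame_data.resize_frame_(H, W): in-place resize, returns the
    #      rescaling factor (None if no image was loaded, in which case load_
    #      falls back to the scale precomputed from entry.image.size),
    #   5. _get_pytorch3d_camera(entry, scale, clamp_bbox_xyxy): shifts the
    #      principal point by the crop offset and rescales the focal length.
    frame_data = dataset[0]
    assert frame_data.image_rgb.shape == (3, 800, 800)
    assert frame_data.cropped  # guards against calling crop_by_bbox_ twice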