Extract BlobLoader class from JsonIndexDataset and move crop_by_bbox to FrameData

Summary: Extracted the blob loader, added documentation for blob_loader, and did some refactoring on fields. For detailed steps and discussions, see https://github.com/facebookresearch/pytorch3d/pull/1463 and https://github.com/fairinternal/pixar_replay/pull/160 · Reviewed By: bottler · Differential Revision: D44061728 · fbshipit-source-id: eefb21e9679003045d73729f96e6a93a1d4d2d51
2026-02-06 05:52:17 +08:00 · 2023-04-04 07:17:43 -07:00
parent c759fc560f
commit ebdbfde0ce
15 changed files with 1421 additions and 694 deletions
--- a/tests/implicitron/test_batch_sampler.py
+++ b/tests/implicitron/test_batch_sampler.py
@@ -17,7 +17,8 @@ from pytorch3d.implicitron.dataset.data_loader_map_provider import (
    DoublePoolBatchSampler,
 )

-from pytorch3d.implicitron.dataset.dataset_base import DatasetBase, FrameData
+from pytorch3d.implicitron.dataset.dataset_base import DatasetBase
+from pytorch3d.implicitron.dataset.frame_data import FrameData
 from pytorch3d.implicitron.dataset.scene_batch_sampler import SceneBatchSampler


--- a/tests/implicitron/test_bbox.py
+++ b/tests/implicitron/test_bbox.py
@@ -9,11 +9,19 @@ import unittest
 import numpy as np

 import torch
-from pytorch3d.implicitron.dataset.json_index_dataset import (
-    _bbox_xywh_to_xyxy,
-    _bbox_xyxy_to_xywh,
-    _get_bbox_from_mask,
+
+from pytorch3d.implicitron.dataset.utils import (
+    bbox_xywh_to_xyxy,
+    bbox_xyxy_to_xywh,
+    clamp_box_to_image_bounds_and_round,
+    crop_around_box,
+    get_1d_bounds,
+    get_bbox_from_mask,
+    get_clamp_bbox,
+    rescale_bbox,
+    resize_image,
 )
+
 from tests.common_testing import TestCaseMixin


@@ -31,9 +39,9 @@ class TestBBox(TestCaseMixin, unittest.TestCase):
            ]
        )
        for bbox_xywh in bbox_xywh_list:
-            bbox_xyxy = _bbox_xywh_to_xyxy(bbox_xywh)
-            bbox_xywh_ = _bbox_xyxy_to_xywh(bbox_xyxy)
-            bbox_xyxy_ = _bbox_xywh_to_xyxy(bbox_xywh_)
+            bbox_xyxy = bbox_xywh_to_xyxy(bbox_xywh)
+            bbox_xywh_ = bbox_xyxy_to_xywh(bbox_xyxy)
+            bbox_xyxy_ = bbox_xywh_to_xyxy(bbox_xywh_)
            self.assertClose(bbox_xywh_, bbox_xywh)
            self.assertClose(bbox_xyxy, bbox_xyxy_)

@@ -47,8 +55,8 @@ class TestBBox(TestCaseMixin, unittest.TestCase):
            ]
        )
        for bbox_xywh, bbox_xyxy_expected in bbox_xywh_to_xyxy_expected:
-            self.assertClose(_bbox_xywh_to_xyxy(bbox_xywh), bbox_xyxy_expected)
-            self.assertClose(_bbox_xyxy_to_xywh(bbox_xyxy_expected), bbox_xywh)
+            self.assertClose(bbox_xywh_to_xyxy(bbox_xywh), bbox_xyxy_expected)
+            self.assertClose(bbox_xyxy_to_xywh(bbox_xyxy_expected), bbox_xywh)

        clamp_amnt = 3
        bbox_xywh_to_xyxy_clamped_expected = torch.LongTensor(
@@ -61,7 +69,7 @@ class TestBBox(TestCaseMixin, unittest.TestCase):
        )
        for bbox_xywh, bbox_xyxy_expected in bbox_xywh_to_xyxy_clamped_expected:
            self.assertClose(
-                _bbox_xywh_to_xyxy(bbox_xywh, clamp_size=clamp_amnt),
+                bbox_xywh_to_xyxy(bbox_xywh, clamp_size=clamp_amnt),
                bbox_xyxy_expected,
            )

@@ -74,5 +82,61 @@ class TestBBox(TestCaseMixin, unittest.TestCase):
            ]
        ).astype(np.float32)
        expected_bbox_xywh = [2, 1, 2, 1]
-        bbox_xywh = _get_bbox_from_mask(mask, 0.5)
+        bbox_xywh = get_bbox_from_mask(mask, 0.5)
        self.assertClose(bbox_xywh, expected_bbox_xywh)
+
+    def test_crop_around_box(self):
+        bbox = torch.LongTensor([0, 1, 2, 3])  # (x_min, y_min, x_max, y_max)
+        image = torch.LongTensor(
+            [
+                [0, 0, 10, 20],
+                [10, 20, 5, 1],
+                [10, 20, 1, 1],
+                [5, 4, 0, 1],
+            ]
+        )
+        cropped = crop_around_box(image, bbox)
+        self.assertClose(cropped, image[1:3, 0:2])
+
+    def test_clamp_box_to_image_bounds_and_round(self):
+        bbox = torch.LongTensor([0, 1, 10, 12])
+        image_size = (5, 6)
+        expected_clamped_bbox = torch.LongTensor([0, 1, image_size[1], image_size[0]])
+        clamped_bbox = clamp_box_to_image_bounds_and_round(bbox, image_size)
+        self.assertClose(clamped_bbox, expected_clamped_bbox)
+
+    def test_get_clamp_bbox(self):
+        bbox_xywh = torch.LongTensor([1, 1, 4, 5])
+        clamped_bbox_xyxy = get_clamp_bbox(bbox_xywh, box_crop_context=2)
+        # size multiplied by 2 and added coordinates
+        self.assertClose(clamped_bbox_xyxy, torch.Tensor([-3, -4, 9, 11]))
+
+    def test_rescale_bbox(self):
+        bbox = torch.Tensor([0.0, 1.0, 3.0, 4.0])
+        original_resolution = (4, 4)
+        new_resolution = (8, 8)  # twice bigger
+        rescaled_bbox = rescale_bbox(bbox, original_resolution, new_resolution)
+        self.assertClose(bbox * 2, rescaled_bbox)
+
+    def test_get_1d_bounds(self):
+        array = [0, 1, 2]
+        bounds = get_1d_bounds(array)
+        # make nonzero 1d bounds of image
+        self.assertClose(bounds, [1, 3])
+
+    def test_resize_image(self):
+        image = np.random.rand(3, 300, 500)  # rgb image 300x500
+        expected_shape = (150, 250)
+
+        resized_image, scale, mask_crop = resize_image(
+            image, image_height=expected_shape[0], image_width=expected_shape[1]
+        )
+
+        original_shape = image.shape[-2:]
+        expected_scale = min(
+            expected_shape[0] / original_shape[0], expected_shape[1] / original_shape[1]
+        )
+
+        self.assertEqual(scale, expected_scale)
+        self.assertEqual(resized_image.shape[-2:], expected_shape)
+        self.assertEqual(mask_crop.shape[-2:], expected_shape)
--- a/tests/implicitron/test_data_cow.py
+++ b/tests/implicitron/test_data_cow.py
@@ -8,7 +8,7 @@ import os
 import unittest

 import torch
-from pytorch3d.implicitron.dataset.dataset_base import FrameData
+from pytorch3d.implicitron.dataset.frame_data import FrameData
 from pytorch3d.implicitron.dataset.rendered_mesh_dataset_map_provider import (
    RenderedMeshDatasetMapProvider,
 )
--- a/tests/implicitron/test_evaluation.py
+++ b/tests/implicitron/test_evaluation.py
@@ -13,8 +13,10 @@ import os
 import unittest

 import lpips
+import numpy as np
 import torch
-from pytorch3d.implicitron.dataset.dataset_base import FrameData
+
+from pytorch3d.implicitron.dataset.frame_data import FrameData
 from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset
 from pytorch3d.implicitron.evaluation.evaluate_new_view_synthesis import eval_batch
 from pytorch3d.implicitron.models.base_model import ImplicitronModelBase
@@ -268,7 +270,7 @@ class TestEvaluation(unittest.TestCase):
        for metric in lower_better:
            m_better = eval_result[metric]
            m_worse = eval_result_bad[metric]
-            if m_better != m_better or m_worse != m_worse:
+            if np.isnan(m_better) or np.isnan(m_worse):
                continue  # metric is missing, i.e. NaN
            _assert = (
                self.assertLessEqual
--- a/tests/implicitron/test_frame_data_builder.py
+++ b/tests/implicitron/test_frame_data_builder.py
@@ -0,0 +1,224 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import contextlib
+import gzip
+import os
+import unittest
+from typing import List
+
+import numpy as np
+import torch
+
+from pytorch3d.implicitron.dataset import types
+from pytorch3d.implicitron.dataset.dataset_base import FrameData
+from pytorch3d.implicitron.dataset.frame_data import FrameDataBuilder
+from pytorch3d.implicitron.dataset.utils import (
+    load_16big_png_depth,
+    load_1bit_png_mask,
+    load_depth,
+    load_depth_mask,
+    load_image,
+    load_mask,
+    safe_as_tensor,
+)
+from pytorch3d.implicitron.tools.config import get_default_args
+from pytorch3d.renderer.cameras import PerspectiveCameras
+
+from tests.common_testing import TestCaseMixin
+from tests.implicitron.common_resources import get_skateboard_data
+
+
+class TestFrameDataBuilder(TestCaseMixin, unittest.TestCase):
+    def setUp(self):
+        torch.manual_seed(42)
+
+        category = "skateboard"
+        stack = contextlib.ExitStack()
+        self.dataset_root, self.path_manager = stack.enter_context(
+            get_skateboard_data()
+        )
+        self.addCleanup(stack.close)
+        self.image_height = 768
+        self.image_width = 512
+
+        self.frame_data_builder = FrameDataBuilder(
+            image_height=self.image_height,
+            image_width=self.image_width,
+            dataset_root=self.dataset_root,
+            path_manager=self.path_manager,
+        )
+
+        # loading single frame annotation of dataset (see JsonIndexDataset._load_frames())
+        frame_file = os.path.join(self.dataset_root, category, "frame_annotations.jgz")
+        local_file = self.path_manager.get_local_path(frame_file)
+        with gzip.open(local_file, "rt", encoding="utf8") as zipfile:
+            frame_annots_list = types.load_dataclass(
+                zipfile, List[types.FrameAnnotation]
+            )
+            self.frame_annotation = frame_annots_list[0]
+
+        sequence_annotations_file = os.path.join(
+            self.dataset_root, category, "sequence_annotations.jgz"
+        )
+        local_file = self.path_manager.get_local_path(sequence_annotations_file)
+        with gzip.open(local_file, "rt", encoding="utf8") as zipfile:
+            seq_annots_list = types.load_dataclass(
+                zipfile, List[types.SequenceAnnotation]
+            )
+            seq_annots = {entry.sequence_name: entry for entry in seq_annots_list}
+            self.seq_annotation = seq_annots[self.frame_annotation.sequence_name]
+
+        point_cloud = self.seq_annotation.point_cloud
+        self.frame_data = FrameData(
+            frame_number=safe_as_tensor(self.frame_annotation.frame_number, torch.long),
+            frame_timestamp=safe_as_tensor(
+                self.frame_annotation.frame_timestamp, torch.float
+            ),
+            sequence_name=self.frame_annotation.sequence_name,
+            sequence_category=self.seq_annotation.category,
+            camera_quality_score=safe_as_tensor(
+                self.seq_annotation.viewpoint_quality_score, torch.float
+            ),
+            point_cloud_quality_score=safe_as_tensor(
+                point_cloud.quality_score, torch.float
+            )
+            if point_cloud is not None
+            else None,
+        )
+
+    def test_frame_data_builder_args(self):
+        # test that FrameDataBuilder works with get_default_args
+        get_default_args(FrameDataBuilder)
+
+    def test_fix_point_cloud_path(self):
+        """Some files in Co3Dv2 have an accidental absolute path stored."""
+        original_path = "some_file_path"
+        modified_path = self.frame_data_builder._fix_point_cloud_path(original_path)
+        self.assertIn(original_path, modified_path)
+        self.assertIn(self.frame_data_builder.dataset_root, modified_path)
+
+    def test_load_and_adjust_frame_data(self):
+        self.frame_data.image_size_hw = safe_as_tensor(
+            self.frame_annotation.image.size, torch.long
+        )
+        self.frame_data.effective_image_size_hw = self.frame_data.image_size_hw
+
+        (
+            self.frame_data.fg_probability,
+            self.frame_data.mask_path,
+            self.frame_data.bbox_xywh,
+        ) = self.frame_data_builder._load_fg_probability(self.frame_annotation)
+
+        self.assertIsNotNone(self.frame_data.mask_path)
+        self.assertTrue(torch.is_tensor(self.frame_data.fg_probability))
+        self.assertTrue(torch.is_tensor(self.frame_data.bbox_xywh))
+        # assert bboxes shape
+        self.assertEqual(self.frame_data.bbox_xywh.shape, torch.Size([4]))
+
+        (
+            self.frame_data.image_rgb,
+            self.frame_data.image_path,
+        ) = self.frame_data_builder._load_images(
+            self.frame_annotation, self.frame_data.fg_probability
+        )
+        self.assertEqual(type(self.frame_data.image_rgb), np.ndarray)
+        self.assertIsNotNone(self.frame_data.image_path)
+
+        (
+            self.frame_data.depth_map,
+            depth_path,
+            self.frame_data.depth_mask,
+        ) = self.frame_data_builder._load_mask_depth(
+            self.frame_annotation,
+            self.frame_data.fg_probability,
+        )
+        self.assertTrue(torch.is_tensor(self.frame_data.depth_map))
+        self.assertIsNotNone(depth_path)
+        self.assertTrue(torch.is_tensor(self.frame_data.depth_mask))
+
+        new_size = (self.image_height, self.image_width)
+
+        if self.frame_data_builder.box_crop:
+            self.frame_data.crop_by_metadata_bbox_(
+                self.frame_data_builder.box_crop_context,
+            )
+
+        # assert image and mask shapes after resize
+        self.frame_data.resize_frame_(
+            new_size_hw=torch.tensor(new_size, dtype=torch.long),
+        )
+        self.assertEqual(
+            self.frame_data.mask_crop.shape,
+            torch.Size([1, self.image_height, self.image_width]),
+        )
+        self.assertEqual(
+            self.frame_data.image_rgb.shape,
+            torch.Size([3, self.image_height, self.image_width]),
+        )
+        self.assertEqual(
+            self.frame_data.mask_crop.shape,
+            torch.Size([1, self.image_height, self.image_width]),
+        )
+        self.assertEqual(
+            self.frame_data.fg_probability.shape,
+            torch.Size([1, self.image_height, self.image_width]),
+        )
+        self.assertEqual(
+            self.frame_data.depth_map.shape,
+            torch.Size([1, self.image_height, self.image_width]),
+        )
+        self.assertEqual(
+            self.frame_data.depth_mask.shape,
+            torch.Size([1, self.image_height, self.image_width]),
+        )
+        self.frame_data.camera = self.frame_data_builder._get_pytorch3d_camera(
+            self.frame_annotation,
+        )
+        self.assertEqual(type(self.frame_data.camera), PerspectiveCameras)
+
+    def test_load_image(self):
+        path = os.path.join(self.dataset_root, self.frame_annotation.image.path)
+        local_path = self.path_manager.get_local_path(path)
+        image = load_image(local_path)
+        self.assertEqual(image.dtype, np.float32)
+        self.assertLessEqual(np.max(image), 1.0)
+        self.assertGreaterEqual(np.min(image), 0.0)
+
+    def test_load_mask(self):
+        path = os.path.join(self.dataset_root, self.frame_annotation.mask.path)
+        mask = load_mask(path)
+        self.assertEqual(mask.dtype, np.float32)
+        self.assertLessEqual(np.max(mask), 1.0)
+        self.assertGreaterEqual(np.min(mask), 0.0)
+
+    def test_load_depth(self):
+        path = os.path.join(self.dataset_root, self.frame_annotation.depth.path)
+        depth_map = load_depth(path, self.frame_annotation.depth.scale_adjustment)
+        self.assertEqual(depth_map.dtype, np.float32)
+        self.assertEqual(len(depth_map.shape), 3)
+
+    def test_load_16big_png_depth(self):
+        path = os.path.join(self.dataset_root, self.frame_annotation.depth.path)
+        depth_map = load_16big_png_depth(path)
+        self.assertEqual(depth_map.dtype, np.float32)
+        self.assertEqual(len(depth_map.shape), 2)
+
+    def test_load_1bit_png_mask(self):
+        mask_path = os.path.join(
+            self.dataset_root, self.frame_annotation.depth.mask_path
+        )
+        mask = load_1bit_png_mask(mask_path)
+        self.assertEqual(mask.dtype, np.float32)
+        self.assertEqual(len(mask.shape), 2)
+
+    def test_load_depth_mask(self):
+        mask_path = os.path.join(
+            self.dataset_root, self.frame_annotation.depth.mask_path
+        )
+        mask = load_depth_mask(mask_path)
+        self.assertEqual(mask.dtype, np.float32)
+        self.assertEqual(len(mask.shape), 3)
--- a/tests/implicitron/test_json_index_dataset_provider_v2.py
+++ b/tests/implicitron/test_json_index_dataset_provider_v2.py
@@ -17,7 +17,7 @@ import numpy as np
 import torch
 import torchvision
 from PIL import Image
-from pytorch3d.implicitron.dataset.dataset_base import FrameData
+from pytorch3d.implicitron.dataset.frame_data import FrameData
 from pytorch3d.implicitron.dataset.json_index_dataset_map_provider_v2 import (
    JsonIndexDatasetMapProviderV2,
 )