Mirror of https://github.com/facebookresearch/pytorch3d.git (synced 2025-08-02 03:42:50 +08:00)
provide cow dataset

Summary: Make a dummy single-scene dataset using the code from generate_cow_renders (used in existing NeRF tutorials).

Reviewed By: kjchalup

Differential Revision: D38116910

fbshipit-source-id: 8db6df7098aa221c81d392e5cd21b0e67f65bd70
Parent: 1b0584f7bd
Commit: 14bd5e28e8
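
For orientation, here is a minimal usage sketch of the provider this commit adds, based on the new module and test in the diff below. It constructs the provider directly, exactly as the new test does; the names and defaults (num_views, resolution, get_dataset_map, get_all_train_cameras) all come from the diff, but the snippet itself is illustrative and not part of the commit.

from pytorch3d.implicitron.dataset.rendered_mesh_dataset_map_provider import (
    RenderedMeshDatasetMapProvider,
)
from pytorch3d.implicitron.tools.config import expand_args_fields

# Configurable classes need their fields expanded before direct construction,
# as the new test does.
expand_args_fields(RenderedMeshDatasetMapProvider)

provider = RenderedMeshDatasetMapProvider(num_views=4, resolution=128)
dataset_map = provider.get_dataset_map()       # train only; val and test are None
frame = dataset_map.train[0]                   # FrameData with image_rgb, fg_probability, camera
cameras = provider.get_all_train_cameras()     # batch of FoVPerspectiveCameras, kept on the CPU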
@@ -1,5 +1,5 @@
 # Acknowledgements

-Thank you to Keenen Crane for allowing the cow mesh model to be used freely in the public domain.
+Thank you to Keenan Crane for allowing the cow mesh model to be used freely in the public domain.

 ###### Source: http://www.cs.cmu.edu/~kmcrane/Projects/ModelRepository/
@@ -44,6 +44,8 @@ def generate_cow_renders(
         data_dir: The folder that contains the cow mesh files. If the cow mesh
             files do not exist in the folder, this function will automatically
             download them.
+        azimuth_range: number of degrees on each side of the start position to
+            take samples

     Returns:
         cameras: A batch of `num_views` `FoVPerspectiveCameras` from which the
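
To make the new azimuth_range documentation concrete: the viewpoints are spread uniformly over [-azimuth_range, azimuth_range] degrees around the start position while the elevation stays fixed. The two lines below mirror the sampling code added later in this commit (rendered_mesh_dataset_map_provider) and are shown here only for intuition.

import torch

num_views, azimuth_range = 40, 180.0
elev = torch.linspace(0, 0, num_views)  # elevation held constant (camera stays on the equator)
azim = torch.linspace(-azimuth_range, azimuth_range, num_views) + 180.0  # degrees on each side of the start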
@@ -101,6 +101,15 @@ data_source_ImplicitronDataSource_args:
     n_known_frames_for_test: null
     path_manager_factory_PathManagerFactory_args:
       silence_logs: true
+  dataset_map_provider_RenderedMeshDatasetMapProvider_args:
+    num_views: 40
+    data_file: null
+    azimuth_range: 180.0
+    resolution: 128
+    use_point_light: true
+    path_manager_factory_class_type: PathManagerFactory
+    path_manager_factory_PathManagerFactory_args:
+      silence_logs: true
   data_loader_map_provider_SequenceDataLoaderMapProvider_args:
     batch_size: 1
     num_workers: 0
@@ -19,6 +19,7 @@ from .dataset_map_provider import DatasetMap, DatasetMapProviderBase, Task
 from .json_index_dataset_map_provider import JsonIndexDatasetMapProvider  # noqa
 from .json_index_dataset_map_provider_v2 import JsonIndexDatasetMapProviderV2  # noqa
 from .llff_dataset_map_provider import LlffDatasetMapProvider  # noqa
+from .rendered_mesh_dataset_map_provider import RenderedMeshDatasetMapProvider  # noqa


 class DataSourceBase(ReplaceableBase):
pytorch3d/implicitron/dataset/rendered_mesh_dataset_map_provider.py (new file)
@@ -0,0 +1,219 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from os.path import dirname, join, realpath
+from typing import Optional, Tuple
+
+import torch
+from pytorch3d.implicitron.tools.config import (
+    expand_args_fields,
+    registry,
+    run_auto_creation,
+)
+from pytorch3d.io import IO
+from pytorch3d.renderer import (
+    AmbientLights,
+    BlendParams,
+    CamerasBase,
+    FoVPerspectiveCameras,
+    HardPhongShader,
+    look_at_view_transform,
+    MeshRasterizer,
+    MeshRendererWithFragments,
+    PointLights,
+    RasterizationSettings,
+)
+from pytorch3d.structures.meshes import Meshes
+
+from .dataset_map_provider import (
+    DatasetMap,
+    DatasetMapProviderBase,
+    PathManagerFactory,
+    Task,
+)
+from .single_sequence_dataset import SingleSceneDataset
+from .utils import DATASET_TYPE_KNOWN
+
+
+@registry.register
+class RenderedMeshDatasetMapProvider(DatasetMapProviderBase):  # pyre-ignore [13]
+    """
+    A simple single-scene dataset based on PyTorch3D renders of a mesh.
+    Provides `num_views` renders of the mesh as train, with no val
+    and test. The renders are generated from viewpoints sampled at uniformly
+    distributed azimuth intervals. The elevation is kept constant so that the
+    camera's vertical position coincides with the equator.
+
+    By default, uses Keenan Crane's cow model, and the camera locations are
+    set to make sense for that.
+
+    Although the rendering used to generate this dataset will use a GPU
+    if one is available, the data it produces is on the CPU just like
+    the data returned by implicitron's other dataset map providers.
+    This is because both datasets and models can be large, so implicitron's
+    GenericModel.forward (etc) expects data on the CPU and only moves
+    what it needs to the device.
+
+    For a more detailed explanation of this code, please refer to the
+    docs/tutorials/fit_textured_mesh.ipynb notebook.
+
+    Members:
+        num_views: The number of generated renders.
+        data_file: The folder that contains the mesh file. By default, finds
+            the cow mesh in the same repo as this code.
+        azimuth_range: number of degrees on each side of the start position to
+            take samples
+        resolution: the common height and width of the output images.
+        use_point_light: whether to use a particular point light as opposed
+            to ambient white.
+    """
+
+    num_views: int = 40
+    data_file: Optional[str] = None
+    azimuth_range: float = 180
+    resolution: int = 128
+    use_point_light: bool = True
+    path_manager_factory: PathManagerFactory
+    path_manager_factory_class_type: str = "PathManagerFactory"
+
+    def get_dataset_map(self) -> DatasetMap:
+        # pyre-ignore[16]
+        return DatasetMap(train=self.train_dataset, val=None, test=None)
+
+    def get_task(self) -> Task:
+        return Task.SINGLE_SEQUENCE
+
+    def get_all_train_cameras(self) -> CamerasBase:
+        # pyre-ignore[16]
+        return self.poses
+
+    def __post_init__(self) -> None:
+        super().__init__()
+        run_auto_creation(self)
+        if torch.cuda.is_available():
+            device = torch.device("cuda:0")
+        else:
+            device = torch.device("cpu")
+        if self.data_file is None:
+            data_file = join(
+                dirname(dirname(dirname(dirname(realpath(__file__))))),
+                "docs",
+                "tutorials",
+                "data",
+                "cow_mesh",
+                "cow.obj",
+            )
+        else:
+            data_file = self.data_file
+        io = IO(path_manager=self.path_manager_factory.get())
+        mesh = io.load_mesh(data_file, device=device)
+        poses, images, masks = _generate_cow_renders(
+            num_views=self.num_views,
+            mesh=mesh,
+            azimuth_range=self.azimuth_range,
+            resolution=self.resolution,
+            device=device,
+            use_point_light=self.use_point_light,
+        )
+        # pyre-ignore[16]
+        self.poses = poses.cpu()
+        expand_args_fields(SingleSceneDataset)
+        # pyre-ignore[16]
+        self.train_dataset = SingleSceneDataset(  # pyre-ignore[28]
+            object_name="cow",
+            images=list(images.permute(0, 3, 1, 2).cpu()),
+            fg_probabilities=list(masks[:, None].cpu()),
+            poses=[self.poses[i] for i in range(len(poses))],
+            frame_types=[DATASET_TYPE_KNOWN] * len(poses),
+            eval_batches=None,
+        )
+
+
+@torch.no_grad()
+def _generate_cow_renders(
+    *,
+    num_views: int,
+    mesh: Meshes,
+    azimuth_range: float,
+    resolution: int,
+    device: torch.device,
+    use_point_light: bool,
+) -> Tuple[CamerasBase, torch.Tensor, torch.Tensor]:
+    """
+    Returns:
+        cameras: A batch of `num_views` `FoVPerspectiveCameras` from which the
+            images are rendered.
+        images: A tensor of shape `(num_views, height, width, 3)` containing
+            the rendered images.
+        silhouettes: A tensor of shape `(num_views, height, width)` containing
+            the rendered silhouettes.
+    """
+
+    # Load obj file
+
+    # We scale normalize and center the target mesh to fit in a sphere of radius 1
+    # centered at (0,0,0). (scale, center) will be used to bring the predicted mesh
+    # to its original center and scale. Note that normalizing the target mesh
+    # speeds up the optimization but is not necessary!
+    verts = mesh.verts_packed()
+    N = verts.shape[0]
+    center = verts.mean(0)
+    scale = max((verts - center).abs().max(0)[0])
+    mesh.offset_verts_(-(center.expand(N, 3)))
+    mesh.scale_verts_((1.0 / float(scale)))
+
+    # Get a batch of viewing angles.
+    elev = torch.linspace(0, 0, num_views)  # keep constant
+    azim = torch.linspace(-azimuth_range, azimuth_range, num_views) + 180.0
+
+    # Place a point light in front of the object. As mentioned above, the front of
+    # the cow is facing the -z direction.
+    if use_point_light:
+        lights = PointLights(device=device, location=[[0.0, 0.0, -3.0]])
+    else:
+        lights = AmbientLights(device=device)
+
+    # Initialize an OpenGL perspective camera that represents a batch of different
+    # viewing angles. All the cameras helper methods support mixed type inputs and
+    # broadcasting. So we can view the camera from a distance of dist=2.7, and
+    # then specify elevation and azimuth angles for each viewpoint as tensors.
+    R, T = look_at_view_transform(dist=2.7, elev=elev, azim=azim)
+    cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
+
+    # Define the settings for rasterization and shading.
+    # As we are rendering images for visualization
+    # purposes only we will set faces_per_pixel=1 and blur_radius=0.0. Refer to
+    # rasterize_meshes.py for explanations of these parameters. We also leave
+    # bin_size and max_faces_per_bin to their default values of None, which sets
+    # their values using heuristics and ensures that the faster coarse-to-fine
+    # rasterization method is used. Refer to docs/notes/renderer.md for an
+    # explanation of the difference between naive and coarse-to-fine rasterization.
+    raster_settings = RasterizationSettings(
+        image_size=resolution, blur_radius=0.0, faces_per_pixel=1
+    )
+
+    # Create a Phong renderer by composing a rasterizer and a shader. The textured
+    # Phong shader will interpolate the texture uv coordinates for each vertex,
+    # sample from a texture image and apply the Phong lighting model
+    blend_params = BlendParams(sigma=1e-4, gamma=1e-4, background_color=(0.0, 0.0, 0.0))
+    rasterizer_type = MeshRasterizer
+    renderer = MeshRendererWithFragments(
+        rasterizer=rasterizer_type(cameras=cameras, raster_settings=raster_settings),
+        shader=HardPhongShader(
+            device=device, cameras=cameras, lights=lights, blend_params=blend_params
+        ),
+    )
+
+    # Create a batch of meshes by repeating the cow mesh and associated textures.
+    # Meshes has a useful `extend` method which allows us to do this very easily.
+    # This also extends the textures.
+    meshes = mesh.extend(num_views)
+
+    # Render the cow mesh from each viewing angle
+    target_images, fragments = renderer(meshes, cameras=cameras, lights=lights)
+    silhouette_binary = (fragments.pix_to_face[..., 0] >= 0).float()
+
+    return cameras, target_images[..., :3], silhouette_binary
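
As the class docstring above notes, the rendered data comes back on the CPU even when a GPU was used to render it; the consumer moves what it needs to the device. A tiny hypothetical training-loop fragment (continuing the dataset_map sketch near the top of this page; not part of the commit) illustrates the convention, using only tensor fields the new test checks:

import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
frame = dataset_map.train[0]             # FrameData, stored on the CPU
image = frame.image_rgb.to(device)       # (3, H, W) render
mask = frame.fg_probability.to(device)   # (1, H, W) silhouette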
@@ -1661,9 +1661,9 @@ def look_at_rotation(


 def look_at_view_transform(
-    dist: float = 1.0,
-    elev: float = 0.0,
-    azim: float = 0.0,
+    dist: _BatchFloatType = 1.0,
+    elev: _BatchFloatType = 0.0,
+    azim: _BatchFloatType = 0.0,
     degrees: bool = True,
     eye: Optional[Union[Sequence, torch.Tensor]] = None,
     at=((0, 0, 0),),  # (1, 3)
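
The widened annotations reflect that dist, elev and azim accept per-view batches as well as scalars, which is how the new provider calls the function (tensor elev and azim, scalar dist). A brief sketch of the batched call, using only arguments that appear in this diff:

import torch
from pytorch3d.renderer import FoVPerspectiveCameras, look_at_view_transform

elev = torch.zeros(4)
azim = torch.linspace(-180.0, 180.0, 4)
R, T = look_at_view_transform(dist=2.7, elev=elev, azim=azim)  # scalar dist broadcasts over 4 views
cameras = FoVPerspectiveCameras(R=R, T=T)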
@@ -2,6 +2,6 @@

 This is copied version of docs/tutorials/data/cow_mesh with removed line 6159 (usemtl material_1) to test behavior without usemtl material_1 declaration.

-Thank you to Keenen Crane for allowing the cow mesh model to be used freely in the public domain.
+Thank you to Keenan Crane for allowing the cow mesh model to be used freely in the public domain.

 ###### Source: http://www.cs.cmu.edu/~kmcrane/Projects/ModelRepository/
@@ -90,6 +90,15 @@ dataset_map_provider_LlffDatasetMapProvider_args:
   n_known_frames_for_test: null
   path_manager_factory_PathManagerFactory_args:
     silence_logs: true
+dataset_map_provider_RenderedMeshDatasetMapProvider_args:
+  num_views: 40
+  data_file: null
+  azimuth_range: 180.0
+  resolution: 128
+  use_point_light: true
+  path_manager_factory_class_type: PathManagerFactory
+  path_manager_factory_PathManagerFactory_args:
+    silence_logs: true
 data_loader_map_provider_SequenceDataLoaderMapProvider_args:
   batch_size: 1
   num_workers: 0
tests/implicitron/test_data_cow.py (new file, 57 lines)
@@ -0,0 +1,57 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+import unittest
+
+import torch
+from pytorch3d.implicitron.dataset.dataset_base import FrameData
+from pytorch3d.implicitron.dataset.rendered_mesh_dataset_map_provider import (
+    RenderedMeshDatasetMapProvider,
+)
+from pytorch3d.implicitron.tools.config import expand_args_fields
+from pytorch3d.renderer import FoVPerspectiveCameras
+from tests.common_testing import TestCaseMixin
+
+
+inside_re_worker = os.environ.get("INSIDE_RE_WORKER", False)
+
+
+class TestDataCow(TestCaseMixin, unittest.TestCase):
+    def test_simple(self):
+        if inside_re_worker:
+            return
+        expand_args_fields(RenderedMeshDatasetMapProvider)
+        self._runtest(use_point_light=True, num_views=4)
+        self._runtest(use_point_light=False, num_views=4)
+
+    def _runtest(self, **kwargs):
+        provider = RenderedMeshDatasetMapProvider(**kwargs)
+        dataset_map = provider.get_dataset_map()
+        known_matrix = torch.zeros(1, 4, 4)
+        known_matrix[0, 0, 0] = 1.7321
+        known_matrix[0, 1, 1] = 1.7321
+        known_matrix[0, 2, 2] = 1.0101
+        known_matrix[0, 3, 2] = -1.0101
+        known_matrix[0, 2, 3] = 1
+
+        self.assertIsNone(dataset_map.val)
+        self.assertIsNone(dataset_map.test)
+        self.assertEqual(len(dataset_map.train), provider.num_views)
+
+        value = dataset_map.train[0]
+        self.assertIsInstance(value, FrameData)
+
+        self.assertEqual(value.image_rgb.shape, (3, 128, 128))
+        self.assertEqual(value.fg_probability.shape, (1, 128, 128))
+        # corner of image is background
+        self.assertEqual(value.fg_probability[0, 0, 0], 0)
+        self.assertEqual(value.fg_probability.max(), 1.0)
+        self.assertIsInstance(value.camera, FoVPerspectiveCameras)
+        self.assertEqual(len(value.camera), 1)
+        self.assertIsNone(value.camera.K)
+        matrix = value.camera.get_projection_transform().get_matrix()
+        self.assertClose(matrix, known_matrix, atol=1e-4)
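
The hard-coded known_matrix values in the test appear to follow from the FoVPerspectiveCameras defaults (fov=60, znear=1.0, zfar=100.0, aspect_ratio=1.0) — an assumption about the defaults, sketched below only to show where 1.7321 and 1.0101 come from:

import math

fov, znear, zfar = 60.0, 1.0, 100.0                # assumed FoVPerspectiveCameras defaults
focal = 1.0 / math.tan(math.radians(fov / 2.0))    # ~1.7321 -> entries [0, 0] and [1, 1]
z_scale = zfar / (zfar - znear)                    # ~1.0101 -> entry [2, 2]
z_shift = -(zfar * znear) / (zfar - znear)         # ~-1.0101 -> entry [3, 2]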