voxel_grid_implicit_function

Reviewed By: shapovalov Differential Revision: D40622304 fbshipit-source-id: 277515a55c46d9b8300058b439526539a7fe00a0
2025-11-05 10:22:13 +08:00 · 2022-10-23 05:36:34 -07:00 · 2022-10-23 05:36:34 -07:00 · 74754bbf17
commit 74754bbf17
parent 611aba9a20
4 changed files with 1008 additions and 0 deletions
--- a/projects/implicitron_trainer/tests/experiment.yaml
+++ b/projects/implicitron_trainer/tests/experiment.yaml
@ -394,6 +394,168 @@ model_factory_ImplicitronModelFactory_args:
        in_features: 256
        out_features: 3
        ray_dir_in_camera_coords: false
    implicit_function_VoxelGridImplicitFunction_args:
      harmonic_embedder_xyz_density_args:
        n_harmonic_functions: 6
        omega_0: 1.0
        logspace: true
        append_input: true
      harmonic_embedder_xyz_color_args:
        n_harmonic_functions: 6
        omega_0: 1.0
        logspace: true
        append_input: true
      harmonic_embedder_dir_color_args:
        n_harmonic_functions: 6
        omega_0: 1.0
        logspace: true
        append_input: true
      decoder_density_class_type: MLPDecoder
      decoder_color_class_type: MLPDecoder
      use_multiple_streams: true
      xyz_ray_dir_in_camera_coords: false
      scaffold_calculating_epochs: []
      scaffold_resolution:
      - 128
      - 128
      - 128
      scaffold_empty_space_threshold: 0.001
      scaffold_occupancy_chunk_size: 'inf'
      scaffold_max_pool_kernel_size: 3
      scaffold_filter_points: true
      volume_cropping_epochs: []
      voxel_grid_density_args:
        voxel_grid_class_type: FullResolutionVoxelGrid
        extents:
        - 2.0
        - 2.0
        - 2.0
        translation:
        - 0.0
        - 0.0
        - 0.0
        init_std: 0.1
        init_mean: 0.0
        hold_voxel_grid_as_parameters: true
        param_groups: {}
        voxel_grid_CPFactorizedVoxelGrid_args:
          align_corners: true
          padding: zeros
          mode: bilinear
          n_features: 1
          resolution_changes:
            0:
            - 128
            - 128
            - 128
          n_components: 24
          basis_matrix: true
        voxel_grid_FullResolutionVoxelGrid_args:
          align_corners: true
          padding: zeros
          mode: bilinear
          n_features: 1
          resolution_changes:
            0:
            - 128
            - 128
            - 128
        voxel_grid_VMFactorizedVoxelGrid_args:
          align_corners: true
          padding: zeros
          mode: bilinear
          n_features: 1
          resolution_changes:
            0:
            - 128
            - 128
            - 128
          n_components: null
          distribution_of_components: null
          basis_matrix: true
      voxel_grid_color_args:
        voxel_grid_class_type: FullResolutionVoxelGrid
        extents:
        - 2.0
        - 2.0
        - 2.0
        translation:
        - 0.0
        - 0.0
        - 0.0
        init_std: 0.1
        init_mean: 0.0
        hold_voxel_grid_as_parameters: true
        param_groups: {}
        voxel_grid_CPFactorizedVoxelGrid_args:
          align_corners: true
          padding: zeros
          mode: bilinear
          n_features: 1
          resolution_changes:
            0:
            - 128
            - 128
            - 128
          n_components: 24
          basis_matrix: true
        voxel_grid_FullResolutionVoxelGrid_args:
          align_corners: true
          padding: zeros
          mode: bilinear
          n_features: 1
          resolution_changes:
            0:
            - 128
            - 128
            - 128
        voxel_grid_VMFactorizedVoxelGrid_args:
          align_corners: true
          padding: zeros
          mode: bilinear
          n_features: 1
          resolution_changes:
            0:
            - 128
            - 128
            - 128
          n_components: null
          distribution_of_components: null
          basis_matrix: true
      decoder_density_ElementwiseDecoder_args:
        scale: 1.0
        shift: 0.0
        operation: IDENTITY
      decoder_density_MLPDecoder_args:
        param_groups: {}
        network_args:
          n_layers: 8
          output_dim: 256
          skip_dim: 39
          hidden_dim: 256
          input_skips:
          - 5
          skip_affine_trans: false
          last_layer_bias_init: null
          last_activation: RELU
          use_xavier_init: true
      decoder_color_ElementwiseDecoder_args:
        scale: 1.0
        shift: 0.0
        operation: IDENTITY
      decoder_color_MLPDecoder_args:
        param_groups: {}
        network_args:
          n_layers: 8
          output_dim: 256
          skip_dim: 39
          hidden_dim: 256
          input_skips:
          - 5
          skip_affine_trans: false
          last_layer_bias_init: null
          last_activation: RELU
          use_xavier_init: true
    view_metrics_ViewMetrics_args: {}
    regularization_metrics_RegularizationMetrics_args: {}
 optimizer_factory_ImplicitronOptimizerFactory_args:
--- a/pytorch3d/implicitron/models/generic_model.py
+++ b/pytorch3d/implicitron/models/generic_model.py
@ -52,6 +52,9 @@ from .implicit_function.scene_representation_networks import (  # noqa
    SRNHyperNetImplicitFunction,
    SRNImplicitFunction,
 )
 from .implicit_function.voxel_grid_implicit_function import (  # noqa
    VoxelGridImplicitFunction,
 )
 from .renderer.base import (
    BaseRenderer,
--- a/pytorch3d/implicitron/models/implicit_function/voxel_grid_implicit_function.py
+++ b/pytorch3d/implicitron/models/implicit_function/voxel_grid_implicit_function.py
@ -0,0 +1,616 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 import math
 import warnings
 from dataclasses import fields
 from typing import Callable, Dict, Optional, Tuple, Union
 import torch
 from omegaconf import DictConfig
 from pytorch3d.implicitron.models.implicit_function.base import ImplicitFunctionBase
 from pytorch3d.implicitron.models.implicit_function.decoding_functions import (
    DecoderFunctionBase,
 )
 from pytorch3d.implicitron.models.implicit_function.voxel_grid import VoxelGridModule
 from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
 from pytorch3d.implicitron.tools.config import (
    enable_get_default_args,
    get_default_args_field,
    registry,
    run_auto_creation,
 )
 from pytorch3d.renderer import ray_bundle_to_ray_points
 from pytorch3d.renderer.cameras import CamerasBase
 from pytorch3d.renderer.implicit import HarmonicEmbedding
 # HarmonicEmbedding is used below through `get_default_args_field`; presumably this
 # call is what makes its constructor defaults available to the config system
 # (NOTE(review): confirm against pytorch3d.implicitron.tools.config).
 enable_get_default_args(HarmonicEmbedding)
@registry.register
 # pyre-ignore[13]
 class VoxelGridImplicitFunction(ImplicitFunctionBase, torch.nn.Module):
    """
    This implicit function consists of two streams, one for the density calculation and one
    for the color calculation. Each of these streams has three main parts:
        1) Voxel grids:
            They take the (x, y, z) position and return the embedding of that point.
            These components are replaceable, you can make your own or choose one of
            several options.
        2) Harmonic embeddings:
            Convert each feature into a series of 'harmonic features' by passing it through
            sine and cosine functions. Input is of shape [minibatch, ..., D], output is of
            shape [minibatch, ..., (n_harmonic_functions * 2 + int(append_input)) * D].
            Appends the input by default. If you want it to behave like identity, set
            n_harmonic_functions=0 and append_input=True.
        3) Decoding functions:
            The decoder is an instance of the DecoderFunctionBase and converts the embedding
            of a spatial location to density/color. Examples are Identity which returns its
            input and the MLP which uses a fully connected neural network to transform the
            input. These components are replaceable, you can make your own or choose from
            several options.
    Calculating density is done in three steps:
        1) Evaluating the voxel grid on points
        2) Embedding the outputs with harmonic embedding
        3) Passing through the Density decoder
    To calculate the color we need the embedding and the viewing direction, it has five steps:
        1) Transforming the viewing direction with camera
        2) Evaluating the voxel grid on points
        3) Embedding the outputs with harmonic embedding
        4) Embedding the normalized direction with harmonic embedding
        5) Passing everything through the Color decoder
    If using the Implicitron configuration system the input_dim to the decoding functions will
    be set to the output_dim of the Harmonic embeddings.
    A speed up comes from using the scaffold, a low resolution voxel grid.
    The scaffold is referenced as "binary occupancy grid mask" in TensoRF paper and "AlphaMask"
    in official TensoRF implementation.
    The scaffold is used in:
        1) filtering points in empty space
            - controlled by `scaffold_filter_points` boolean. If set to True, points which
                the scaffold predicts to be in empty space will return 0 density and
                (0, 0, 0) color.
        2) calculating the bounding box of an object and cropping the voxel grids
            - controlled by `volume_cropping_epochs`.
            - at those epochs the implicit function will find the bounding box of an object
                inside it and crop density and color grids. Cropping of the voxel grids means
                preserving only voxel values that are inside the bounding box and changing the
                resolution to match the original, while preserving the new cropped location in
                world coordinates.
    The scaffold has to exist before attempting filtering and cropping, and is created on
    `scaffold_calculating_epochs`. Each voxel in the scaffold is labeled as having density 1 if
    the point in the center of it evaluates to greater than `scaffold_empty_space_threshold`.
    3D max pooling is performed on the densities of the points in 3D.
    Scaffold features are off by default.
    Members:
        voxel_grid_density (VoxelGridBase): voxel grid to use for density estimation
        voxel_grid_color   (VoxelGridBase): voxel grid to use for color   estimation
        harmonic_embedder_xyz_density (HarmonicEmbedding): Function to transform the outputs
            of the voxel_grid_density
        harmonic_embedder_xyz_color (HarmonicEmbedding): Function to transform the outputs of
            the voxel_grid_color
        harmonic_embedder_dir_color (HarmonicEmbedding): Function to transform the normalized
            viewing directions used for color estimation
        decoder_density (DecoderFunctionBase): decoder function to use for density estimation
        decoder_color   (DecoderFunctionBase): decoder function to use for color   estimation
        use_multiple_streams (bool): if you want the density and color calculations to run on
            different cuda streams set this to True. Default True.
        xyz_ray_dir_in_camera_coords (bool): This is true if the directions are given in
            camera coordinates. Default False.
        voxel_grid_scaffold (VoxelGridModule): which holds the scaffold. Extents and
            translation of it are set to those of voxel_grid_density.
        scaffold_calculating_epochs (Tuple[int, ...]): at which epochs to recalculate the
            scaffold. (The scaffold will be created automatically at the beginning of
            the calculation.)
        scaffold_resolution (Tuple[int, int, int]): (width, height, depth) of the underlying
            voxel grid which stores scaffold
        scaffold_empty_space_threshold (float): if `self.get_density` evaluates to less than
            this it will be considered as empty space and the scaffold at that point would
            evaluate as empty space.
        scaffold_occupancy_chunk_size (str or int): Number of xy scaffold planes to calculate
            at the same time. To calculate the scaffold we need to query `get_density()` at
            every voxel, this calculation can be split into scaffold depth number of xy plane
            calculations if you want the lowest memory usage, one calculation to calculate the
            whole scaffold, but with higher memory footprint or any other number of planes.
            Setting to 'inf' calculates all planes at the same time. Defaults to 'inf'.
        scaffold_max_pool_kernel_size (int): Size of the pooling region to use when
            calculating the scaffold. Defaults to 3.
        scaffold_filter_points (bool): If set to True the points will be filtered using
            `self.voxel_grid_scaffold`. Filtered points will be predicted as having 0 density
            and (0, 0, 0) color. The points which were not evaluated as empty space will be
            passed through the steps outlined above.
        volume_cropping_epochs: on which epochs to crop the voxel grids to fit the object's
            bounding box. Scaffold has to be calculated before cropping.
    """
    # ---- voxel grid for density
    voxel_grid_density: VoxelGridModule
    # ---- voxel grid for color
    voxel_grid_color: VoxelGridModule
    # ---- harmonic embeddings density
    harmonic_embedder_xyz_density_args: DictConfig = get_default_args_field(
        HarmonicEmbedding
    )
    # ---- harmonic embeddings color (voxel grid features and viewing directions)
    harmonic_embedder_xyz_color_args: DictConfig = get_default_args_field(
        HarmonicEmbedding
    )
    harmonic_embedder_dir_color_args: DictConfig = get_default_args_field(
        HarmonicEmbedding
    )
    # ---- decoder function for density
    decoder_density_class_type: str = "MLPDecoder"
    decoder_density: DecoderFunctionBase
    # ---- decoder function for color
    decoder_color_class_type: str = "MLPDecoder"
    decoder_color: DecoderFunctionBase
    # ---- cuda streams
    use_multiple_streams: bool = True
    # ---- camera
    xyz_ray_dir_in_camera_coords: bool = False
    # --- scaffold
    # `voxel_grid_scaffold` is not a config field; it is assigned in `__post_init__`.
    # voxel_grid_scaffold: VoxelGridModule
    scaffold_calculating_epochs: Tuple[int, ...] = ()
    scaffold_resolution: Tuple[int, int, int] = (128, 128, 128)
    scaffold_empty_space_threshold: float = 0.001
    scaffold_occupancy_chunk_size: Union[str, int] = "inf"
    scaffold_max_pool_kernel_size: int = 3
    scaffold_filter_points: bool = True
    # --- cropping
    volume_cropping_epochs: Tuple[int, ...] = ()
    def __post_init__(self) -> None:
        """
        Builds all sub-modules and validates the configuration.

        Runs the auto-creation of the configurable members, creates the scaffold
        voxel grid and the three harmonic embedders, and marks the scaffold as not
        yet calculated.

        Raises:
            ValueError: if `scaffold_occupancy_chunk_size` is neither a positive
                int nor the string 'inf'.
        """
        super().__init__()
        run_auto_creation(self)
        # pyre-ignore[16]
        self.voxel_grid_scaffold = self._create_voxel_grid_scaffold()
        # pyre-ignore[16]
        self.harmonic_embedder_xyz_density = HarmonicEmbedding(
            **self.harmonic_embedder_xyz_density_args
        )
        # pyre-ignore[16]
        self.harmonic_embedder_xyz_color = HarmonicEmbedding(
            **self.harmonic_embedder_xyz_color_args
        )
        # pyre-ignore[16]
        self.harmonic_embedder_dir_color = HarmonicEmbedding(
            **self.harmonic_embedder_dir_color_args
        )
        # The scaffold only becomes usable once `_get_scaffold` has filled it.
        # pyre-ignore[16]
        self._scaffold_ready = False

        chunk_size = self.scaffold_occupancy_chunk_size
        # bool is a subclass of int but is never a meaningful chunk size.
        if isinstance(chunk_size, int) and not isinstance(chunk_size, bool):
            # A chunk size of 0 (or less) is used as a `range` step when the
            # scaffold is calculated and would only fail there; fail early instead.
            if chunk_size <= 0:
                raise ValueError(
                    "`scaffold_occupancy_chunk_size` has to be a positive int or 'inf'."
                )
        elif chunk_size != "inf":
            raise ValueError("`scaffold_occupancy_chunk_size` has to be int or 'inf'.")
    def forward(
        self,
        ray_bundle: ImplicitronRayBundle,
        fun_viewpool=None,
        camera: Optional[CamerasBase] = None,
        global_code=None,
        **kwargs,
    ) -> Tuple[torch.Tensor, torch.Tensor, Dict]:
        """
        The forward function accepts the parametrizations of 3D points sampled along
        projection rays. The forward pass is responsible for attaching a 3D vector
        and a 1D scalar representing the point's RGB color and opacity respectively.
        Args:
            ray_bundle: An ImplicitronRayBundle object containing the following variables:
                origins: A tensor of shape `(minibatch, ..., 3)` denoting the
                    origins of the sampling rays in world coords.
                directions: A tensor of shape `(minibatch, ..., 3)`
                    containing the direction vectors of sampling rays in world coords.
                lengths: A tensor of shape `(minibatch, ..., num_points_per_ray)`
                    containing the lengths at which the rays are sampled.
            fun_viewpool: an optional callback with the signature
                    fun_viewpool(points) -> pooled_features
                where points is a [N_TGT x N x 3] tensor of world coords,
                and pooled_features is a [N_TGT x ... x N_SRC x latent_dim] tensor
                of the features pooled from the context images.
                Not used by this implicit function.
            camera: A camera model which will be used to transform the viewing
                directions
            global_code: not used by this implicit function.
            **kwargs: ignored.
        Returns:
            rays_densities: A tensor of shape `(minibatch, ..., num_points_per_ray, 1)`
                denoting the opacity of each ray point.
            rays_colors: A tensor of shape `(minibatch, ..., num_points_per_ray, 3)`
                denoting the color of each ray point.
            An empty dictionary.
        """
        # ########## convert the ray parametrizations to world coordinates ########## #
        # points.shape = [minibatch x n_rays_width x n_rays_height x pts_per_ray x 3]
        # pyre-ignore[6]
        points = ray_bundle_to_ray_points(ray_bundle)
        # One direction per ray: [n_rays x 3]
        directions = ray_bundle.directions.reshape(-1, 3)
        input_shape = points.shape
        num_points_per_ray = input_shape[-2]
        # One row per sampled point: [n_rays * pts_per_ray x 3]
        points = points.view(-1, 3)
        use_scaffold = self._scaffold_ready and self.scaffold_filter_points
        non_empty_points = None

        # ########## filter the points using the scaffold ########## #
        if use_scaffold:
            # pyre-ignore[29]
            non_empty_points = self.voxel_grid_scaffold(points)[..., 0] > 0
            points = points[non_empty_points]
            if len(points) == 0:
                warnings.warn(
                    "The scaffold has filtered all the points. "
                    "The voxel grids and decoding functions will not be run."
                )
                return (
                    points.new_zeros((*input_shape[:-1], 1)),
                    points.new_zeros((*input_shape[:-1], 3)),
                    {},
                )
            # The mask is per point while `directions` is per ray, so the
            # directions have to be expanded to one row per point before the
            # mask can be applied; afterwards every surviving point carries its
            # own viewing direction.
            directions = torch.repeat_interleave(
                directions, num_points_per_ray, dim=0
            )[non_empty_points]

        # ########## calculate color and density ########## #
        rays_densities, rays_colors = self.calculate_density_and_color(
            points, directions, camera
        )
        if not use_scaffold:
            return (
                rays_densities.view((*input_shape[:-1], rays_densities.shape[-1])),
                rays_colors.view((*input_shape[:-1], rays_colors.shape[-1])),
                {},
            )

        # ########## merge scaffold calculated points ########## #
        # Create a zeroed tensor corresponding to a point with density=0 and fill it
        # with calculated density for points which are not in empty space. Do the
        # same for color
        n_all_points = math.prod(input_shape[:-1])
        rays_densities_combined = rays_densities.new_zeros(
            (n_all_points, rays_densities.shape[-1])
        )
        rays_colors_combined = rays_colors.new_zeros(
            (n_all_points, rays_colors.shape[-1])
        )
        rays_densities_combined[non_empty_points] = rays_densities
        rays_colors_combined[non_empty_points] = rays_colors
        return (
            rays_densities_combined.view((*input_shape[:-1], rays_densities.shape[-1])),
            rays_colors_combined.view((*input_shape[:-1], rays_colors.shape[-1])),
            {},
        )
    def calculate_density_and_color(
        self,
        points: torch.Tensor,
        directions: torch.Tensor,
        camera: Optional[CamerasBase] = None,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Calculates density and color at `points`.
        If `use_multiple_streams` is set and `points` live on a CUDA device, the
        density is computed on a separate CUDA stream while the color is computed
        on the current one; otherwise both are computed serially.
        Args:
            points: points at which to calculate density and color.
                Tensor of shape [..., 3].
            directions: from which directions are the points viewed
                Tensor of shape [..., 3]. Passed unchanged to `get_color`.
            camera: A camera model which will be used to transform the viewing
                directions
        Returns:
            Tuple of density (the output of `get_density`, last dimension is the
                density feature dimension) and color (the output of `get_color`),
                in that order.
        """
        if self.use_multiple_streams and points.is_cuda:
            current_stream = torch.cuda.current_stream(points.device)
            other_stream = torch.cuda.Stream(points.device)
            # Work queued on the side stream must not start before everything
            # already queued on the current stream (e.g. producing `points`).
            other_stream.wait_stream(current_stream)
            with torch.cuda.stream(other_stream):
                # rays_densities.shape = [..., density_dim]
                # (leading dims follow `points`)
                rays_densities = self.get_density(points)
            # rays_colors.shape = [..., color_dim]
            # (leading dims follow `points`)
            rays_colors = self.get_color(points, camera, directions)
            # Re-join: later work on the current stream waits for the density.
            current_stream.wait_stream(other_stream)
        else:
            # Same calculation as above, just serial.
            rays_densities = self.get_density(points)
            rays_colors = self.get_color(points, camera, directions)
        return rays_densities, rays_colors
    def get_density(self, points: torch.Tensor) -> torch.Tensor:
        """
        Evaluates the density stream at `points`.

        The points are looked up in the density voxel grid, the grid features are
        expanded by the density harmonic embedder and the result is fed to the
        density decoder.
        Args:
            points: tensor of shape [..., 3] holding (x, y, z) coordinates in its
                last dimension
        Returns:
            tensor of shape [..., density_dim], where `density_dim` is the size of
                the features produced by `decoder_density`
        """
        grid_features = self.voxel_grid_density(points)
        # pyre-ignore[29]
        embedded_features = self.harmonic_embedder_xyz_density(grid_features)
        densities = self.decoder_density(embedded_features)
        return densities
    def get_color(
        self,
        points: torch.Tensor,
        camera: Optional[CamerasBase],
        directions: torch.Tensor,
    ) -> torch.Tensor:
        """
        Evaluates the color stream at `points` seen from `directions`.

        The viewing directions are optionally multiplied by the camera rotation,
        normalized and harmonically embedded; the points are looked up in the color
        voxel grid and harmonically embedded; both embeddings are concatenated and
        passed to the color decoder.
        Args:
            points: tensor of shape (..., 3)
                where the last dimension is the points in the (x, y, z)
            camera: A camera model which will be used to transform the viewing
                directions
            directions: A tensor of shape `(..., 3)`
                containing the direction vectors of sampling rays in world coords.
        Returns:
            the output of `decoder_color` for the concatenated embedding
        Raises:
            ValueError: if `xyz_ray_dir_in_camera_coords` is set and no camera
                is given.
        """
        # ########## viewing directions ########## #
        if self.xyz_ray_dir_in_camera_coords:
            if camera is None:
                raise ValueError("Camera must be given if xyz_ray_dir_in_camera_coords")
            directions = directions @ camera.R

        # ########## voxel grid features and their harmonic embedding ########## #
        grid_features = self.voxel_grid_color(points)
        # pyre-ignore[29]
        xyz_embedding = self.harmonic_embedder_xyz_color(grid_features)

        # ########## harmonic embedding of the unit viewing directions ########## #
        unit_directions = torch.nn.functional.normalize(directions, dim=-1)
        # pyre-ignore[29]
        dir_embedding = self.harmonic_embedder_dir_color(unit_directions)
        # Every direction is shared by all the points of its ray, so each row is
        # repeated once per point of that ray.
        repeats: int = points.shape[0] // directions.shape[0]
        dir_embedding = torch.repeat_interleave(dir_embedding, repeats, dim=0)

        # ########## decode the joint embedding ########## #
        joint_embedding = torch.cat((xyz_embedding, dir_embedding), dim=-1)
        return self.decoder_color(joint_embedding)
    @staticmethod
    def allows_multiple_passes() -> bool:
        """
        Overrides the ImplicitFunctionBase hook: this implicit function
        supports multiple passes, so the answer is always True.
        """
        supports_multiple_passes = True
        return supports_multiple_passes
    def subscribe_to_epochs(self) -> Tuple[Tuple[int, ...], Callable[[int], bool]]:
        """
        Declares on which optimization epochs this implicit function wants to be
        notified. Those are the epochs on which the scaffold is calculated and the
        epochs on which the voxel grids are cropped; one callback serves both.
        Returns:
            the (de-duplicated) epochs of interest and the callable to invoke on
                them. The callable takes the epoch as its only argument and
                returns True if a parameter change has happened, else False.
        """
        scaffold_epochs = self.scaffold_calculating_epochs
        cropping_epochs = self.volume_cropping_epochs

        def callback(epoch) -> bool:
            # The scaffold is refreshed first so that cropping on the same
            # epoch sees the new scaffold.
            changed = self._get_scaffold(epoch) if epoch in scaffold_epochs else False
            if epoch in cropping_epochs:
                return self._crop(epoch) or changed
            return changed

        # the set union drops epochs present in both lists
        unique_epochs = set(scaffold_epochs) | set(cropping_epochs)
        return list(unique_epochs), callback
    def _crop(self, epoch: int) -> bool:
        """
        Crops the density and color voxel grids to the bounding box of the
        object stored in the scaffold. When the scaffold is empty everywhere
        (no object inside) nothing is changed.
        Args:
            epoch: forwarded to `get_grid_points` of the scaffold
        Returns:
            True (a parameter change has happened) if the scaffold contains an
            object, False otherwise.
        """
        # ---- locate the occupied voxels of the scaffold
        # pyre-ignore[16]
        grid_points = self.voxel_grid_scaffold.get_grid_points(epoch=epoch)
        assert self._scaffold_ready, "Scaffold has to be calculated before cropping."
        # pyre-ignore[29]
        is_occupied = self.voxel_grid_scaffold(grid_points)[..., 0] > 0
        occupied_idxs = torch.nonzero(is_occupied)
        if occupied_idxs.shape[0] == 0:
            return False

        # ---- corners of the bounding box, as grid indices then as points
        lowest_idx = tuple(occupied_idxs.min(dim=0)[0])
        highest_idx = tuple(occupied_idxs.max(dim=0)[0])
        min_point = grid_points[lowest_idx]
        max_point = grid_points[highest_idx]

        # ---- crop both voxel grids to the same box
        self.voxel_grid_density.crop_self(min_point, max_point)
        self.voxel_grid_color.crop_self(min_point, max_point)
        return True
    @torch.no_grad()
    def _get_scaffold(self, epoch: int) -> bool:
        """
        Creates a low resolution grid which is used to filter points that are in empty
        space.

        The density is evaluated at the grid points of the scaffold,
        `scaffold_occupancy_chunk_size` planes at a time, max-pooled in 3D and
        thresholded with `scaffold_empty_space_threshold`. The resulting occupancy
        is stored in the scaffold voxel grid and the scaffold is marked as ready.
        Args:
            epoch: epoch on which it is called, forwarded to `get_grid_points`
        Returns:
             Always False: Modifies `self.voxel_grid_scaffold` member.
        """
        # pyre-ignore[16]
        points = self.voxel_grid_scaffold.get_grid_points(epoch=epoch)
        # The last axis holds the (x, y, z) coordinates and has to stay intact for
        # `get_density`, so the planes are taken along the axis before it.
        # NOTE(review): assumes `get_grid_points` returns [..., depth, 3] - confirm.
        depth = points.shape[-2]
        chunk_size = (
            self.scaffold_occupancy_chunk_size
            if isinstance(self.scaffold_occupancy_chunk_size, int)
            else depth  # 'inf': all planes at once
        )
        planes = [
            self.get_density(points[..., k : k + chunk_size, :])[..., 0]
            for k in range(0, depth, chunk_size)
        ]
        # After dropping the feature axis the depth axis is the last one.
        density_cube = torch.cat(planes, dim=-1)
        density_cube = torch.nn.functional.max_pool3d(
            density_cube[None, None],
            kernel_size=self.scaffold_max_pool_kernel_size,
            padding=self.scaffold_max_pool_kernel_size // 2,
            stride=1,
        )
        occupancy_cube = density_cube > self.scaffold_empty_space_threshold
        # pyre-ignore[16]
        self.voxel_grid_scaffold.params["voxel_grid"] = occupancy_cube.float()
        # pyre-ignore[16]
        self._scaffold_ready = True
        return False
    @classmethod
    def decoder_density_tweak_args(cls, type, args: DictConfig) -> None:
        """
        Removes `input_dim` (if present) from the density decoder's args, in place.
        `create_decoder_density_impl` computes that value and passes it to the
        decoder itself.
        Args:
            type: unused here.
            args: the args of the density decoder; modified in place.
        """
        args.pop("input_dim", None)
    def create_decoder_density_impl(self, type, args: DictConfig) -> None:
        """
        Builds `self.decoder_density`.

        The density decoder consumes the harmonic embedding of the density voxel
        grid output. Its input dimension is derived here from the voxel grid and
        the embedder settings, so that it does not have to be written in the
        config file, and is passed to decoders that declare an `input_dim` field.
        Args:
            type: name under which the decoder class is registered.
            args: constructor arguments of the decoder.
        """
        # pyre-ignore[6]
        grid_feature_dim = VoxelGridModule.get_output_dim(self.voxel_grid_density_args)
        embedder_cfg = self.harmonic_embedder_xyz_density_args
        input_dim = HarmonicEmbedding.get_output_dim_static(
            grid_feature_dim,
            embedder_cfg["n_harmonic_functions"],
            embedder_cfg["append_input"],
        )

        decoder_cls = registry.get(DecoderFunctionBase, type)
        declared_fields = {field.name for field in fields(decoder_cls)}
        # only decoders with an `input_dim` field receive the computed value
        computed_kwargs = {"input_dim": input_dim} if "input_dim" in declared_fields else {}
        self.decoder_density = decoder_cls(**computed_kwargs, **args)
    @classmethod
    def decoder_color_tweak_args(cls, type, args: DictConfig) -> None:
        """
        Removes `input_dim` (if present) from the color decoder's args, in place.
        `create_decoder_color_impl` computes that value and passes it to the
        decoder itself.
        Args:
            type: unused here.
            args: the args of the color decoder; modified in place.
        """
        args.pop("input_dim", None)
    def create_decoder_color_impl(self, type, args: DictConfig) -> None:
        """
        Builds `self.decoder_color`.

        The color decoder consumes the harmonic embedding of the color voxel grid
        output concatenated with the harmonic embedding of the 3D viewing
        direction. Its input dimension is the sum of the two embedding sizes; it
        is derived here, so that it does not have to be written in the config
        file, and is passed to decoders that declare an `input_dim` field.
        Args:
            type: name under which the decoder class is registered.
            args: constructor arguments of the decoder.
        """
        # ---- size of the embedded voxel grid features
        # pyre-ignore[6]
        grid_feature_dim = VoxelGridModule.get_output_dim(self.voxel_grid_color_args)
        xyz_cfg = self.harmonic_embedder_xyz_color_args
        xyz_embedding_dim = HarmonicEmbedding.get_output_dim_static(
            grid_feature_dim,
            xyz_cfg["n_harmonic_functions"],
            xyz_cfg["append_input"],
        )

        # ---- size of the embedded viewing direction (a 3D vector)
        dir_cfg = self.harmonic_embedder_dir_color_args
        dir_embedding_dim = HarmonicEmbedding.get_output_dim_static(
            3,
            dir_cfg["n_harmonic_functions"],
            dir_cfg["append_input"],
        )
        input_dim = xyz_embedding_dim + dir_embedding_dim

        decoder_cls = registry.get(DecoderFunctionBase, type)
        declared_fields = {field.name for field in fields(decoder_cls)}
        # only decoders with an `input_dim` field receive the computed value
        computed_kwargs = {"input_dim": input_dim} if "input_dim" in declared_fields else {}
        self.decoder_color = decoder_cls(**computed_kwargs, **args)
    def _create_voxel_grid_scaffold(self) -> VoxelGridModule:
        """
        Builds the module which becomes `self.voxel_grid_scaffold`.

        The scaffold is a full resolution voxel grid of `scaffold_resolution`
        which is not held as parameters. It takes its extents and translation
        from the density voxel grid config, so that both grids share the same
        world to local mapping.
        """
        density_grid_cfg = self.voxel_grid_density_args
        scaffold_grid_args = {
            "resolution_changes": {0: self.scaffold_resolution},
            "padding": "zeros",
            "align_corners": True,
            "mode": "trilinear",
        }
        scaffold = VoxelGridModule(
            # pyre-ignore[29]
            extents=density_grid_cfg["extents"],
            # pyre-ignore[29]
            translation=density_grid_cfg["translation"],
            voxel_grid_class_type="FullResolutionVoxelGrid",
            hold_voxel_grid_as_parameters=False,
            voxel_grid_FullResolutionVoxelGrid_args=scaffold_grid_args,
        )
        return scaffold
--- a/tests/implicitron/test_voxel_grid_implicit_function.py
+++ b/tests/implicitron/test_voxel_grid_implicit_function.py
@ -0,0 +1,227 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 import unittest
 import torch
 from omegaconf import DictConfig, OmegaConf
 from pytorch3d.implicitron.models.implicit_function.voxel_grid_implicit_function import (
    VoxelGridImplicitFunction,
 )
 from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
 from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args
 from pytorch3d.renderer import ray_bundle_to_ray_points
 from tests.common_testing import TestCaseMixin
 class TestVoxelGridImplicitFunction(TestCaseMixin, unittest.TestCase):
    """
    Tests of VoxelGridImplicitFunction: a forward pass, the calculation of
    the scaffold, the filtering of points with the scaffold and the cropping
    of the voxel grids.
    """

    def setUp(self) -> None:
        # Fixed seed so that the randomly drawn ray bundles are reproducible.
        torch.manual_seed(42)
        expand_args_fields(VoxelGridImplicitFunction)

    def _get_simple_implicit_function(self, scaffold_res=16):
        """
        Builds a small VoxelGridImplicitFunction from the default config
        overridden with an ElementwiseDecoder for density, a 2 layer
        MLPDecoder for color and a cubic scaffold.

        Args:
            scaffold_res: resolution of the scaffold along each of the
                three axes.

        Returns:
            The constructed VoxelGridImplicitFunction.
        """
        default_cfg = get_default_args(VoxelGridImplicitFunction)
        custom_cfg = DictConfig(
            {
                "voxel_grid_density_args": {
                    "voxel_grid_FullResolutionVoxelGrid_args": {"n_features": 7}
                },
                "decoder_density_class_type": "ElementwiseDecoder",
                "decoder_color_class_type": "MLPDecoder",
                "decoder_color_MLPDecoder_args": {
                    "network_args": {
                        "n_layers": 2,
                        "output_dim": 3,
                        "hidden_dim": 128,
                    }
                },
                "scaffold_resolution": (scaffold_res, scaffold_res, scaffold_res),
            }
        )
        # Values of custom_cfg take precedence over the defaults.
        cfg = OmegaConf.merge(default_cfg, custom_cfg)
        return VoxelGridImplicitFunction(**cfg)

    def test_forward(self) -> None:
        """
        Test one forward of VoxelGridImplicitFunction.
        This is a smoke test: the output of the call is not inspected.
        """
        func = self._get_simple_implicit_function()
        n_grids, n_points = 10, 9
        raybundle = ImplicitronRayBundle(
            origins=torch.randn(n_grids, 2, 3, 3),
            directions=torch.randn(n_grids, 2, 3, 3),
            lengths=torch.randn(n_grids, 2, 3, n_points),
            xys=0,
        )
        func(raybundle)

    def test_scaffold_formation(self):
        """
        Test calculating the scaffold.
        We define a custom density function and make the implicit function use it
        After calculating the scaffold we compare the density of our custom
        density function with densities from the scaffold.
        """
        device = "cuda" if torch.cuda.is_available() else "cpu"
        func = self._get_simple_implicit_function().to(device)
        func.scaffold_max_pool_kernel_size = 1

        def new_density(points):
            """
            Density function which returns 1 if p>(0.5, 0.5, 0.5) or
            p < (-0.5, -0.5, -0.5) else 0
            """
            # Evaluated for all points at once; the result has the shape of
            # `points` with the last dimension replaced by 1.
            all_above = (points > 0.5).all(dim=-1, keepdim=True)
            all_below = (points < -0.5).all(dim=-1, keepdim=True)
            return torch.logical_or(all_above, all_below).float().to(device)

        func.get_density = new_density
        func._get_scaffold(0)
        points = torch.tensor(
            [
                [0, 0, 0],
                [1, 1, 1],
                [1, 0, 0],
                [0.1, 0, 0],
                [10, 1, -1],
                [-0.8, -0.7, -0.9],
            ]
        ).to(device)
        expected = new_density(points).float().to(device)
        assert torch.allclose(func.voxel_grid_scaffold(points), expected), (
            func.voxel_grid_scaffold(points),
            expected,
        )

    def test_scaffold_filtering(self, n_test_points=100):
        """
        Test that filtering points with scaffold works.
        We define a scaffold and make the implicit function use it. We also
        define new density and color functions which check that all passed
        points are not in empty space (with scaffold function). In the end
        we compare the result from the implicit function with one calculated
        in simple python, this checks that the points were merged correctly.
        """
        # Fall back to CPU so the test can also run without a GPU, as the
        # other tests of this class do.
        device = "cuda" if torch.cuda.is_available() else "cpu"
        func = self._get_simple_implicit_function().to(device)

        def scaffold(points):
            """
            Function to deterministically and randomly enough assign a point
            to empty or occupied space.
            Return 1 if second digit of sum after 0 is odd else 0
            """
            return (
                ((points.sum(dim=-1, keepdim=True) * 10**2 % 10).long() % 2) == 1
            ).float()

        def new_density(points):
            # check if all passed points should be passed here
            assert torch.all(scaffold(points)), (scaffold(points), points.shape)
            return points.sum(dim=-1, keepdim=True)

        def new_color(points, camera, directions):
            # check if all passed points should be passed here
            assert torch.all(scaffold(points)), (scaffold(points), points.shape)
            return points * 2

        # check both computation paths that they contain only points
        # which are not in empty space
        func.get_density = new_density
        func.get_color = new_color
        func.voxel_grid_scaffold.forward = scaffold
        func._scaffold_ready = True
        bundle = ImplicitronRayBundle(
            origins=torch.rand((n_test_points, 2, 1, 3), device=device),
            directions=torch.rand((n_test_points, 2, 1, 3), device=device),
            lengths=torch.rand((n_test_points, 2, 1, 4), device=device),
            xys=None,
        )
        points = ray_bundle_to_ray_points(bundle)
        result_density, result_color, _ = func(bundle)

        # construct the wanted result 'by hand'
        flat_points = points.view(-1, 3)
        expected_result_density, expected_result_color = [], []
        for point in flat_points:
            if scaffold(point) == 1:
                expected_result_density.append(point.sum(dim=-1, keepdim=True))
                expected_result_color.append(point * 2)
            else:
                # points in empty space are expected to get zero outputs
                expected_result_density.append(point.new_zeros((1,)))
                expected_result_color.append(point.new_zeros((3,)))
        expected_result_density = torch.stack(expected_result_density, dim=0).view(
            *points.shape[:-1], 1
        )
        expected_result_color = torch.stack(expected_result_color, dim=0).view(
            *points.shape[:-1], 3
        )

        # check that the result is expected
        assert torch.allclose(result_density, expected_result_density), (
            result_density,
            expected_result_density,
        )
        assert torch.allclose(result_color, expected_result_color), (
            result_color,
            expected_result_color,
        )

    def test_cropping(self, scaffold_res=9):
        """
        Tests whether implicit function finds the bounding box of the object and sends
        correct min and max points to voxel grids for rescaling.
        """
        device = "cuda" if torch.cuda.is_available() else "cpu"
        func = self._get_simple_implicit_function(scaffold_res=scaffold_res).to(device)

        assert scaffold_res >= 8
        # NOTE(review): 1 / div is presumably the spacing between neighbouring
        # scaffold voxels, so the box corners below lie on voxel centers
        # -- confirm against the grid extents.
        div = (scaffold_res - 1) / 2
        true_min_point = torch.tensor(
            [-3 / div, 0 / div, -3 / div],
            device=device,
        )
        true_max_point = torch.tensor(
            [1 / div, 2 / div, 3 / div],
            device=device,
        )

        def new_scaffold(points):
            # 1 if between true_min and true_max point else 0
            return (
                torch.logical_and(true_min_point <= points, points <= true_max_point)
                .all(dim=-1)
                .float()[..., None]
            )

        # one entry is appended for every call of the replaced `crop_self`
        called_crop = []

        def assert_min_max_points(min_point, max_point):
            called_crop.append(1)
            self.assertClose(min_point, true_min_point)
            self.assertClose(max_point, true_max_point)

        func.voxel_grid_density.crop_self = assert_min_max_points
        func.voxel_grid_color.crop_self = assert_min_max_points
        func.voxel_grid_scaffold.forward = new_scaffold
        func._scaffold_ready = True
        func._crop(epoch=0)
        # `crop_self` has to be called twice in total, which is expected to
        # be once for the density grid and once for the color grid
        assert len(called_crop) == 2