mirror of
https://github.com/facebookresearch/pytorch3d.git
synced 2025-08-03 04:12:48 +08:00
Camera inheritance + unprojections
Summary: Made a CamerasBase class. Added an `unproject_points` method for each camera class.

Reviewed By: nikhilaravi

Differential Revision: D20373602

fbshipit-source-id: 7e3da5ae420091b5fcab400a9884ef29ad7a7343
This commit is contained in:
parent
365945b1fd
commit
7788a38050
@@ -9,6 +9,8 @@ from .blending import (
 from .cameras import (
     OpenGLOrthographicCameras,
     OpenGLPerspectiveCameras,
+    SfMOrthographicCameras,
+    SfMPerspectiveCameras,
     camera_position_from_spherical_angles,
     get_world_to_view_transform,
     look_at_rotation,
@@ -16,7 +16,202 @@ r = np.expand_dims(np.eye(3), axis=0) # (1, 3, 3)
 t = np.expand_dims(np.zeros(3), axis=0) # (1, 3)


-class OpenGLPerspectiveCameras(TensorProperties):
+class CamerasBase(TensorProperties):
+    """
+    `CamerasBase` implements a base class for all cameras.
+
+    It defines methods that are common to all camera models:
+        - `get_camera_center` that returns the optical center of the camera in
+          world coordinates
+        - `get_world_to_view_transform` which returns a 3D transform from
+          world coordinates to the camera coordinates
+        - `get_full_projection_transform` which composes the projection
+          transform with the world-to-view transform
+        - `transform_points` which takes a set of input points and
+          projects them onto a 2D camera plane.
+
+    For each new camera, one should implement the `get_projection_transform`
+    routine that returns the mapping from camera coordinates in world units
+    to the screen coordinates.
+
+    Another useful function that is specific to each camera model is
+    `unproject_points` which sends points from screen coordinates back to
+    camera or world coordinates depending on the `world_coordinates`
+    boolean argument of the function.
+    """
+
+    def get_projection_transform(self):
+        """
+        Calculate the projective transformation matrix.
+
+        Args:
+            **kwargs: parameters for the projection can be passed in as keyword
+                arguments to override the default values set in `__init__`.
+
+        Returns:
+            P: a `Transform3d` object which represents a batch of projection
+            matrices of shape (N, 3, 3)
+        """
+        raise NotImplementedError()
+
+    def unproject_points(self):
+        """
+        Transform input points in screen coordinates
+        to the world / camera coordinates.
+
+        Each of the input points `xy_depth` of shape (..., 3) is
+        a concatenation of the x, y location and its depth.
+
+        For instance, for an input 2D tensor of shape `(num_points, 3)`,
+        `xy_depth` takes the following form:
+        `xy_depth[i] = [x[i], y[i], depth[i]]`
+        for each point at index `i`.
+
+        The following example demonstrates the relationship between
+        `transform_points` and `unproject_points`:
+
+        .. code-block:: python
+
+            cameras = # camera object derived from CamerasBase
+            xyz = # 3D points of shape (batch_size, num_points, 3)
+            # transform xyz to the camera coordinates
+            xyz_cam = cameras.get_world_to_view_transform().transform_points(xyz)
+            # extract the depth of each point as the 3rd coord of xyz_cam
+            depth = xyz_cam[:, :, 2:]
+            # project the points xyz to the camera
+            xy = cameras.transform_points(xyz)[:, :, :2]
+            # append depth to xy
+            xy_depth = torch.cat((xy, depth), dim=2)
+            # unproject to the world coordinates
+            xyz_unproj_world = cameras.unproject_points(xy_depth, world_coordinates=True)
+            print(torch.allclose(xyz, xyz_unproj_world))  # True
+            # unproject to the camera coordinates
+            xyz_unproj = cameras.unproject_points(xy_depth, world_coordinates=False)
+            print(torch.allclose(xyz_cam, xyz_unproj))  # True
+
+        Args:
+            xy_depth: torch tensor of shape (..., 3).
+            world_coordinates: If `True`, unprojects the points back to world
+                coordinates using the camera extrinsics `R` and `T`.
+                `False` ignores `R` and `T` and unprojects to
+                the camera coordinates.
+
+        Returns:
+            new_points: unprojected points with the same shape as `xy_depth`.
+        """
+        raise NotImplementedError()
+
+    def get_camera_center(self, **kwargs) -> torch.Tensor:
+        """
+        Return the 3D location of the camera optical center
+        in the world coordinates.
+
+        Args:
+            **kwargs: parameters for the camera extrinsics can be passed in
+                as keyword arguments to override the default values
+                set in __init__.
+
+        Setting T here will update the values set in init as this
+        value may be needed later on in the rendering pipeline e.g. for
+        lighting calculations.
+
+        Returns:
+            C: a batch of 3D locations of shape (N, 3) denoting
+            the locations of the center of each camera in the batch.
+        """
+        w2v_trans = self.get_world_to_view_transform(**kwargs)
+        P = w2v_trans.inverse().get_matrix()
+        # the camera center is the translation component (the first 3 elements
+        # of the last row) of the inverted world-to-view
+        # transform (4x4 RT matrix)
+        C = P[:, 3, :3]
+        return C
+
+    def get_world_to_view_transform(self, **kwargs) -> Transform3d:
+        """
+        Return the world-to-view transform.
+
+        Args:
+            **kwargs: parameters for the camera extrinsics can be passed in
+                as keyword arguments to override the default values
+                set in __init__.
+
+        Setting R and T here will update the values set in init as these
+        values may be needed later on in the rendering pipeline e.g. for
+        lighting calculations.
+
+        Returns:
+            T: a Transform3d object which represents a batch of transforms
+            of shape (N, 3, 3)
+        """
+        self.R = kwargs.get("R", self.R)  # pyre-ignore[16]
+        self.T = kwargs.get("T", self.T)  # pyre-ignore[16]
+        world_to_view_transform = get_world_to_view_transform(R=self.R, T=self.T)
+        return world_to_view_transform
+
+    def get_full_projection_transform(self, **kwargs) -> Transform3d:
+        """
+        Return the full world-to-screen transform composing the
+        world-to-view and view-to-screen transforms.
+
+        Args:
+            **kwargs: parameters for the projection transforms can be passed in
+                as keyword arguments to override the default values
+                set in __init__.
+
+        Setting R and T here will update the values set in init as these
+        values may be needed later on in the rendering pipeline e.g. for
+        lighting calculations.
+
+        Returns:
+            T: a Transform3d object which represents a batch of transforms
+            of shape (N, 3, 3)
+        """
+        self.R = kwargs.get("R", self.R)  # pyre-ignore[16]
+        self.T = kwargs.get("T", self.T)  # pyre-ignore[16]
+        world_to_view_transform = self.get_world_to_view_transform(R=self.R, T=self.T)
+        view_to_screen_transform = self.get_projection_transform(**kwargs)
+        return world_to_view_transform.compose(view_to_screen_transform)
+
+    def transform_points(
+        self, points, eps: Optional[float] = None, **kwargs
+    ) -> torch.Tensor:
+        """
+        Transform input points from world to screen space.
+
+        Args:
+            points: torch tensor of shape (..., 3).
+            eps: If eps!=None, the argument is used to clamp the
+                divisor in the homogeneous normalization of the points
+                transformed to the screen space. Please see
+                `transforms.Transform3D.transform_points` for details.
+
+                For `CamerasBase.transform_points`, setting `eps > 0`
+                stabilizes gradients since it leads to avoiding division
+                by excessively low numbers for points close to the
+                camera plane.
+
+        Returns:
+            new_points: transformed points with the same shape as the input.
+        """
+        world_to_screen_transform = self.get_full_projection_transform(**kwargs)
+        return world_to_screen_transform.transform_points(points, eps=eps)
+
+    def clone(self):
+        """
+        Returns a copy of `self`.
+        """
+        cam_type = type(self)
+        other = cam_type(device=self.device)
+        return super().clone(other)
+
+
+########################
+# Specific camera classes
+########################
+
+
+class OpenGLPerspectiveCameras(CamerasBase):
     """
     A class which stores a batch of parameters to generate a batch of
     projection matrices using the OpenGL convention for a perspective camera.
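To illustrate the contract the new base class establishes, here is a minimal sketch (not part of this commit) of a made-up camera that only supplies `get_projection_transform`; the world-to-view transform, composition, point transformation and cloning all come from `CamerasBase`. The `ToyScaleCamera` name and its `scale` parameter are invented for this example, and it assumes the `Scale` transform from `pytorch3d.transforms` behaves as a plain per-axis scaling.

    import torch
    from pytorch3d.renderer.cameras import CamerasBase
    from pytorch3d.transforms import Scale, Transform3d

    class ToyScaleCamera(CamerasBase):
        # hypothetical camera whose "projection" is just a per-axis scaling
        def __init__(self, scale=((1.0, 1.0, 1.0),), R=None, T=None, device="cpu"):
            R = torch.eye(3)[None] if R is None else R
            T = torch.zeros(1, 3) if T is None else T
            super().__init__(device=device, scale=scale, R=R, T=T)

        def get_projection_transform(self, **kwargs) -> Transform3d:
            # only this method is camera-specific; everything else is inherited
            return Scale(kwargs.get("scale", self.scale))

    cam = ToyScaleCamera()
    pts = torch.rand(1, 5, 3)
    print(cam.transform_points(pts).shape)  # (1, 5, 3), via the inherited pipeline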
@@ -97,7 +292,7 @@ class OpenGLPerspectiveCameras(TensorProperties):
             [s1, 0, w1, 0],
             [0, s2, h1, 0],
             [0, 0, f1, f2],
-            [0, 0, -1, 0],
+            [0, 0, 1, 0],
         ]
         """
         znear = kwargs.get("znear", self.znear)  # pyre-ignore[16]
@@ -154,97 +349,52 @@ class OpenGLPerspectiveCameras(TensorProperties):
         transform._matrix = P.transpose(1, 2).contiguous()
         return transform

-    def clone(self):
-        other = OpenGLPerspectiveCameras(device=self.device)
-        return super().clone(other)
-
-    def get_camera_center(self, **kwargs):
-        ...
-
-    def get_world_to_view_transform(self, **kwargs) -> Transform3d:
-        ...
-
-    def get_full_projection_transform(self, **kwargs) -> Transform3d:
-        ...
-
-    def transform_points(self, points, **kwargs) -> torch.Tensor:
-        ...
+    def unproject_points(
+        self,
+        xy_depth: torch.Tensor,
+        world_coordinates: bool = True,
+        scaled_depth_input: bool = False,
+        **kwargs
+    ) -> torch.Tensor:
+        """
+        OpenGL cameras further allow for passing depth in world units
+        (`scaled_depth_input=False`) or in the [0, 1]-normalized units
+        (`scaled_depth_input=True`).
+
+        Args:
+            scaled_depth_input: If `True`, assumes the input depth is in
+                the [0, 1]-normalized units. If `False` the input depth is in
+                the world units.
+        """
+        # obtain the relevant transformation to screen
+        if world_coordinates:
+            to_screen_transform = self.get_full_projection_transform()
+        else:
+            to_screen_transform = self.get_projection_transform()
+
+        if scaled_depth_input:
+            # the input is scaled depth, so we don't have to do anything
+            xy_sdepth = xy_depth
+        else:
+            # parse out important values from the projection matrix
+            P_matrix = self.get_projection_transform(**kwargs.copy()).get_matrix()
+            # parse out f1, f2 from P_matrix
+            unsqueeze_shape = [1] * xy_depth.dim()
+            unsqueeze_shape[0] = P_matrix.shape[0]
+            f1 = P_matrix[:, 2, 2].reshape(unsqueeze_shape)
+            f2 = P_matrix[:, 3, 2].reshape(unsqueeze_shape)
+            # get the scaled depth
+            sdepth = (f1 * xy_depth[..., 2:3] + f2) / xy_depth[..., 2:3]
+            # concatenate xy + scaled depth
+            xy_sdepth = torch.cat((xy_depth[..., 0:2], sdepth), dim=-1)
+
+        # unproject with inverse of the projection
+        unprojection_transform = to_screen_transform.inverse()
+        return unprojection_transform.transform_points(xy_sdepth)


-class OpenGLOrthographicCameras(TensorProperties):
+class OpenGLOrthographicCameras(CamerasBase):
     """
     A class which stores a batch of parameters to generate a batch of
     transformation matrices using the OpenGL convention for orthographic camera.

(In this and the three analogous hunks below, the method bodies elided with `...` duplicated the `CamerasBase` implementations shown above, apart from the new `eps` argument of `transform_points`; they are removed because those methods are now inherited.)
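The depth remapping in the perspective branch can be checked by hand: with the projection rows shown in the docstring above, a camera-space depth z ends up as (f1 * z + f2) / z after the homogeneous divide, which is exactly the `sdepth` expression used before inverting the transform. A quick sketch of that relationship, not part of the commit (the `znear`, `zfar`, `fov` values are arbitrary and the default identity extrinsics are assumed):

    import torch
    from pytorch3d.renderer.cameras import OpenGLPerspectiveCameras

    cameras = OpenGLPerspectiveCameras(znear=0.5, zfar=10.0, fov=60.0)
    P = cameras.get_projection_transform().get_matrix()
    f1, f2 = P[0, 2, 2], P[0, 3, 2]

    z = torch.tensor([1.0, 2.0, 5.0])  # depths in world units
    pts = torch.stack([torch.zeros_like(z), torch.zeros_like(z), z], dim=-1)[None]
    sdepth = (f1 * z + f2) / z  # the formula used by unproject_points
    print(torch.allclose(sdepth, cameras.transform_points(pts)[0, :, 2]))  # expected: True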
@@ -360,98 +510,48 @@ class OpenGLOrthographicCameras(TensorProperties):
         transform._matrix = P.transpose(1, 2).contiguous()
         return transform

-    def clone(self):
-        other = OpenGLOrthographicCameras(device=self.device)
-        return super().clone(other)
-
-    def get_camera_center(self, **kwargs):
-        ...
-
-    def get_world_to_view_transform(self, **kwargs) -> Transform3d:
-        ...
-
-    def get_full_projection_transform(self, **kwargs) -> Transform3d:
-        ...
-
-    def transform_points(self, points, **kwargs) -> torch.Tensor:
-        ...
+    def unproject_points(
+        self,
+        xy_depth: torch.Tensor,
+        world_coordinates: bool = True,
+        scaled_depth_input: bool = False,
+        **kwargs
+    ) -> torch.Tensor:
+        """
+        OpenGL cameras further allow for passing depth in world units
+        (`scaled_depth_input=False`) or in the [0, 1]-normalized units
+        (`scaled_depth_input=True`).
+
+        Args:
+            scaled_depth_input: If `True`, assumes the input depth is in
+                the [0, 1]-normalized units. If `False` the input depth is in
+                the world units.
+        """
+        if world_coordinates:
+            to_screen_transform = self.get_full_projection_transform(**kwargs.copy())
+        else:
+            to_screen_transform = self.get_projection_transform(**kwargs.copy())
+
+        if scaled_depth_input:
+            # the input depth is already scaled
+            xy_sdepth = xy_depth
+        else:
+            # we have to obtain the scaled depth first
+            P = self.get_projection_transform(**kwargs).get_matrix()
+            unsqueeze_shape = [1] * P.dim()
+            unsqueeze_shape[0] = P.shape[0]
+            mid_z = P[:, 3, 2].reshape(unsqueeze_shape)
+            scale_z = P[:, 2, 2].reshape(unsqueeze_shape)
+            scaled_depth = scale_z * xy_depth[..., 2:3] + mid_z
+            # cat xy and scaled depth
+            xy_sdepth = torch.cat((xy_depth[..., :2], scaled_depth), dim=-1)
+        # finally invert the transform
+        unprojection_transform = to_screen_transform.inverse()
+        return unprojection_transform.transform_points(xy_sdepth)


-class SfMPerspectiveCameras(TensorProperties):
+class SfMPerspectiveCameras(CamerasBase):
     """
     A class which stores a batch of parameters to generate a batch of
     transformation matrices using the multi-view geometry convention for
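For the orthographic case the projection is affine in depth, so the [0, 1]-normalized depth is simply `scale_z * depth + mid_z` with both factors read off the projection matrix, and no division is involved. A round-trip sketch, illustrative only (arbitrary near/far planes, default identity extrinsics so world and camera frames coincide):

    import torch
    from pytorch3d.renderer.cameras import OpenGLOrthographicCameras

    cameras = OpenGLOrthographicCameras(znear=0.5, zfar=8.0)
    xyz = torch.randn(1, 6, 3) * 0.3
    xyz[..., 2] += 2.0  # keep the points in front of the camera
    xy = cameras.transform_points(xyz)[..., :2]
    xy_depth = torch.cat((xy, xyz[..., 2:]), dim=-1)  # xy in screen space, depth in world units
    xyz_back = cameras.unproject_points(
        xy_depth, world_coordinates=True, scaled_depth_input=False
    )
    print(torch.allclose(xyz, xyz_back, atol=1e-5))  # expected: True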
@@ -495,7 +595,7 @@ class SfMPerspectiveCameras(TensorProperties):
                 arguments to override the default values set in __init__.

         Returns:
-            P: a batch of projection matrices of shape (N, 4, 4)
+            P: A `Transform3d` object with a batch of `N` projection transforms.

         .. code-block:: python

@@ -524,93 +624,22 @@
         transform._matrix = P.transpose(1, 2).contiguous()
         return transform

-    def clone(self):
-        other = SfMPerspectiveCameras(device=self.device)
-        return super().clone(other)
-
-    def get_camera_center(self, **kwargs):
-        ...
-
-    def get_world_to_view_transform(self, **kwargs) -> Transform3d:
-        ...
-
-    def get_full_projection_transform(self, **kwargs) -> Transform3d:
-        ...
-
-    def transform_points(self, points, **kwargs) -> torch.Tensor:
-        ...
+    def unproject_points(
+        self, xy_depth: torch.Tensor, world_coordinates: bool = True, **kwargs
+    ) -> torch.Tensor:
+        if world_coordinates:
+            to_screen_transform = self.get_full_projection_transform(**kwargs)
+        else:
+            to_screen_transform = self.get_projection_transform(**kwargs)
+
+        unprojection_transform = to_screen_transform.inverse()
+        xy_inv_depth = torch.cat(
+            (xy_depth[..., :2], 1.0 / xy_depth[..., 2:3]), dim=-1  # type: ignore
+        )
+        return unprojection_transform.transform_points(xy_inv_depth)


-class SfMOrthographicCameras(TensorProperties):
+class SfMOrthographicCameras(CamerasBase):
     """
     A class which stores a batch of parameters to generate a batch of
     transformation matrices using the multi-view geometry convention for
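The SfM perspective projection emits `1/z` as its third output coordinate, which is why `unproject_points` above concatenates `xy` with the reciprocal of the input depth before applying the inverse transform. A small round-trip sketch, not part of the commit (parameter values arbitrary, default extrinsics so world and camera frames coincide):

    import torch
    from pytorch3d.renderer.cameras import SfMPerspectiveCameras

    cameras = SfMPerspectiveCameras(focal_length=2.0, principal_point=((0.1, -0.2),))
    xyz = torch.randn(1, 8, 3) * 0.2
    xyz[..., 2] += 3.0  # keep the points in front of the camera
    proj = cameras.transform_points(xyz)  # third coordinate is 1/z
    xy_depth = torch.cat((proj[..., :2], xyz[..., 2:]), dim=-1)  # but unproject expects depth
    xyz_back = cameras.unproject_points(xy_depth, world_coordinates=True)
    print(torch.allclose(xyz, xyz_back, atol=1e-5))  # expected: True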
@@ -653,8 +682,8 @@ class SfMOrthographicCameras(TensorProperties):
             **kwargs: parameters for the projection can be passed in as keyword
                 arguments to override the default values set in __init__.

-        Return:
-            P: a batch of projection matrices of shape (N, 4, 4)
+        Returns:
+            P: A `Transform3d` object with a batch of `N` projection transforms.

         .. code-block:: python

@@ -683,90 +712,16 @@
         transform._matrix = P.transpose(1, 2).contiguous()
         return transform

-    def clone(self):
-        other = SfMOrthographicCameras(device=self.device)
-        return super().clone(other)
-
-    def get_camera_center(self, **kwargs):
-        ...
-
-    def get_world_to_view_transform(self, **kwargs) -> Transform3d:
-        ...
-
-    def get_full_projection_transform(self, **kwargs) -> Transform3d:
-        ...
-
-    def transform_points(self, points, **kwargs) -> torch.Tensor:
-        ...
+    def unproject_points(
+        self, xy_depth: torch.Tensor, world_coordinates: bool = True, **kwargs
+    ) -> torch.Tensor:
+        if world_coordinates:
+            to_screen_transform = self.get_full_projection_transform(**kwargs)
+        else:
+            to_screen_transform = self.get_projection_transform(**kwargs)
+
+        unprojection_transform = to_screen_transform.inverse()
+        return unprojection_transform.transform_points(xy_depth)


 # SfMCameras helper
@@ -1,6 +1,8 @@
 # Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.


+import copy
+import inspect
 import warnings
 from typing import Any, Union

@@ -168,10 +170,13 @@ class TensorProperties(object):
         """
         for k in dir(self):
             v = getattr(self, k)
-            if k == "device":
-                setattr(self, k, v)
+            if inspect.ismethod(v) or k.startswith("__"):
+                continue
             if torch.is_tensor(v):
-                setattr(other, k, v.clone())
+                v_clone = v.clone()
+            else:
+                v_clone = copy.deepcopy(v)
+            setattr(other, k, v_clone)
         return other

     def gather_props(self, batch_idx):
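With this change `TensorProperties.clone` skips bound methods and dunder attributes, clones tensor attributes, and deep-copies everything else instead of leaving non-tensor attributes uncopied, so a cloned camera no longer shares state with the original. A short sketch, illustrative only, of the behaviour the new `test_clone` relies on (attribute values arbitrary):

    import torch
    from pytorch3d.renderer.cameras import SfMPerspectiveCameras

    cam = SfMPerspectiveCameras(focal_length=torch.tensor([[2.0]]))
    cam2 = cam.clone()
    # tensors are cloned, not aliased: mutating one does not affect the other
    cam.focal_length += 1.0
    print(cam.focal_length, cam2.focal_length)  # expected: tensor([[3.]]) tensor([[2.]])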
@@ -32,6 +32,7 @@ import numpy as np
 import torch
 from common_testing import TestCaseMixin
 from pytorch3d.renderer.cameras import (
+    CamerasBase,
     OpenGLOrthographicCameras,
     OpenGLPerspectiveCameras,
     SfMOrthographicCameras,
@@ -347,6 +348,8 @@ class TestCameraHelpers(TestCaseMixin, unittest.TestCase):
         RT = get_world_to_view_transform(R=R, T=T)
         self.assertTrue(isinstance(RT, Transform3d))


+class TestCamerasCommon(TestCaseMixin, unittest.TestCase):
     def test_view_transform_class_method(self):
         T = torch.tensor([0.0, 0.0, -1.0], requires_grad=True).view(1, -1)
         R = look_at_rotation(T)
@@ -377,6 +380,108 @@ class TestCameraHelpers(TestCaseMixin, unittest.TestCase):
         C_ = -torch.bmm(R, T[:, :, None])[:, :, 0]
         self.assertTrue(torch.allclose(C, C_, atol=1e-05))

+    @staticmethod
+    def init_random_cameras(cam_type: CamerasBase, batch_size: int):
+        cam_params = {}
+        T = torch.randn(batch_size, 3) * 0.03
+        T[:, 2] = 4
+        R = so3_exponential_map(torch.randn(batch_size, 3) * 3.0)
+        cam_params = {"R": R, "T": T}
+        if cam_type in (OpenGLPerspectiveCameras, OpenGLOrthographicCameras):
+            cam_params["znear"] = torch.rand(batch_size) * 10 + 0.1
+            cam_params["zfar"] = torch.rand(batch_size) * 4 + 1 + cam_params["znear"]
+            if cam_type == OpenGLPerspectiveCameras:
+                cam_params["fov"] = torch.rand(batch_size) * 60 + 30
+                cam_params["aspect_ratio"] = torch.rand(batch_size) * 0.5 + 0.5
+            else:
+                cam_params["top"] = torch.rand(batch_size) * 0.2 + 0.9
+                cam_params["bottom"] = -torch.rand(batch_size) * 0.2 - 0.9
+                cam_params["left"] = -torch.rand(batch_size) * 0.2 - 0.9
+                cam_params["right"] = torch.rand(batch_size) * 0.2 + 0.9
+        elif cam_type in (SfMOrthographicCameras, SfMPerspectiveCameras):
+            cam_params["focal_length"] = torch.rand(batch_size) * 10 + 0.1
+            cam_params["principal_point"] = torch.randn((batch_size, 2))
+        else:
+            raise ValueError(str(cam_type))
+        return cam_type(**cam_params)
+
+    def test_unproject_points(self, batch_size=50, num_points=100):
+        """
+        Checks that an unprojection of a randomly projected point cloud
+        stays the same.
+        """
+
+        for cam_type in (
+            SfMOrthographicCameras,
+            OpenGLPerspectiveCameras,
+            OpenGLOrthographicCameras,
+            SfMPerspectiveCameras,
+        ):
+            # init the cameras
+            cameras = TestCamerasCommon.init_random_cameras(cam_type, batch_size)
+            # xyz - the ground truth point cloud
+            xyz = torch.randn(batch_size, num_points, 3) * 0.3
+            # xyz in camera coordinates
+            xyz_cam = cameras.get_world_to_view_transform().transform_points(xyz)
+            # depth = z-component of xyz_cam
+            depth = xyz_cam[:, :, 2:]
+            # project xyz
+            xyz_proj = cameras.transform_points(xyz)
+            xy, cam_depth = xyz_proj.split(2, dim=2)
+            # input to the unprojection function
+            xy_depth = torch.cat((xy, depth), dim=2)
+
+            for to_world in (False, True):
+                if to_world:
+                    matching_xyz = xyz
+                else:
+                    matching_xyz = xyz_cam
+
+                # if we have OpenGL cameras
+                # test for scaled_depth_input=True/False
+                if cam_type in (OpenGLPerspectiveCameras, OpenGLOrthographicCameras):
+                    for scaled_depth_input in (True, False):
+                        if scaled_depth_input:
+                            xy_depth_ = xyz_proj
+                        else:
+                            xy_depth_ = xy_depth
+                        xyz_unproj = cameras.unproject_points(
+                            xy_depth_,
+                            world_coordinates=to_world,
+                            scaled_depth_input=scaled_depth_input,
+                        )
+                        self.assertTrue(
+                            torch.allclose(xyz_unproj, matching_xyz, atol=1e-4)
+                        )
+                else:
+                    xyz_unproj = cameras.unproject_points(
+                        xy_depth, world_coordinates=to_world
+                    )
+                    self.assertTrue(torch.allclose(xyz_unproj, matching_xyz, atol=1e-4))
+
+    def test_clone(self, batch_size: int = 10):
+        """
+        Checks the clone function of the cameras.
+        """
+        for cam_type in (
+            SfMOrthographicCameras,
+            OpenGLPerspectiveCameras,
+            OpenGLOrthographicCameras,
+            SfMPerspectiveCameras,
+        ):
+            cameras = TestCamerasCommon.init_random_cameras(cam_type, batch_size)
+            cameras = cameras.to(torch.device("cpu"))
+            cameras_clone = cameras.clone()
+
+            for var in cameras.__dict__.keys():
+                val = getattr(cameras, var)
+                val_clone = getattr(cameras_clone, var)
+                if torch.is_tensor(val):
+                    self.assertClose(val, val_clone)
+                    self.assertSeparate(val, val_clone)
+                else:
+                    self.assertTrue(val == val_clone)
+
+
 class TestPerspectiveProjection(TestCaseMixin, unittest.TestCase):
     def test_perspective(self):
@@ -679,4 +784,4 @@ class TestSfMPerspectiveProjection(TestCaseMixin, unittest.TestCase):
         vertices = torch.randn([3, 4, 3], dtype=torch.float32)
         v1 = P.transform_points(vertices)
         v2 = sfm_perspective_project_naive(vertices, fx=2.0, fy=2.0, p0x=2.5, p0y=3.5)
-        self.assertClose(v1, v2)
+        self.assertClose(v1, v2, atol=1e-6)