diff --git a/pytorch3d/renderer/cameras.py b/pytorch3d/renderer/cameras.py index 15057883..22887b1e 100644 --- a/pytorch3d/renderer/cameras.py +++ b/pytorch3d/renderer/cameras.py @@ -42,15 +42,16 @@ class CamerasBase(TensorProperties): It defines methods that are common to all camera models: - `get_camera_center` that returns the optical center of the camera in - world coordinates + world coordinates - `get_world_to_view_transform` which returns a 3D transform from - world coordinates to the camera view coordinates (R, T) + world coordinates to the camera view coordinates (R, T) - `get_full_projection_transform` which composes the projection - transform (P) with the world-to-view transform (R, T) + transform (P) with the world-to-view transform (R, T) - `transform_points` which takes a set of input points in world coordinates and - projects to NDC coordinates ranging from [-1, -1, znear] to [+1, +1, zfar]. + projects to NDC coordinates ranging from [-1, -1, znear] to [+1, +1, zfar]. - `transform_points_screen` which takes a set of input points in world coordinates and - projects them to the screen coordinates ranging from [0, 0, znear] to [W-1, H-1, zfar] + projects them to the screen coordinates ranging from + [0, 0, znear] to [W-1, H-1, zfar] For each new camera, one should implement the `get_projection_transform` routine that returns the mapping from camera view coordinates to NDC coordinates. 
@@ -268,6 +269,12 @@ class CamerasBase(TensorProperties): other = cam_type(device=self.device) return super().clone(other) + def is_perspective(self): + raise NotImplementedError() + + def get_znear(self): + return getattr(self, "znear", None) + ############################################################ # Field of View Camera Classes # @@ -534,6 +541,9 @@ class FoVPerspectiveCameras(CamerasBase): unprojection_transform = to_ndc_transform.inverse() return unprojection_transform.transform_points(xy_sdepth) + def is_perspective(self): + return True + def OpenGLOrthographicCameras( znear=1.0, @@ -752,6 +762,9 @@ class FoVOrthographicCameras(CamerasBase): unprojection_transform = to_ndc_transform.inverse() return unprojection_transform.transform_points(xy_sdepth) + def is_perspective(self): + return False + ############################################################ # MultiView Camera Classes # @@ -927,6 +940,9 @@ class PerspectiveCameras(CamerasBase): ) return unprojection_transform.transform_points(xy_inv_depth) + def is_perspective(self): + return True + def SfMOrthographicCameras( focal_length=1.0, principal_point=((0.0, 0.0),), R=_R, T=_T, device="cpu" @@ -1086,6 +1102,9 @@ class OrthographicCameras(CamerasBase): unprojection_transform = to_ndc_transform.inverse() return unprojection_transform.transform_points(xy_depth) + def is_perspective(self): + return False + ################################################ # Helper functions for cameras # diff --git a/pytorch3d/renderer/mesh/rasterizer.py b/pytorch3d/renderer/mesh/rasterizer.py index 13ffe184..3042b071 100644 --- a/pytorch3d/renderer/mesh/rasterizer.py +++ b/pytorch3d/renderer/mesh/rasterizer.py @@ -139,8 +139,13 @@ class MeshRasterizer(nn.Module): if clip_barycentric_coords is None: clip_barycentric_coords = raster_settings.blur_radius > 0.0 - # TODO(jcjohns): Should we try to set perspective_correct automatically - # based on the type of the camera? 
+ # If not specified, infer perspective_correct from the camera + cameras = kwargs.get("cameras", self.cameras) + if raster_settings.perspective_correct is not None: + perspective_correct = raster_settings.perspective_correct + else: + perspective_correct = cameras.is_perspective() + pix_to_face, zbuf, bary_coords, dists = rasterize_meshes( meshes_screen, image_size=raster_settings.image_size, @@ -148,7 +153,7 @@ class MeshRasterizer(nn.Module): faces_per_pixel=raster_settings.faces_per_pixel, bin_size=raster_settings.bin_size, max_faces_per_bin=raster_settings.max_faces_per_bin, - perspective_correct=raster_settings.perspective_correct, + perspective_correct=perspective_correct, clip_barycentric_coords=clip_barycentric_coords, cull_backfaces=raster_settings.cull_backfaces, ) diff --git a/tests/test_cameras.py b/tests/test_cameras.py index 891ac3ab..f401a09d 100644 --- a/tests/test_cameras.py +++ b/tests/test_cameras.py @@ -794,6 +794,11 @@ class TestFoVPerspectiveProjection(TestCaseMixin, unittest.TestCase): new_points = cam.transform_points(points) self.assertClose(new_points, projected_points) + def test_perspective_type(self): + cam = FoVPerspectiveCameras(znear=1.0, zfar=10.0, fov=60.0) + self.assertTrue(cam.is_perspective()) + self.assertEqual(cam.get_znear(), 1.0) + ############################################################ # FoVOrthographic Camera # @@ -885,6 +890,11 @@ class TestFoVOrthographicProjection(TestCaseMixin, unittest.TestCase): ) self.assertClose(scale_grad, grad_scale) + def test_perspective_type(self): + cam = FoVOrthographicCameras(znear=1.0, zfar=10.0) + self.assertFalse(cam.is_perspective()) + self.assertEqual(cam.get_znear(), 1.0) + ############################################################ # Orthographic Camera # @@ -937,6 +947,11 @@ class TestOrthographicProjection(TestCaseMixin, unittest.TestCase): v1 = P.transform_points(vertices) self.assertClose(v1, projected_verts) + def test_perspective_type(self): + cam = 
OrthographicCameras(focal_length=5.0, principal_point=((2.5, 2.5),)) + self.assertFalse(cam.is_perspective()) + self.assertIsNone(cam.get_znear()) + ############################################################ # Perspective Camera # @@ -983,3 +998,8 @@ class TestPerspectiveProjection(TestCaseMixin, unittest.TestCase): v1 = P.transform_points(vertices) v2 = sfm_perspective_project_naive(vertices, fx=2.0, fy=2.0, p0x=2.5, p0y=3.5) self.assertClose(v1, v2, atol=1e-6) + + def test_perspective_type(self): + cam = PerspectiveCameras(focal_length=5.0, principal_point=((2.5, 2.5),)) + self.assertTrue(cam.is_perspective()) + self.assertIsNone(cam.get_znear()) diff --git a/tests/test_render_implicit.py b/tests/test_render_implicit.py index 0884a818..c4f0cb5e 100644 --- a/tests/test_render_implicit.py +++ b/tests/test_render_implicit.py @@ -242,7 +242,10 @@ class TestRenderImplicit(TestCaseMixin, unittest.TestCase): rasterizer=MeshRasterizer( cameras=cameras_randomized, raster_settings=RasterizationSettings( - image_size=image_size, blur_radius=1e-3, faces_per_pixel=10 + image_size=image_size, + blur_radius=1e-3, + faces_per_pixel=10, + perspective_correct=False, ), ), shader=SoftPhongShader( diff --git a/tests/test_render_meshes.py b/tests/test_render_meshes.py index 5c46bd25..81505776 100644 --- a/tests/test_render_meshes.py +++ b/tests/test_render_meshes.py @@ -500,6 +500,7 @@ class TestRenderMeshes(TestCaseMixin, unittest.TestCase): blur_radius=np.log(1.0 / 1e-4 - 1.0) * blend_params.sigma, faces_per_pixel=100, clip_barycentric_coords=True, + perspective_correct=False, ) # Load reference image @@ -844,7 +845,10 @@ class TestRenderMeshes(TestCaseMixin, unittest.TestCase): cameras = FoVPerspectiveCameras(device=device, R=R, T=T) raster_settings = RasterizationSettings( - image_size=512, blur_radius=0.0, faces_per_pixel=1 + image_size=512, + blur_radius=0.0, + faces_per_pixel=1, + perspective_correct=False, ) lights = PointLights( @@ -919,7 +923,10 @@ class 
TestRenderMeshes(TestCaseMixin, unittest.TestCase): R, T = look_at_view_transform(2.7, 0.0, 0.0) cameras = FoVPerspectiveCameras(device=device, R=R, T=T) raster_settings = RasterizationSettings( - image_size=512, blur_radius=0.0, faces_per_pixel=1 + image_size=512, + blur_radius=0.0, + faces_per_pixel=1, + perspective_correct=False, ) # Init shader settings