mirror of https://github.com/facebookresearch/pytorch3d.git (synced 2025-12-19 05:40:34 +08:00)
NDC/screen cameras API fix, compatibility with renderer
Summary: API fix for NDC/screen cameras and compatibility with PyTorch3D renderers. With this fix:

* Users can define cameras and call `transform_points` under any coordinate system convention. The transformation applies the camera K and RT to the input points without regard to PyTorch3D conventions, which makes cameras completely independent of the PyTorch3D renderer.
* Cameras can be defined either in NDC space or in screen space. Of the existing cameras, FoV cameras are in NDC space; Perspective/Orthographic cameras can be defined in either NDC or screen space.
* The interface with PyTorch3D renderers happens through `transform_points_ndc`, which transforms points to NDC space and assumes that input points are provided according to PyTorch3D conventions.
* Similarly, `transform_points_screen` transforms points to screen space, again assuming that input points follow PyTorch3D conventions.
* For Orthographic/Perspective cameras defined in screen space, `get_ndc_camera_transform` converts points to NDC for use with the renderers.

Reviewed By: nikhilaravi

Differential Revision: D26932657

fbshipit-source-id: 1a964e3e7caa54d10c792cf39c4d527ba2fb2e79
This commit is contained in:
committed by Facebook GitHub Bot
parent 9a14f54e8b
commit 0c32f094af
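For orientation, here is a minimal sketch of the camera API this commit describes. The values are illustrative only; the `in_ndc` flag and the `transform_points*` methods are the ones exercised in the test diffs below:

```python
import torch
from pytorch3d.renderer import PerspectiveCameras

# Points in PyTorch3D camera space, kept in front of the camera (z > 0).
xy = torch.randn(1, 100, 2) * 0.3
z = torch.rand(1, 100, 1) * 3.0 + 0.1
points = torch.cat((xy, z), dim=2)

# A camera defined directly in screen (pixel) space.
screen_cam = PerspectiveCameras(
    focal_length=((255.5, 255.5),),
    principal_point=((255.5, 255.5),),
    image_size=((512, 512),),
    in_ndc=False,
)

# Applies the camera K and RT as given, independent of PyTorch3D conventions.
pts_projected = screen_cam.transform_points(points)

# Renderer-facing transforms; these assume the inputs follow PyTorch3D conventions.
pts_ndc = screen_cam.transform_points_ndc(points)
pts_screen = screen_cam.transform_points_screen(
    points, image_size=torch.tensor([[512, 512]])
)

# For screen-space cameras, this is the transform composed in to land in NDC
# (used internally by the renderer-facing methods).
ndc_transform = screen_cam.get_ndc_camera_transform()
```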
@@ -124,17 +124,23 @@ def ndc_to_screen_points_naive(points, imsize):
     Transforms points from PyTorch3D's NDC space to screen space
     Args:
         points: (N, V, 3) representing padded points
-        imsize: (N, 2) image size = (width, height)
+        imsize: (N, 2) image size = (height, width)
     Returns:
         (N, V, 3) tensor of transformed points
     """
-    imwidth, imheight = imsize.unbind(1)
-    imwidth = imwidth.view(-1, 1)
-    imheight = imheight.view(-1, 1)
+    height, width = imsize.unbind(1)
+    width = width.view(-1, 1)
+    half_width = (width - 1.0) / 2.0
+    height = height.view(-1, 1)
+    half_height = (height - 1.0) / 2.0
+
+    scale = (
+        half_width * (height > width).float() + half_height * (height <= width).float()
+    )
+
     x, y, z = points.unbind(2)
-    x = (1.0 - x) * (imwidth - 1) / 2.0
-    y = (1.0 - y) * (imheight - 1) / 2.0
+    x = -scale * x + half_width
+    y = -scale * y + half_height
     return torch.stack((x, y, z), dim=2)
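For a concrete feel for the new `scale` logic (one scale, taken from the shorter image side, applied to both axes), here is the patched helper as a self-contained snippet with a quick check on a non-square image; the numbers are chosen purely for illustration:

```python
import torch

def ndc_to_screen_points_naive(points, imsize):
    # Copy of the patched helper above; imsize is (N, 2) = (height, width).
    height, width = imsize.unbind(1)
    width = width.view(-1, 1)
    half_width = (width - 1.0) / 2.0
    height = height.view(-1, 1)
    half_height = (height - 1.0) / 2.0
    scale = (
        half_width * (height > width).float() + half_height * (height <= width).float()
    )
    x, y, z = points.unbind(2)
    x = -scale * x + half_width
    y = -scale * y + half_height
    return torch.stack((x, y, z), dim=2)

# (height, width) = (32, 64): scale = (32 - 1) / 2 = 15.5 on both axes,
# so x = -15.5 * x_ndc + 31.5 and y = -15.5 * y_ndc + 15.5.
imsize = torch.tensor([[32.0, 64.0]])
corners = torch.tensor([[[1.0, 1.0, 1.0], [-1.0, -1.0, 1.0], [0.0, 0.0, 1.0]]])
print(ndc_to_screen_points_naive(corners, imsize))
# -> [[16.0, 0.0, 1.0], [47.0, 31.0, 1.0], [31.5, 15.5, 1.0]]
```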
@@ -513,17 +519,23 @@ class TestCamerasCommon(TestCaseMixin, unittest.TestCase):
         screen_cam_params = {"R": R, "T": T}
         ndc_cam_params = {"R": R, "T": T}
         if cam_type in (OrthographicCameras, PerspectiveCameras):
-            ndc_cam_params["focal_length"] = torch.rand((batch_size, 2)) * 3.0
-            ndc_cam_params["principal_point"] = torch.randn((batch_size, 2))
+            fcl = torch.rand((batch_size, 2)) * 3.0 + 0.1
+            prc = torch.randn((batch_size, 2)) * 0.2
+            # (height, width)
             image_size = torch.randint(low=2, high=64, size=(batch_size, 2))
+            # scale
+            scale = (image_size.min(dim=1, keepdim=True).values - 1.0) / 2.0
+
+            ndc_cam_params["focal_length"] = fcl
+            ndc_cam_params["principal_point"] = prc
+            ndc_cam_params["image_size"] = image_size
 
             screen_cam_params["image_size"] = image_size
-            screen_cam_params["focal_length"] = (
-                ndc_cam_params["focal_length"] * image_size / 2.0
-            )
-            screen_cam_params["principal_point"] = (
-                (1.0 - ndc_cam_params["principal_point"]) * image_size / 2.0
-            )
+            screen_cam_params["focal_length"] = fcl * scale
+            screen_cam_params["principal_point"] = (
+                image_size[:, [1, 0]] - 1.0
+            ) / 2.0 - prc * scale
+            screen_cam_params["in_ndc"] = False
         else:
             raise ValueError(str(cam_type))
         return cam_type(**ndc_cam_params), cam_type(**screen_cam_params)
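The NDC-to-screen parameter conversion above can be verified by hand. A small sketch with made-up values for a single camera and a 32x48 (height, width) image:

```python
import torch

image_size = torch.tensor([[32.0, 48.0]])  # (height, width), as in the test
fcl = torch.tensor([[2.0, 2.0]])           # NDC focal length
prc = torch.tensor([[0.1, -0.2]])          # NDC principal point

# Same conversion as init_equiv_cameras_ndc_screen above.
scale = (image_size.min(dim=1, keepdim=True).values - 1.0) / 2.0  # (32 - 1) / 2 = 15.5
fcl_screen = fcl * scale                                          # 31.0 on both axes
prc_screen = (image_size[:, [1, 0]] - 1.0) / 2.0 - prc * scale
# x: (48 - 1) / 2 - 0.1 * 15.5 = 21.95;  y: (32 - 1) / 2 + 0.2 * 15.5 = 18.6
```

Note the `[1, 0]` index swap: `image_size` is stored as (height, width), while the principal point is ordered (x, y), i.e. (width, height).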
@@ -611,17 +623,22 @@ class TestCamerasCommon(TestCaseMixin, unittest.TestCase):
             # init the cameras
             cameras = init_random_cameras(cam_type, batch_size)
             # xyz - the ground truth point cloud
-            xyz = torch.randn(batch_size, num_points, 3) * 0.3
+            xy = torch.randn(batch_size, num_points, 2) * 2.0 - 1.0
+            z = torch.randn(batch_size, num_points, 1) * 3.0 + 1.0
+            xyz = torch.cat((xy, z), dim=2)
             # image size
-            image_size = torch.randint(low=2, high=64, size=(batch_size, 2))
+            image_size = torch.randint(low=32, high=64, size=(batch_size, 2))
             # project points
-            xyz_project_ndc = cameras.transform_points(xyz)
-            xyz_project_screen = cameras.transform_points_screen(xyz, image_size)
+            xyz_project_ndc = cameras.transform_points_ndc(xyz)
+            xyz_project_screen = cameras.transform_points_screen(
+                xyz, image_size=image_size
+            )
             # naive
             xyz_project_screen_naive = ndc_to_screen_points_naive(
                 xyz_project_ndc, image_size
             )
-            self.assertClose(xyz_project_screen, xyz_project_screen_naive)
+            # we set atol to 1e-4, remember that screen points are in [0, W-1]x[0, H-1] space
+            self.assertClose(xyz_project_screen, xyz_project_screen_naive, atol=1e-4)
 
     def test_equiv_project_points(self, batch_size=50, num_points=100):
         """
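The looser tolerance here is deliberate: unlike NDC values in roughly [-1, 1], screen points range over [0, W-1] x [0, H-1], up to about 63 with the image sizes sampled above, so an absolute tolerance of 1e-4 still corresponds to a tight relative error.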
@@ -634,12 +651,15 @@ class TestCamerasCommon(TestCaseMixin, unittest.TestCase):
                 ndc_cameras,
                 screen_cameras,
             ) = TestCamerasCommon.init_equiv_cameras_ndc_screen(cam_type, batch_size)
-            # xyz - the ground truth point cloud
-            xyz = torch.randn(batch_size, num_points, 3) * 0.3
+            # xyz - the ground truth point cloud in Py3D space
+            xy = torch.randn(batch_size, num_points, 2) * 0.3
+            z = torch.rand(batch_size, num_points, 1) * 3.0 + 0.1
+            xyz = torch.cat((xy, z), dim=2)
             # project points
-            xyz_ndc_cam = ndc_cameras.transform_points(xyz)
-            xyz_screen_cam = screen_cameras.transform_points(xyz)
-            self.assertClose(xyz_ndc_cam, xyz_screen_cam, atol=1e-6)
+            xyz_ndc = ndc_cameras.transform_points_ndc(xyz)
+            xyz_screen = screen_cameras.transform_points_ndc(xyz)
+            # check correctness
+            self.assertClose(xyz_ndc, xyz_screen, atol=1e-5)
 
     def test_clone(self, batch_size: int = 10):
         """
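Condensed, the equivalence this test checks looks as follows. This is a sketch with hand-picked values, assuming this commit's `in_ndc` and `transform_points_ndc` API:

```python
import torch
from pytorch3d.renderer import PerspectiveCameras

H, W = 32, 48
scale = (min(H, W) - 1.0) / 2.0
fcl = torch.tensor([[2.0, 2.0]])
prc = torch.tensor([[0.1, -0.2]])

# The same camera, once with NDC intrinsics and once with screen intrinsics.
ndc_cam = PerspectiveCameras(
    focal_length=fcl, principal_point=prc, image_size=((H, W),)
)
screen_cam = PerspectiveCameras(
    focal_length=fcl * scale,
    principal_point=torch.tensor([[(W - 1.0) / 2.0, (H - 1.0) / 2.0]]) - prc * scale,
    image_size=((H, W),),
    in_ndc=False,
)

# Both cameras should land points in the same place in NDC space.
xy = torch.randn(1, 100, 2) * 0.3
z = torch.rand(1, 100, 1) * 3.0 + 0.1
xyz = torch.cat((xy, z), dim=2)
assert torch.allclose(
    ndc_cam.transform_points_ndc(xyz), screen_cam.transform_points_ndc(xyz), atol=1e-5
)
```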
@@ -255,9 +255,20 @@ class TestRenderMeshes(TestCaseMixin, unittest.TestCase):
             device=device,
             R=R,
             T=T,
-            principal_point=((256.0, 256.0),),
-            focal_length=((256.0, 256.0),),
+            principal_point=(
+                (
+                    (512.0 - 1.0) / 2.0,
+                    (512.0 - 1.0) / 2.0,
+                ),
+            ),
+            focal_length=(
+                (
+                    (512.0 - 1.0) / 2.0,
+                    (512.0 - 1.0) / 2.0,
+                ),
+            ),
             image_size=((512, 512),),
+            in_ndc=False,
         )
         rasterizer = MeshRasterizer(
             cameras=cameras, raster_settings=raster_settings
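Note the new intrinsics: (512.0 - 1.0) / 2.0 = 255.5. With screen points living in [0, W-1] x [0, H-1] (see the test comment above), the center of a 512x512 render is 255.5, so the previous principal point of 256.0 sat half a pixel off under the screen-space convention; `in_ndc=False` now explicitly marks these intrinsics as screen-space values.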