Camera alignment
Summary: adds the `corresponding_cameras_alignment` function, which estimates a similarity transformation between two sets of corresponding cameras. The function is essential for computing camera errors in SfM pipelines.

```
Benchmark                                              Avg Time(μs)  Peak Time(μs)  Iterations
----------------------------------------------------------------------------------------------
CORRESPONDING_CAMERAS_ALIGNMENT_10_centers_False              32219          36211          16
CORRESPONDING_CAMERAS_ALIGNMENT_10_centers_True               32429          36063          16
CORRESPONDING_CAMERAS_ALIGNMENT_10_extrinsics_False            5548           8782          91
CORRESPONDING_CAMERAS_ALIGNMENT_10_extrinsics_True             6153           9752          82
CORRESPONDING_CAMERAS_ALIGNMENT_100_centers_False             33344          40398          16
CORRESPONDING_CAMERAS_ALIGNMENT_100_centers_True              34528          37095          15
CORRESPONDING_CAMERAS_ALIGNMENT_100_extrinsics_False           5576           7187          90
CORRESPONDING_CAMERAS_ALIGNMENT_100_extrinsics_True            6256           9166          80
CORRESPONDING_CAMERAS_ALIGNMENT_1000_centers_False            32020          37247          16
CORRESPONDING_CAMERAS_ALIGNMENT_1000_centers_True             32776          37644          16
CORRESPONDING_CAMERAS_ALIGNMENT_1000_extrinsics_False          5336           8795          94
CORRESPONDING_CAMERAS_ALIGNMENT_1000_extrinsics_True           6266           9929          80
----------------------------------------------------------------------------------------------
```

Reviewed By: shapovalov

Differential Revision: D22946415

fbshipit-source-id: 8caae7ee365b304d8aa1f8133cf0dd92c35bc0dd
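As a quick illustration of the API exercised by the tests in this diff (a minimal sketch — the camera poses here are arbitrary placeholders, not part of the commit):

```python
import torch
from pytorch3d.ops import corresponding_cameras_alignment
from pytorch3d.renderer.cameras import SfMPerspectiveCameras
from pytorch3d.transforms.rotation_conversions import random_rotations

# Two batches of 8 cameras, assumed to be related by an unknown
# similarity transform (rotation + translation + optional scale).
cameras_src = SfMPerspectiveCameras(R=random_rotations(8), T=torch.randn(8, 3))
cameras_tgt = SfMPerspectiveCameras(R=random_rotations(8), T=torch.randn(8, 3))

# mode="extrinsics" aligns the (R, T) matrices directly;
# mode="centers" aligns the camera centers instead.
cameras_aligned = corresponding_cameras_alignment(
    cameras_src, cameras_tgt, estimate_scale=True, mode="extrinsics"
)
```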
committed by Facebook GitHub Bot
parent 14f015d8bf
commit 316b77782e

tests/bm_cameras_alignment.py (new file, 23 lines)
@@ -0,0 +1,23 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
+
+import itertools
+from fvcore.common.benchmark import benchmark
+from test_cameras_alignment import TestCamerasAlignment
+
+
+def bm_cameras_alignment() -> None:
+
+    case_grid = {
+        "batch_size": [10, 100, 1000],
+        "mode": ["centers", "extrinsics"],
+        "estimate_scale": [False, True],
+    }
+    test_cases = itertools.product(*case_grid.values())
+    kwargs_list = [dict(zip(case_grid.keys(), case)) for case in test_cases]
+
+    benchmark(
+        TestCamerasAlignment.corresponding_cameras_alignment,
+        "CORRESPONDING_CAMERAS_ALIGNMENT",
+        kwargs_list,
+        warmup_iters=1,
+    )
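For reference, the `case_grid` cross-product expands to 3 × 2 × 2 = 12 benchmark configurations; fvcore's `benchmark` calls the benchmarked function once per kwargs dict and times the callable it returns (which is why `TestCamerasAlignment.corresponding_cameras_alignment` further down returns a closure). A standalone sketch of the expansion:

```python
import itertools

case_grid = {
    "batch_size": [10, 100, 1000],
    "mode": ["centers", "extrinsics"],
    "estimate_scale": [False, True],
}
kwargs_list = [
    dict(zip(case_grid.keys(), case))
    for case in itertools.product(*case_grid.values())
]
print(len(kwargs_list))  # 12
print(kwargs_list[0])    # {'batch_size': 10, 'mode': 'centers', 'estimate_scale': False}
```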
tests/test_cameras.py (modified)

@@ -26,6 +26,7 @@
 # SOFTWARE.

 import math
+import typing
 import unittest

 import numpy as np
@@ -47,6 +48,7 @@ from pytorch3d.renderer.cameras import (
     look_at_view_transform,
 )
 from pytorch3d.transforms import Transform3d
+from pytorch3d.transforms.rotation_conversions import random_rotations
 from pytorch3d.transforms.so3 import so3_exponential_map


@@ -132,6 +134,51 @@ def ndc_to_screen_points_naive(points, imsize):
     return torch.stack((x, y, z), dim=2)


+def init_random_cameras(
+    cam_type: typing.Type[CamerasBase], batch_size: int, random_z: bool = False
+):
+    cam_params = {}
+    T = torch.randn(batch_size, 3) * 0.03
+    if not random_z:
+        T[:, 2] = 4
+    R = so3_exponential_map(torch.randn(batch_size, 3) * 3.0)
+    cam_params = {"R": R, "T": T}
+    if cam_type in (OpenGLPerspectiveCameras, OpenGLOrthographicCameras):
+        cam_params["znear"] = torch.rand(batch_size) * 10 + 0.1
+        cam_params["zfar"] = torch.rand(batch_size) * 4 + 1 + cam_params["znear"]
+        if cam_type == OpenGLPerspectiveCameras:
+            cam_params["fov"] = torch.rand(batch_size) * 60 + 30
+            cam_params["aspect_ratio"] = torch.rand(batch_size) * 0.5 + 0.5
+        else:
+            cam_params["top"] = torch.rand(batch_size) * 0.2 + 0.9
+            cam_params["bottom"] = -(torch.rand(batch_size)) * 0.2 - 0.9
+            cam_params["left"] = -(torch.rand(batch_size)) * 0.2 - 0.9
+            cam_params["right"] = torch.rand(batch_size) * 0.2 + 0.9
+    elif cam_type in (FoVPerspectiveCameras, FoVOrthographicCameras):
+        cam_params["znear"] = torch.rand(batch_size) * 10 + 0.1
+        cam_params["zfar"] = torch.rand(batch_size) * 4 + 1 + cam_params["znear"]
+        if cam_type == FoVPerspectiveCameras:
+            cam_params["fov"] = torch.rand(batch_size) * 60 + 30
+            cam_params["aspect_ratio"] = torch.rand(batch_size) * 0.5 + 0.5
+        else:
+            cam_params["max_y"] = torch.rand(batch_size) * 0.2 + 0.9
+            cam_params["min_y"] = -(torch.rand(batch_size)) * 0.2 - 0.9
+            cam_params["min_x"] = -(torch.rand(batch_size)) * 0.2 - 0.9
+            cam_params["max_x"] = torch.rand(batch_size) * 0.2 + 0.9
+    elif cam_type in (
+        SfMOrthographicCameras,
+        SfMPerspectiveCameras,
+        OrthographicCameras,
+        PerspectiveCameras,
+    ):
+        cam_params["focal_length"] = torch.rand(batch_size) * 10 + 0.1
+        cam_params["principal_point"] = torch.randn((batch_size, 2))
+
+    else:
+        raise ValueError(str(cam_type))
+    return cam_type(**cam_params)
+
+
 class TestCameraHelpers(TestCaseMixin, unittest.TestCase):
     def setUp(self) -> None:
         super().setUp()
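Moving this helper to module scope lets other test modules import it. A minimal call, assuming the camera classes already imported by `test_cameras.py` (the batch size is illustrative):

```python
# Sample a batch of 8 randomly posed SfM perspective cameras. With
# random_z=True the z-component of T stays random instead of being pinned to 4.
cameras = init_random_cameras(SfMPerspectiveCameras, batch_size=8, random_z=True)
print(cameras.R.shape, cameras.T.shape)  # torch.Size([8, 3, 3]) torch.Size([8, 3])
```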
@@ -410,7 +457,7 @@ class TestCamerasCommon(TestCaseMixin, unittest.TestCase):

     def test_get_camera_center(self, batch_size=10):
         T = torch.randn(batch_size, 3)
-        R = so3_exponential_map(torch.randn(batch_size, 3) * 3.0)
+        R = random_rotations(batch_size)
         for cam_type in (
             OpenGLPerspectiveCameras,
             OpenGLOrthographicCameras,
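This swap replaces Gaussian axis-angle sampling with `random_rotations`, which draws rotations uniformly (Haar measure) on SO(3) via uniformly sampled unit quaternions, giving better coverage of rotation space. A quick sanity check of its output:

```python
import torch
from pytorch3d.transforms.rotation_conversions import random_rotations

R = random_rotations(10)  # (10, 3, 3) rotation matrices, uniform on SO(3)
# each R is orthonormal with det(R) == +1
assert torch.allclose(
    torch.matmul(R, R.transpose(1, 2)), torch.eye(3).expand(10, 3, 3), atol=1e-5
)
assert torch.allclose(torch.det(R), torch.ones(10), atol=1e-5)
```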
@@ -426,48 +473,6 @@ class TestCamerasCommon(TestCaseMixin, unittest.TestCase):
         C_ = -torch.bmm(R, T[:, :, None])[:, :, 0]
         self.assertTrue(torch.allclose(C, C_, atol=1e-05))

-    @staticmethod
-    def init_random_cameras(cam_type: CamerasBase, batch_size: int):
-        cam_params = {}
-        T = torch.randn(batch_size, 3) * 0.03
-        T[:, 2] = 4
-        R = so3_exponential_map(torch.randn(batch_size, 3) * 3.0)
-        cam_params = {"R": R, "T": T}
-        if cam_type in (OpenGLPerspectiveCameras, OpenGLOrthographicCameras):
-            cam_params["znear"] = torch.rand(batch_size) * 10 + 0.1
-            cam_params["zfar"] = torch.rand(batch_size) * 4 + 1 + cam_params["znear"]
-            if cam_type == OpenGLPerspectiveCameras:
-                cam_params["fov"] = torch.rand(batch_size) * 60 + 30
-                cam_params["aspect_ratio"] = torch.rand(batch_size) * 0.5 + 0.5
-            else:
-                cam_params["top"] = torch.rand(batch_size) * 0.2 + 0.9
-                cam_params["bottom"] = -(torch.rand(batch_size)) * 0.2 - 0.9
-                cam_params["left"] = -(torch.rand(batch_size)) * 0.2 - 0.9
-                cam_params["right"] = torch.rand(batch_size) * 0.2 + 0.9
-        elif cam_type in (FoVPerspectiveCameras, FoVOrthographicCameras):
-            cam_params["znear"] = torch.rand(batch_size) * 10 + 0.1
-            cam_params["zfar"] = torch.rand(batch_size) * 4 + 1 + cam_params["znear"]
-            if cam_type == FoVPerspectiveCameras:
-                cam_params["fov"] = torch.rand(batch_size) * 60 + 30
-                cam_params["aspect_ratio"] = torch.rand(batch_size) * 0.5 + 0.5
-            else:
-                cam_params["max_y"] = torch.rand(batch_size) * 0.2 + 0.9
-                cam_params["min_y"] = -(torch.rand(batch_size)) * 0.2 - 0.9
-                cam_params["min_x"] = -(torch.rand(batch_size)) * 0.2 - 0.9
-                cam_params["max_x"] = torch.rand(batch_size) * 0.2 + 0.9
-        elif cam_type in (
-            SfMOrthographicCameras,
-            SfMPerspectiveCameras,
-            OrthographicCameras,
-            PerspectiveCameras,
-        ):
-            cam_params["focal_length"] = torch.rand(batch_size) * 10 + 0.1
-            cam_params["principal_point"] = torch.randn((batch_size, 2))
-
-        else:
-            raise ValueError(str(cam_type))
-        return cam_type(**cam_params)
-
     @staticmethod
     def init_equiv_cameras_ndc_screen(cam_type: CamerasBase, batch_size: int):
         T = torch.randn(batch_size, 3) * 0.03
@@ -508,7 +513,7 @@ class TestCamerasCommon(TestCaseMixin, unittest.TestCase):
             PerspectiveCameras,
         ):
             # init the cameras
-            cameras = TestCamerasCommon.init_random_cameras(cam_type, batch_size)
+            cameras = init_random_cameras(cam_type, batch_size)
             # xyz - the ground truth point cloud
             xyz = torch.randn(batch_size, num_points, 3) * 0.3
             # xyz in camera coordinates
@@ -572,7 +577,7 @@ class TestCamerasCommon(TestCaseMixin, unittest.TestCase):
         ):

             # init the cameras
-            cameras = TestCamerasCommon.init_random_cameras(cam_type, batch_size)
+            cameras = init_random_cameras(cam_type, batch_size)
             # xyz - the ground truth point cloud
             xyz = torch.randn(batch_size, num_points, 3) * 0.3
             # image size
@@ -618,7 +623,7 @@ class TestCamerasCommon(TestCaseMixin, unittest.TestCase):
             OrthographicCameras,
             PerspectiveCameras,
         ):
-            cameras = TestCamerasCommon.init_random_cameras(cam_type, batch_size)
+            cameras = init_random_cameras(cam_type, batch_size)
             cameras = cameras.to(torch.device("cpu"))
             cameras_clone = cameras.clone()

tests/test_cameras_alignment.py (new file, 174 lines)
@@ -0,0 +1,174 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
+
+import unittest
+
+import numpy as np
+import torch
+from common_testing import TestCaseMixin
+from pytorch3d.ops import corresponding_cameras_alignment
+from pytorch3d.renderer.cameras import (
+    OpenGLOrthographicCameras,
+    OpenGLPerspectiveCameras,
+    SfMOrthographicCameras,
+    SfMPerspectiveCameras,
+)
+from pytorch3d.transforms.rotation_conversions import random_rotations
+from pytorch3d.transforms.so3 import so3_exponential_map, so3_relative_angle
+from test_cameras import init_random_cameras
+
+
+class TestCamerasAlignment(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(42)
+        np.random.seed(42)
+
+    def test_corresponding_cameras_alignment(self):
+        """
+        Checks the corresponding_cameras_alignment function.
+        """
+        device = torch.device("cuda:0")
+
+        # try a few different random setups
+        for _ in range(3):
+            for estimate_scale in (True, False):
+                # init true alignment transform
+                R_align_gt = random_rotations(1, device=device)[0]
+                T_align_gt = torch.randn(3, dtype=torch.float32, device=device)
+
+                # init true scale
+                if estimate_scale:
+                    s_align_gt = torch.randn(
+                        1, dtype=torch.float32, device=device
+                    ).exp()
+                else:
+                    s_align_gt = torch.tensor(1.0, dtype=torch.float32, device=device)
+
+                for cam_type in (
+                    SfMOrthographicCameras,
+                    OpenGLPerspectiveCameras,
+                    OpenGLOrthographicCameras,
+                    SfMPerspectiveCameras,
+                ):
+                    # try well-determined and underdetermined cases
+                    for batch_size in (10, 4, 3, 2, 1):
+                        # get random cameras
+                        cameras = init_random_cameras(
+                            cam_type, batch_size, random_z=True
+                        ).to(device)
+                        # try all alignment modes
+                        for mode in ("extrinsics", "centers"):
+                            # try different noise levels
+                            for add_noise in (0.0, 0.01, 1e-4):
+                                self._corresponding_cameras_alignment_test_case(
+                                    cameras,
+                                    R_align_gt,
+                                    T_align_gt,
+                                    s_align_gt,
+                                    estimate_scale,
+                                    mode,
+                                    add_noise,
+                                )
+
+    def _corresponding_cameras_alignment_test_case(
+        self,
+        cameras,
+        R_align_gt,
+        T_align_gt,
+        s_align_gt,
+        estimate_scale,
+        mode,
+        add_noise,
+    ):
+        batch_size = cameras.R.shape[0]
+
+        # get target camera centers
+        R_new = torch.bmm(R_align_gt[None].expand_as(cameras.R), cameras.R)
+        T_new = (
+            torch.bmm(T_align_gt[None, None].repeat(batch_size, 1, 1), cameras.R)[:, 0]
+            + cameras.T
+        ) * s_align_gt
+
+        if add_noise != 0.0:
+            R_new = torch.bmm(
+                R_new, so3_exponential_map(torch.randn_like(T_new) * add_noise)
+            )
+            T_new += torch.randn_like(T_new) * add_noise
+
+        # create new cameras from R_new and T_new
+        cameras_tgt = cameras.clone()
+        cameras_tgt.R = R_new
+        cameras_tgt.T = T_new
+
+        # align cameras and cameras_tgt
+        cameras_aligned = corresponding_cameras_alignment(
+            cameras, cameras_tgt, estimate_scale=estimate_scale, mode=mode
+        )
+
+        if batch_size <= 2 and mode == "centers":
+            # underdetermined case - check only the center alignment error
+            # since the rotation and translation are ambiguous here
+            self.assertClose(
+                cameras_aligned.get_camera_center(),
+                cameras_tgt.get_camera_center(),
+                atol=max(add_noise * 7.0, 1e-4),
+            )
+
+        else:
+
+            def _rmse(a):
+                return (torch.norm(a, dim=1, p=2) ** 2).mean().sqrt()
+
+            if add_noise != 0.0:
+                # in a noisy case check mean rotation/translation error for
+                # extrinsic alignment and root mean center error for center alignment
+                if mode == "centers":
+                    self.assertNormsClose(
+                        cameras_aligned.get_camera_center(),
+                        cameras_tgt.get_camera_center(),
+                        _rmse,
+                        atol=max(add_noise * 10.0, 1e-4),
+                    )
+                elif mode == "extrinsics":
+                    angle_err = so3_relative_angle(
+                        cameras_aligned.R, cameras_tgt.R
+                    ).mean()
+                    self.assertClose(
+                        angle_err, torch.zeros_like(angle_err), atol=add_noise * 10.0
+                    )
+                    self.assertNormsClose(
+                        cameras_aligned.T, cameras_tgt.T, _rmse, atol=add_noise * 7.0
+                    )
+                else:
+                    raise ValueError(mode)
+
+            else:
+                # compare the rotations and translations of cameras
+                self.assertClose(cameras_aligned.R, cameras_tgt.R, atol=3e-4)
+                self.assertClose(cameras_aligned.T, cameras_tgt.T, atol=3e-4)
+                # compare the centers
+                self.assertClose(
+                    cameras_aligned.get_camera_center(),
+                    cameras_tgt.get_camera_center(),
+                    atol=3e-4,
+                )
+
+    @staticmethod
+    def corresponding_cameras_alignment(
+        batch_size: int, estimate_scale: bool, mode: str, cam_type=SfMPerspectiveCameras
+    ):
+        device = torch.device("cuda:0")
+        cameras_src, cameras_tgt = [
+            init_random_cameras(cam_type, batch_size, random_z=True).to(device)
+            for _ in range(2)
+        ]
+
+        torch.cuda.synchronize()
+
+        def compute_corresponding_cameras_alignment():
+            corresponding_cameras_alignment(
+                cameras_src, cameras_tgt, estimate_scale=estimate_scale, mode=mode
+            )
+            torch.cuda.synchronize()
+
+        return compute_corresponding_cameras_alignment
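A note on the construction in `_corresponding_cameras_alignment_test_case` above: under PyTorch3D's row-vector convention `x_cam = x @ R + T`, the target extrinsics are built from the source ones by composing the ground-truth similarity `(R_align, T_align, s)`:

```latex
R_i^{\text{new}} = R^{\text{align}} R_i,
\qquad
T_i^{\text{new}} = s \,\bigl(T^{\text{align}} R_i + T_i\bigr)
```

`corresponding_cameras_alignment` is then expected to recover cameras matching these targets. With `mode="centers"` and fewer than three cameras, the similarity is underdetermined (for instance, with two cameras any rotation about the line through the two centers fits equally well), which is why the test only checks the camera-center error in that branch.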