Camera alignment

Summary:
Adds a `corresponding_cameras_alignment` function that estimates a similarity transformation between two sets of corresponding cameras.

The function is needed for computing camera errors in SfM pipelines: a reconstruction recovers cameras only up to a similarity transformation of the world frame, so predicted cameras must first be aligned to the ground-truth ones before errors can be measured.
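A minimal usage sketch (the call signature follows the tests added below; the random cameras are placeholders for real source/target camera batches):

```python
import torch
from pytorch3d.ops import corresponding_cameras_alignment
from pytorch3d.renderer.cameras import SfMPerspectiveCameras
from pytorch3d.transforms.rotation_conversions import random_rotations

# two corresponding camera batches; random here, but in practice e.g.
# a reconstruction and the ground-truth cameras of the same scene
batch_size = 10
cameras_src = SfMPerspectiveCameras(
    R=random_rotations(batch_size), T=torch.randn(batch_size, 3)
)
cameras_tgt = SfMPerspectiveCameras(
    R=random_rotations(batch_size), T=torch.randn(batch_size, 3)
)

# estimate the similarity transform that best maps cameras_src onto
# cameras_tgt and return the transformed source cameras;
# mode="extrinsics" aligns R/T, mode="centers" aligns camera centers
cameras_aligned = corresponding_cameras_alignment(
    cameras_src, cameras_tgt, estimate_scale=True, mode="extrinsics"
)
```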

```
Benchmark                                                   Avg Time(μs)      Peak Time(μs) Iterations
--------------------------------------------------------------------------------
CORRESPONDING_CAMERAS_ALIGNMENT_10_centers_False                32219           36211             16
CORRESPONDING_CAMERAS_ALIGNMENT_10_centers_True                 32429           36063             16
CORRESPONDING_CAMERAS_ALIGNMENT_10_extrinsics_False              5548            8782             91
CORRESPONDING_CAMERAS_ALIGNMENT_10_extrinsics_True               6153            9752             82
CORRESPONDING_CAMERAS_ALIGNMENT_100_centers_False               33344           40398             16
CORRESPONDING_CAMERAS_ALIGNMENT_100_centers_True                34528           37095             15
CORRESPONDING_CAMERAS_ALIGNMENT_100_extrinsics_False             5576            7187             90
CORRESPONDING_CAMERAS_ALIGNMENT_100_extrinsics_True              6256            9166             80
CORRESPONDING_CAMERAS_ALIGNMENT_1000_centers_False              32020           37247             16
CORRESPONDING_CAMERAS_ALIGNMENT_1000_centers_True               32776           37644             16
CORRESPONDING_CAMERAS_ALIGNMENT_1000_extrinsics_False            5336            8795             94
CORRESPONDING_CAMERAS_ALIGNMENT_1000_extrinsics_True             6266            9929             80
--------------------------------------------------------------------------------
```

Benchmark names encode `{batch_size}_{mode}_{estimate_scale}` from the benchmark's case grid. The `centers` mode is roughly 5-6x slower than `extrinsics`, and the runtime is essentially independent of the number of cameras over the 10-1000 range.

Reviewed By: shapovalov

Differential Revision: D22946415

fbshipit-source-id: 8caae7ee365b304d8aa1f8133cf0dd92c35bc0dd
Author: David Novotny
Date: 2020-09-03 13:26:13 -07:00
Committed by: Facebook GitHub Bot
Parent: 14f015d8bf
Commit: 316b77782e

6 changed files with 482 additions and 65 deletions

tests/bm_cameras_alignment.py (new file; path inferred from the imports below):

```diff
@@ -0,0 +1,23 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
+
+import itertools
+
+from fvcore.common.benchmark import benchmark
+from test_cameras_alignment import TestCamerasAlignment
+
+
+def bm_cameras_alignment() -> None:
+    case_grid = {
+        "batch_size": [10, 100, 1000],
+        "mode": ["centers", "extrinsics"],
+        "estimate_scale": [False, True],
+    }
+    test_cases = itertools.product(*case_grid.values())
+    kwargs_list = [dict(zip(case_grid.keys(), case)) for case in test_cases]
+
+    benchmark(
+        TestCamerasAlignment.corresponding_cameras_alignment,
+        "CORRESPONDING_CAMERAS_ALIGNMENT",
+        kwargs_list,
+        warmup_iters=1,
+    )
```
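The hunk above defines no `__main__` entry point; a hypothetical way to run the benchmark from the `tests` directory (assuming that directory is on `sys.path`) would be:

```python
# hypothetical driver script, not part of the commit
from bm_cameras_alignment import bm_cameras_alignment

if __name__ == "__main__":
    bm_cameras_alignment()
```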

tests/test_cameras.py (path inferred from the `from test_cameras import init_random_cameras` import in the new test file below):

```diff
@@ -26,6 +26,7 @@
 # SOFTWARE.

 import math
+import typing
 import unittest

 import numpy as np
@@ -47,6 +48,7 @@ from pytorch3d.renderer.cameras import (
     look_at_view_transform,
 )
 from pytorch3d.transforms import Transform3d
+from pytorch3d.transforms.rotation_conversions import random_rotations
 from pytorch3d.transforms.so3 import so3_exponential_map
@@ -132,6 +134,51 @@ def ndc_to_screen_points_naive(points, imsize):
     return torch.stack((x, y, z), dim=2)


+def init_random_cameras(
+    cam_type: typing.Type[CamerasBase], batch_size: int, random_z: bool = False
+):
+    cam_params = {}
+    T = torch.randn(batch_size, 3) * 0.03
+    if not random_z:
+        T[:, 2] = 4
+    R = so3_exponential_map(torch.randn(batch_size, 3) * 3.0)
+    cam_params = {"R": R, "T": T}
+    if cam_type in (OpenGLPerspectiveCameras, OpenGLOrthographicCameras):
+        cam_params["znear"] = torch.rand(batch_size) * 10 + 0.1
+        cam_params["zfar"] = torch.rand(batch_size) * 4 + 1 + cam_params["znear"]
+        if cam_type == OpenGLPerspectiveCameras:
+            cam_params["fov"] = torch.rand(batch_size) * 60 + 30
+            cam_params["aspect_ratio"] = torch.rand(batch_size) * 0.5 + 0.5
+        else:
+            cam_params["top"] = torch.rand(batch_size) * 0.2 + 0.9
+            cam_params["bottom"] = -(torch.rand(batch_size)) * 0.2 - 0.9
+            cam_params["left"] = -(torch.rand(batch_size)) * 0.2 - 0.9
+            cam_params["right"] = torch.rand(batch_size) * 0.2 + 0.9
+    elif cam_type in (FoVPerspectiveCameras, FoVOrthographicCameras):
+        cam_params["znear"] = torch.rand(batch_size) * 10 + 0.1
+        cam_params["zfar"] = torch.rand(batch_size) * 4 + 1 + cam_params["znear"]
+        if cam_type == FoVPerspectiveCameras:
+            cam_params["fov"] = torch.rand(batch_size) * 60 + 30
+            cam_params["aspect_ratio"] = torch.rand(batch_size) * 0.5 + 0.5
+        else:
+            cam_params["max_y"] = torch.rand(batch_size) * 0.2 + 0.9
+            cam_params["min_y"] = -(torch.rand(batch_size)) * 0.2 - 0.9
+            cam_params["min_x"] = -(torch.rand(batch_size)) * 0.2 - 0.9
+            cam_params["max_x"] = torch.rand(batch_size) * 0.2 + 0.9
+    elif cam_type in (
+        SfMOrthographicCameras,
+        SfMPerspectiveCameras,
+        OrthographicCameras,
+        PerspectiveCameras,
+    ):
+        cam_params["focal_length"] = torch.rand(batch_size) * 10 + 0.1
+        cam_params["principal_point"] = torch.randn((batch_size, 2))
+    else:
+        raise ValueError(str(cam_type))
+    return cam_type(**cam_params)
+
+
 class TestCameraHelpers(TestCaseMixin, unittest.TestCase):
     def setUp(self) -> None:
         super().setUp()
@@ -410,7 +457,7 @@ class TestCamerasCommon(TestCaseMixin, unittest.TestCase):
     def test_get_camera_center(self, batch_size=10):
         T = torch.randn(batch_size, 3)
-        R = so3_exponential_map(torch.randn(batch_size, 3) * 3.0)
+        R = random_rotations(batch_size)
         for cam_type in (
             OpenGLPerspectiveCameras,
             OpenGLOrthographicCameras,
@@ -426,48 +473,6 @@ class TestCamerasCommon(TestCaseMixin, unittest.TestCase):
             C_ = -torch.bmm(R, T[:, :, None])[:, :, 0]
             self.assertTrue(torch.allclose(C, C_, atol=1e-05))

-    @staticmethod
-    def init_random_cameras(cam_type: CamerasBase, batch_size: int):
-        cam_params = {}
-        T = torch.randn(batch_size, 3) * 0.03
-        T[:, 2] = 4
-        R = so3_exponential_map(torch.randn(batch_size, 3) * 3.0)
-        cam_params = {"R": R, "T": T}
-        if cam_type in (OpenGLPerspectiveCameras, OpenGLOrthographicCameras):
-            cam_params["znear"] = torch.rand(batch_size) * 10 + 0.1
-            cam_params["zfar"] = torch.rand(batch_size) * 4 + 1 + cam_params["znear"]
-            if cam_type == OpenGLPerspectiveCameras:
-                cam_params["fov"] = torch.rand(batch_size) * 60 + 30
-                cam_params["aspect_ratio"] = torch.rand(batch_size) * 0.5 + 0.5
-            else:
-                cam_params["top"] = torch.rand(batch_size) * 0.2 + 0.9
-                cam_params["bottom"] = -(torch.rand(batch_size)) * 0.2 - 0.9
-                cam_params["left"] = -(torch.rand(batch_size)) * 0.2 - 0.9
-                cam_params["right"] = torch.rand(batch_size) * 0.2 + 0.9
-        elif cam_type in (FoVPerspectiveCameras, FoVOrthographicCameras):
-            cam_params["znear"] = torch.rand(batch_size) * 10 + 0.1
-            cam_params["zfar"] = torch.rand(batch_size) * 4 + 1 + cam_params["znear"]
-            if cam_type == FoVPerspectiveCameras:
-                cam_params["fov"] = torch.rand(batch_size) * 60 + 30
-                cam_params["aspect_ratio"] = torch.rand(batch_size) * 0.5 + 0.5
-            else:
-                cam_params["max_y"] = torch.rand(batch_size) * 0.2 + 0.9
-                cam_params["min_y"] = -(torch.rand(batch_size)) * 0.2 - 0.9
-                cam_params["min_x"] = -(torch.rand(batch_size)) * 0.2 - 0.9
-                cam_params["max_x"] = torch.rand(batch_size) * 0.2 + 0.9
-        elif cam_type in (
-            SfMOrthographicCameras,
-            SfMPerspectiveCameras,
-            OrthographicCameras,
-            PerspectiveCameras,
-        ):
-            cam_params["focal_length"] = torch.rand(batch_size) * 10 + 0.1
-            cam_params["principal_point"] = torch.randn((batch_size, 2))
-        else:
-            raise ValueError(str(cam_type))
-        return cam_type(**cam_params)
-
     @staticmethod
     def init_equiv_cameras_ndc_screen(cam_type: CamerasBase, batch_size: int):
         T = torch.randn(batch_size, 3) * 0.03
@@ -508,7 +513,7 @@ class TestCamerasCommon(TestCaseMixin, unittest.TestCase):
             PerspectiveCameras,
         ):
             # init the cameras
-            cameras = TestCamerasCommon.init_random_cameras(cam_type, batch_size)
+            cameras = init_random_cameras(cam_type, batch_size)
             # xyz - the ground truth point cloud
             xyz = torch.randn(batch_size, num_points, 3) * 0.3
             # xyz in camera coordinates
@@ -572,7 +577,7 @@ class TestCamerasCommon(TestCaseMixin, unittest.TestCase):
         ):
             # init the cameras
-            cameras = TestCamerasCommon.init_random_cameras(cam_type, batch_size)
+            cameras = init_random_cameras(cam_type, batch_size)
             # xyz - the ground truth point cloud
             xyz = torch.randn(batch_size, num_points, 3) * 0.3
             # image size
@@ -618,7 +623,7 @@ class TestCamerasCommon(TestCaseMixin, unittest.TestCase):
             OrthographicCameras,
             PerspectiveCameras,
         ):
-            cameras = TestCamerasCommon.init_random_cameras(cam_type, batch_size)
+            cameras = init_random_cameras(cam_type, batch_size)
             cameras = cameras.to(torch.device("cpu"))
             cameras_clone = cameras.clone()
```
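Moving `init_random_cameras` from a `TestCamerasCommon` static method to module level (and adding the `random_z` flag) lets other test modules reuse it with a plain import; a minimal sketch of that reuse, mirroring the import in the new test file below:

```python
# sketch: reuse the shared camera factory from another test module
from test_cameras import init_random_cameras
from pytorch3d.renderer.cameras import SfMPerspectiveCameras

cameras = init_random_cameras(SfMPerspectiveCameras, batch_size=4, random_z=True)
print(cameras.R.shape)  # torch.Size([4, 3, 3])
```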

tests/test_cameras_alignment.py (new file; path inferred from the `from test_cameras_alignment import TestCamerasAlignment` import in the benchmark file):

```diff
@@ -0,0 +1,174 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
+
+import unittest
+
+import numpy as np
+import torch
+from common_testing import TestCaseMixin
+from pytorch3d.ops import corresponding_cameras_alignment
+from pytorch3d.renderer.cameras import (
+    OpenGLOrthographicCameras,
+    OpenGLPerspectiveCameras,
+    SfMOrthographicCameras,
+    SfMPerspectiveCameras,
+)
+from pytorch3d.transforms.rotation_conversions import random_rotations
+from pytorch3d.transforms.so3 import so3_exponential_map, so3_relative_angle
+
+from test_cameras import init_random_cameras
+
+
+class TestCamerasAlignment(TestCaseMixin, unittest.TestCase):
+    def setUp(self) -> None:
+        super().setUp()
+        torch.manual_seed(42)
+        np.random.seed(42)
+
+    def test_corresponding_cameras_alignment(self):
+        """
+        Checks the corresponding_cameras_alignment function.
+        """
+        device = torch.device("cuda:0")
+
+        # try few different random setups
+        for _ in range(3):
+            for estimate_scale in (True, False):
+                # init true alignment transform
+                R_align_gt = random_rotations(1, device=device)[0]
+                T_align_gt = torch.randn(3, dtype=torch.float32, device=device)
+
+                # init true scale
+                if estimate_scale:
+                    s_align_gt = torch.randn(
+                        1, dtype=torch.float32, device=device
+                    ).exp()
+                else:
+                    s_align_gt = torch.tensor(1.0, dtype=torch.float32, device=device)
+
+                for cam_type in (
+                    SfMOrthographicCameras,
+                    OpenGLPerspectiveCameras,
+                    OpenGLOrthographicCameras,
+                    SfMPerspectiveCameras,
+                ):
+                    # try well-determined and underdetermined cases
+                    for batch_size in (10, 4, 3, 2, 1):
+                        # get random cameras
+                        cameras = init_random_cameras(
+                            cam_type, batch_size, random_z=True
+                        ).to(device)
+                        # try all alignment modes
+                        for mode in ("extrinsics", "centers"):
+                            # try different noise levels
+                            for add_noise in (0.0, 0.01, 1e-4):
+                                self._corresponding_cameras_alignment_test_case(
+                                    cameras,
+                                    R_align_gt,
+                                    T_align_gt,
+                                    s_align_gt,
+                                    estimate_scale,
+                                    mode,
+                                    add_noise,
+                                )
+
+    def _corresponding_cameras_alignment_test_case(
+        self,
+        cameras,
+        R_align_gt,
+        T_align_gt,
+        s_align_gt,
+        estimate_scale,
+        mode,
+        add_noise,
+    ):
+        batch_size = cameras.R.shape[0]
+
+        # get target camera centers
+        R_new = torch.bmm(R_align_gt[None].expand_as(cameras.R), cameras.R)
+        T_new = (
+            torch.bmm(T_align_gt[None, None].repeat(batch_size, 1, 1), cameras.R)[:, 0]
+            + cameras.T
+        ) * s_align_gt
+
+        if add_noise != 0.0:
+            R_new = torch.bmm(
+                R_new, so3_exponential_map(torch.randn_like(T_new) * add_noise)
+            )
+            T_new += torch.randn_like(T_new) * add_noise
+
+        # create new cameras from R_new and T_new
+        cameras_tgt = cameras.clone()
+        cameras_tgt.R = R_new
+        cameras_tgt.T = T_new
+
+        # align cameras and cameras_tgt
+        cameras_aligned = corresponding_cameras_alignment(
+            cameras, cameras_tgt, estimate_scale=estimate_scale, mode=mode
+        )
+
+        if batch_size <= 2 and mode == "centers":
+            # underdetermined case - check only the center alignment error
+            # since the rotation and translation are ambiguous here
+            self.assertClose(
+                cameras_aligned.get_camera_center(),
+                cameras_tgt.get_camera_center(),
+                atol=max(add_noise * 7.0, 1e-4),
+            )
+        else:
+
+            def _rmse(a):
+                return (torch.norm(a, dim=1, p=2) ** 2).mean().sqrt()
+
+            if add_noise != 0.0:
+                # in a noisy case check mean rotation/translation error for
+                # extrinsic alignment and root mean center error for center alignment
+                if mode == "centers":
+                    self.assertNormsClose(
+                        cameras_aligned.get_camera_center(),
+                        cameras_tgt.get_camera_center(),
+                        _rmse,
+                        atol=max(add_noise * 10.0, 1e-4),
+                    )
+                elif mode == "extrinsics":
+                    angle_err = so3_relative_angle(
+                        cameras_aligned.R, cameras_tgt.R
+                    ).mean()
+                    self.assertClose(
+                        angle_err, torch.zeros_like(angle_err), atol=add_noise * 10.0
+                    )
+                    self.assertNormsClose(
+                        cameras_aligned.T, cameras_tgt.T, _rmse, atol=add_noise * 7.0
+                    )
+                else:
+                    raise ValueError(mode)
+            else:
+                # compare the rotations and translations of cameras
+                self.assertClose(cameras_aligned.R, cameras_tgt.R, atol=3e-4)
+                self.assertClose(cameras_aligned.T, cameras_tgt.T, atol=3e-4)
+                # compare the centers
+                self.assertClose(
+                    cameras_aligned.get_camera_center(),
+                    cameras_tgt.get_camera_center(),
+                    atol=3e-4,
+                )
+
+    @staticmethod
+    def corresponding_cameras_alignment(
+        batch_size: int, estimate_scale: bool, mode: str, cam_type=SfMPerspectiveCameras
+    ):
+        device = torch.device("cuda:0")
+        cameras_src, cameras_tgt = [
+            init_random_cameras(cam_type, batch_size, random_z=True).to(device)
+            for _ in range(2)
+        ]
+        torch.cuda.synchronize()
+
+        def compute_corresponding_cameras_alignment():
+            corresponding_cameras_alignment(
+                cameras_src, cameras_tgt, estimate_scale=estimate_scale, mode=mode
+            )
+            torch.cuda.synchronize()
+
+        return compute_corresponding_cameras_alignment
```
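Tying back to the summary, a sketch of the evaluation pattern the function enables (`cameras_pred` and `cameras_gt` are hypothetical stand-ins for a reconstruction and its ground truth; the metrics mirror those used in `_corresponding_cameras_alignment_test_case` above):

```python
import torch
from pytorch3d.ops import corresponding_cameras_alignment
from pytorch3d.transforms.so3 import so3_relative_angle


def camera_errors(cameras_pred, cameras_gt):
    # remove the global similarity ambiguity before measuring errors
    aligned = corresponding_cameras_alignment(
        cameras_pred, cameras_gt, estimate_scale=True, mode="extrinsics"
    )
    # mean angular distance between aligned and ground-truth rotations
    R_err = so3_relative_angle(aligned.R, cameras_gt.R).mean()
    # mean distance between aligned and ground-truth camera centers
    C_err = (
        (aligned.get_camera_center() - cameras_gt.get_camera_center())
        .norm(dim=1)
        .mean()
    )
    return R_err, C_err
```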