From fef5bcd8f94c22b0e9312fdbe52eb039a660a60b Mon Sep 17 00:00:00 2001
From: Patrick Labatut <plabatut@fb.com>
Date: Fri, 9 Jul 2021 10:02:10 -0700
Subject: [PATCH] Use rotation matrices for OpenCV / PyTorch3D conversions

Summary: Use rotation matrices for OpenCV / PyTorch3D conversions: this avoids hiding issues with conversions to / from axis-angle vectors and ensures new conversion functions have a consistent interface.

Reviewed By: bottler, classner

Differential Revision: D29634099

fbshipit-source-id: 40b28357914eb563fedea60a965dcf69e848ccfa
---
 pytorch3d/utils/camera_conversions.py | 32 ++++++++++++++++-----------
 tests/test_camera_conversions.py      |  9 ++++----
 2 files changed, 23 insertions(+), 18 deletions(-)

diff --git a/pytorch3d/utils/camera_conversions.py b/pytorch3d/utils/camera_conversions.py
index 134e81fe..866090cf 100644
--- a/pytorch3d/utils/camera_conversions.py
+++ b/pytorch3d/utils/camera_conversions.py
@@ -13,14 +13,14 @@ from ..transforms import so3_exp_map, so3_log_map
 
 
 def cameras_from_opencv_projection(
-    rvec: torch.Tensor,
+    R: torch.Tensor,
     tvec: torch.Tensor,
     camera_matrix: torch.Tensor,
     image_size: torch.Tensor,
 ) -> PerspectiveCameras:
     """
     Converts a batch of OpenCV-conventioned cameras parametrized with the
-    axis-angle rotation vectors `rvec`, translation vectors `tvec`, and the camera
+    rotation matrices `R`, translation vectors `tvec`, and the camera
     calibration matrices `camera_matrix` to `PerspectiveCameras` in PyTorch3D
     convention.
 
@@ -32,16 +32,20 @@ def cameras_from_opencv_projection(
     More specifically, the OpenCV convention projects points to the OpenCV screen
     space as follows:
         ```
-        x_screen_opencv = camera_matrix @ (exp(rvec) @ x_world + tvec)
+        x_screen_opencv = camera_matrix @ (R @ x_world + tvec)
         ```
     followed by the homogenization of `x_screen_opencv`.
 
     Note:
-        The parameters `rvec, tvec, camera_matrix` correspond, e.g., to the inputs
-        of `cv2.projectPoints`, or to the ouputs of `cv2.calibrateCamera`.
+        The parameters `R, tvec, camera_matrix` correspond to the outputs of
+        `cv2.decomposeProjectionMatrix`.
+
+        The `rvec` parameter of `cv2.projectPoints` is an axis-angle vector
+        that can be converted to the rotation matrix `R` expected here by
+        calling the `so3_exp_map` function.
 
     Args:
-        rvec: A batch of axis-angle rotation vectors of shape `(N, 3)`.
+        R: A batch of rotation matrices of shape `(N, 3, 3)`.
         tvec: A batch of translation vectors of shape `(N, 3)`.
         camera_matrix: A batch of camera calibration matrices of shape `(N, 3, 3)`.
         image_size: A tensor of shape `(N, 2)` containing the sizes of the images
@@ -51,7 +55,6 @@ def cameras_from_opencv_projection(
         cameras_pytorch3d: A batch of `N` cameras in the PyTorch3D convention.
     """
 
-    R = so3_exp_map(rvec)
     focal_length = torch.stack([camera_matrix[:, 0, 0], camera_matrix[:, 1, 1]], dim=-1)
     principal_point = camera_matrix[:, :2, 2]
 
@@ -84,13 +87,17 @@ def opencv_from_cameras_projection(
 ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
     """
     Converts a batch of `PerspectiveCameras` into OpenCV-convention
-    axis-angle rotation vectors `rvec`, translation vectors `tvec`, and the camera
+    rotation matrices `R`, translation vectors `tvec`, and the camera
     calibration matrices `camera_matrix`. This operation is exactly the inverse
     of `cameras_from_opencv_projection`.
 
     Note:
-        The parameters `rvec, tvec, camera_matrix` correspond, e.g., to the inputs
-        of `cv2.projectPoints`, or to the ouputs of `cv2.calibrateCamera`.
+        The outputs `R, tvec, camera_matrix` correspond to the outputs of
+        `cv2.decomposeProjectionMatrix`.
+
+        The `rvec` parameter of `cv2.projectPoints` is an axis-angle vector
+        that can be obtained from the rotation matrix `R` returned here by
+        calling the `so3_log_map` function.
 
     Args:
         cameras: A batch of `N` cameras in the PyTorch3D convention.
@@ -98,7 +105,7 @@ def opencv_from_cameras_projection(
             (height, width) attached to each camera.
 
     Returns:
-        rvec: A batch of axis-angle rotation vectors of shape `(N, 3)`.
+        R: A batch of rotation matrices of shape `(N, 3, 3)`.
         tvec: A batch of translation vectors of shape `(N, 3)`.
         camera_matrix: A batch of camera calibration matrices of shape `(N, 3, 3)`.
     """
@@ -122,5 +129,4 @@ def opencv_from_cameras_projection(
     camera_matrix[:, 2, 2] = 1.0
     camera_matrix[:, 0, 0] = focal_length[:, 0]
     camera_matrix[:, 1, 1] = focal_length[:, 1]
-    rvec = so3_log_map(R)
-    return rvec, tvec, camera_matrix
+    return R, tvec, camera_matrix
diff --git a/tests/test_camera_conversions.py b/tests/test_camera_conversions.py
index 0f1c7acc..cacf3487 100644
--- a/tests/test_camera_conversions.py
+++ b/tests/test_camera_conversions.py
@@ -129,16 +129,15 @@ class TestCameraConversions(TestCaseMixin, unittest.TestCase):
         )
         camera_matrix[:, :2, 2] = principal_point
 
-        rvec = so3_log_map(R)
-
         pts = torch.nn.functional.normalize(torch.randn(4, 1000, 3), dim=-1)
 
         # project the 3D points with the opencv projection function
+        rvec = so3_log_map(R)
         pts_proj_opencv = cv2_project_points(pts, rvec, tvec, camera_matrix)
 
         # make the pytorch3d cameras
         cameras_opencv_to_pytorch3d = cameras_from_opencv_projection(
-            rvec, tvec, camera_matrix, image_size
+            R, tvec, camera_matrix, image_size
         )
 
         # project the 3D points with converted cameras
@@ -155,9 +154,9 @@ class TestCameraConversions(TestCaseMixin, unittest.TestCase):
         )
 
         # Check the inverse.
-        rvec_i, tvec_i, camera_matrix_i = opencv_from_cameras_projection(
+        R_i, tvec_i, camera_matrix_i = opencv_from_cameras_projection(
             cameras_opencv_to_pytorch3d, image_size
         )
-        self.assertClose(rvec, rvec_i)
+        self.assertClose(R, R_i)
         self.assertClose(tvec, tvec_i)
         self.assertClose(camera_matrix, camera_matrix_i)