Conversion from OpenCV cameras

Summary: Implements a conversion function between OpenCV and PyTorch3D cameras. Reviewed By: patricklabatut Differential Revision: D28992470 fbshipit-source-id: dbcc9f213ec293c2f6938261c704aea09aad3c90
2026-02-07 06:22:19 +08:00 · 2021-06-21 05:02:46 -07:00
parent b2ac2655b3
commit 8006842f2a
4 changed files with 1450 additions and 0 deletions
--- a/pytorch3d/utils/__init__.py
+++ b/pytorch3d/utils/__init__.py
@@ -1,5 +1,6 @@
 # Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.

+from .camera_conversions import cameras_from_opencv_projection
 from .ico_sphere import ico_sphere
 from .torus import torus

--- a/pytorch3d/utils/camera_conversions.py
+++ b/pytorch3d/utils/camera_conversions.py
@@ -0,0 +1,70 @@
+import torch
+
+from ..renderer import PerspectiveCameras
+from ..transforms import so3_exponential_map
+
+
+def cameras_from_opencv_projection(
+    rvec: torch.Tensor,
+    tvec: torch.Tensor,
+    camera_matrix: torch.Tensor,
+    image_size: torch.Tensor,
+) -> PerspectiveCameras:
+    """
+    Converts a batch of OpenCV-conventioned cameras parametrized with the
+    axis-angle rotation vectors `rvec`, translation vectors `tvec`, and the camera
+    calibration matrices `camera_matrix` to `PerspectiveCameras` in PyTorch3D
+    convention.
+
+    More specifically, the conversion is carried out such that a projection
+    of a 3D shape to the OpenCV-conventioned screen of size `image_size` results
+    in the same image as a projection with the corresponding PyTorch3D camera
+    to the NDC screen convention of PyTorch3D.
+
+    More specifically, the OpenCV convention projects points to the OpenCV screen
+    space as follows:
+        ```
+        x_screen_opencv = camera_matrix @ (exp(rvec) @ x_world + tvec)
+        ```
+    followed by the homogenization of `x_screen_opencv`.
+
+    Note:
+        The parameters `rvec, tvec, camera_matrix` correspond e.g. to the inputs
+        of `cv2.projectPoints`, or to the ouputs of `cv2.calibrateCamera`.
+
+    Args:
+        rvec: A batch of axis-angle rotation vectors of shape `(N, 3)`.
+        tvec: A batch of translation vectors of shape `(N, 3)`.
+        camera_matrix: A batch of camera calibration matrices of shape `(N, 3, 3)`.
+        image_size: A tensor of shape `(N, 2)` containing the sizes of the images
+            (height, width) attached to each camera.
+
+    Returns:
+        cameras_pytorch3d: A batch of `N` cameras in the PyTorch3D convention.
+    """
+
+    R = so3_exponential_map(rvec)
+    focal_length = torch.stack([camera_matrix[:, 0, 0], camera_matrix[:, 1, 1]], dim=-1)
+    principal_point = camera_matrix[:, :2, 2]
+
+    # Retype the image_size correctly and flip to width, height.
+    image_size_wh = image_size.to(R).flip(dims=(1,))
+
+    # Get the PyTorch3D focal length and principal point.
+    focal_pytorch3d = focal_length / (0.5 * image_size_wh)
+    p0_pytorch3d = -(principal_point / (0.5 * image_size_wh) - 1)
+
+    # For R, T we flip x, y axes (opencv screen space has an opposite
+    # orientation of screen axes).
+    # We also transpose R (opencv multiplies points from the opposite=left side).
+    R_pytorch3d = R.permute(0, 2, 1)
+    T_pytorch3d = tvec.clone()
+    R_pytorch3d[:, :, :2] *= -1
+    T_pytorch3d[:, :2] *= -1
+
+    return PerspectiveCameras(
+        R=R_pytorch3d,
+        T=T_pytorch3d,
+        focal_length=focal_pytorch3d,
+        principal_point=p0_pytorch3d,
+    )