# Cameras

## Camera Coordinate Systems
When working with 3D data, there are four coordinate systems users need to know:

- **World coordinate system**
  This is the coordinate system in which the object/scene lives - the world.
- **Camera view coordinate system**
  This is the coordinate system that has its origin on the image plane and the `Z`-axis perpendicular to the image plane. In PyTorch3D, we assume that `+X` points left, `+Y` points up and `+Z` points out from the image plane. The transformation from world to view is performed by applying a rotation (`R`) and a translation (`T`).
- **NDC coordinate system**
  This is the normalized coordinate system that confines the rendered part of the object/scene within a volume, also known as the view volume. Under the PyTorch3D convention, `(+1, +1, znear)` is the top left near corner and `(-1, -1, zfar)` is the bottom right far corner of the volume. The transformation from view to NDC is performed by applying the camera projection matrix (`P`).
- **Screen coordinate system**
  This is another representation of the view volume, with the `XY` coordinates defined in pixel space instead of a normalized space.
An illustration of the four coordinate systems is shown below:

![cameras](https://user-images.githubusercontent.com/4369065/90317960-d9b8db80-dee1-11ea-8088-39c414b1e2fa.png)
## Defining Cameras in PyTorch3D

Cameras in PyTorch3D transform an object/scene from world to NDC by first transforming the object/scene to view (via the transforms `R` and `T`) and then projecting it to NDC (via the projection matrix `P`, also known as the camera matrix). Thus, the camera parameters in `P` are assumed to be in NDC space. If the user has camera parameters in screen space, which is a common use case, the parameters should be transformed to NDC (see below for an example).
We describe the camera types in PyTorch3D and the convention for the camera parameters provided at construction time.
### Camera Types

All cameras inherit from `CamerasBase`, which is the base class for all cameras. PyTorch3D provides four different camera types. `CamerasBase` defines methods that are common to all camera models (a short usage sketch follows this list):
- `get_camera_center`, which returns the optical center of the camera in world coordinates
- `get_world_to_view_transform`, which returns a 3D transform from world coordinates to the camera view coordinates (`R`, `T`)
- `get_full_projection_transform`, which composes the projection transform (`P`) with the world-to-view transform (`R`, `T`)
- `transform_points`, which takes a set of input points in world coordinates and projects them to NDC coordinates, ranging from `[-1, -1, znear]` to `[+1, +1, zfar]`
- `transform_points_screen`, which takes a set of input points in world coordinates and projects them to screen coordinates, ranging from `[0, 0, znear]` to `[W-1, H-1, zfar]`
Users can easily customize their own cameras. For each new camera, users should implement the `get_projection_transform` routine that returns the mapping `P` from camera view coordinates to NDC coordinates.
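As a rough illustration only (not from the docs; the constructor conventions are simplified and may differ across PyTorch3D versions), a toy custom camera might look like this:

```python
import torch
from pytorch3d.renderer.cameras import CamerasBase
from pytorch3d.transforms import Transform3d

class UniformScaleCameras(CamerasBase):
    """Toy camera whose 'projection' just scales view coordinates uniformly.

    This is a sketch: real camera classes also handle batching, the K
    matrix, and screen/NDC conventions.
    """

    def __init__(self, scale=1.0, R=None, T=None, device="cpu"):
        R = torch.eye(3)[None] if R is None else R  # (1, 3, 3)
        T = torch.zeros(1, 3) if T is None else T   # (1, 3)
        super().__init__(device=device, R=R, T=T)
        self.scale = scale

    def get_projection_transform(self, **kwargs) -> Transform3d:
        # The mapping P from camera view coordinates to NDC coordinates.
        return Transform3d(device=self.device).scale(self.scale)
```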
#### FoVPerspectiveCameras, FoVOrthographicCameras

These two cameras follow the OpenGL convention for perspective and orthographic cameras respectively. The user provides the near (`znear`) and far (`zfar`) fields, which confine the view volume along the `Z` axis. The view volume in the `XY` plane is defined by the field of view angle (`fov`) in the case of `FoVPerspectiveCameras` and by `min_x, min_y, max_x, max_y` in the case of `FoVOrthographicCameras`.
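For example, both FoV camera types can be constructed directly; the `znear`, `zfar`, `fov`, and bound values below are illustrative, not prescribed defaults:

```python
from pytorch3d.renderer import FoVPerspectiveCameras, FoVOrthographicCameras

# Perspective: the XY extent of the view volume follows from the fov angle.
persp = FoVPerspectiveCameras(znear=0.1, zfar=100.0, fov=60.0)

# Orthographic: the XY extent of the view volume is given explicitly.
ortho = FoVOrthographicCameras(
    znear=0.1, zfar=100.0,
    min_x=-1.0, max_x=1.0, min_y=-1.0, max_y=1.0,
)
```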
#### PerspectiveCameras, OrthographicCameras

These two cameras follow the Multi-View Geometry convention for cameras. The user provides the focal length (`fx`, `fy`) and the principal point (`px`, `py`). For example, `camera = PerspectiveCameras(focal_length=((fx, fy),), principal_point=((px, py),))`.
As mentioned above, the focal length and principal point are used to convert a point `(X, Y, Z)` from view coordinates to NDC coordinates, as follows:

```
# for perspective
x_ndc = fx * X / Z + px
y_ndc = fy * Y / Z + py
z_ndc = 1 / Z

# for orthographic
x_ndc = fx * X + px
y_ndc = fy * Y + py
z_ndc = Z
```
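As a quick sanity check of the perspective mapping, here is the formula applied by hand; all values below are illustrative:

```python
# Hypothetical NDC-space intrinsics and a view-space point.
fx, fy = 2.0, 2.0
px, py = 0.0, 0.0
X, Y, Z = 0.5, -0.25, 4.0

x_ndc = fx * X / Z + px  # 2.0 * 0.5 / 4.0 + 0.0 = 0.25
y_ndc = fy * Y / Z + py  # 2.0 * -0.25 / 4.0 + 0.0 = -0.125
z_ndc = 1 / Z            # 0.25
```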
Commonly, users have access to the focal length (`fx_screen`, `fy_screen`) and the principal point (`px_screen`, `py_screen`) in screen space. In that case, to construct the camera the user needs to additionally provide `image_size = ((image_width, image_height),)`. More precisely, `camera = PerspectiveCameras(focal_length=((fx_screen, fy_screen),), principal_point=((px_screen, py_screen),), image_size=((image_width, image_height),))`. Internally, the camera parameters are converted from screen to NDC as follows:
```
fx = fx_screen * 2.0 / image_width
fy = fy_screen * 2.0 / image_height

px = - (px_screen - image_width / 2.0) * 2.0 / image_width
py = - (py_screen - image_height / 2.0) * 2.0 / image_height
```
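For instance, with illustrative intrinsics for a 640x480 image whose principal point sits at the image center, the NDC principal point comes out to zero:

```python
image_width, image_height = 640, 480
fx_screen, fy_screen = 500.0, 500.0  # hypothetical focal length in pixels
px_screen, py_screen = 320.0, 240.0  # principal point at the image center

fx = fx_screen * 2.0 / image_width                           # 1.5625
fy = fy_screen * 2.0 / image_height                          # ~2.083
px = -(px_screen - image_width / 2.0) * 2.0 / image_width    # 0.0
py = -(py_screen - image_height / 2.0) * 2.0 / image_height  # 0.0
```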