pytorch3d/docs/cameras/index.html
2020-08-26 22:27:07 -07:00

115 lines
17 KiB
HTML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<!DOCTYPE html><html lang="en"><head><meta charSet="utf-8"/><meta http-equiv="X-UA-Compatible" content="IE=edge"/><title>cameras · PyTorch3D</title><meta name="viewport" content="width=device-width"/><meta name="generator" content="Docusaurus"/><meta name="description" content="# Cameras"/><meta name="docsearch:language" content="en"/><meta property="og:title" content="cameras · PyTorch3D"/><meta property="og:type" content="website"/><meta property="og:url" content="https://pytorch3d.org/"/><meta property="og:description" content="# Cameras"/><meta property="og:image" content="https://pytorch3d.org/img/pytorch3dlogoicon.svg"/><meta name="twitter:card" content="summary"/><meta name="twitter:image" content="https://pytorch3d.org/img/pytorch3dlogoicon.svg"/><link rel="shortcut icon" href="/img/pytorch3dfavicon.png"/><link rel="stylesheet" href="//cdnjs.cloudflare.com/ajax/libs/highlight.js/9.12.0/styles/default.min.css"/><script>
// Google Analytics (analytics.js) loader.
// The immediately-invoked function below:
//   1. stores the string 'ga' in window['GoogleAnalyticsObject'];
//   2. defines window.ga, unless it already exists, as a stub that appends
//      each call's arguments to the ga.q queue;
//   3. stores the current time (as a number) in ga.l;
//   4. creates a script element with async set and src pointing at
//      analytics.js, and inserts it before the first script element
//      found in the document.
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
// Queue the tracker creation for this property id, then a page-view hit.
ga('create', 'UA-157376881-1', 'auto');
ga('send', 'pageview');
</script><script type="text/javascript" src="https://buttons.github.io/buttons.js"></script><script src="/js/scrollSpy.js"></script><link rel="stylesheet" href="/css/main.css"/><script src="/js/codetabs.js"></script></head><body class="sideNavVisible separateOnPageNav"><div class="fixedHeaderContainer"><div class="headerWrapper wrapper"><header><a href="/"><img class="logo" src="/img/pytorch3dfavicon.png" alt="PyTorch3D"/><h2 class="headerTitleWithLogo">PyTorch3D</h2></a><div class="navigationWrapper navigationSlider"><nav class="slidingNav"><ul class="nav-site nav-site-internal"><li class="siteNavGroupActive"><a href="/docs/why_pytorch3d" target="_self">Docs</a></li><li class=""><a href="/tutorials" target="_self">Tutorials</a></li><li class=""><a href="https://pytorch3d.readthedocs.io/" target="_self">API</a></li><li class=""><a href="https://github.com/facebookresearch/pytorch3d" target="_self">GitHub</a></li></ul></nav></div></header></div></div><div class="navPusher"><div class="docMainWrapper wrapper"><div class="docsNavContainer" id="docsNav"><nav class="toc"><div class="toggleNav"><section class="navWrapper wrapper"><div class="navBreadcrumb wrapper"><div class="navToggle" id="navToggler"><div class="hamburger-menu"><div class="line1"></div><div class="line2"></div><div class="line3"></div></div></div><h2><i></i><span>Renderer</span></h2><div class="tocToggler" id="tocToggler"><i class="icon-toc"></i></div></div><div class="navGroups"><div class="navGroup"><h3 class="navGroupCategoryTitle">Introduction</h3><ul class=""><li class="navListItem"><a class="navItem" href="/docs/why_pytorch3d">Why PyTorch3D</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle">Data</h3><ul class=""><li class="navListItem"><a class="navItem" href="/docs/meshes_io">Loading from file</a></li><li class="navListItem"><a class="navItem" href="/docs/datasets">Data loaders</a></li><li class="navListItem"><a class="navItem" 
href="/docs/batching">Batching</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle">Ops</h3><ul class=""><li class="navListItem"><a class="navItem" href="/docs/cubify">Cubify</a></li></ul></div><div class="navGroup"><h3 class="navGroupCategoryTitle">Renderer</h3><ul class=""><li class="navListItem"><a class="navItem" href="/docs/renderer">Overview</a></li><li class="navListItem"><a class="navItem" href="/docs/renderer_getting_started">Getting Started</a></li><li class="navListItem navListItemActive"><a class="navItem" href="/docs/cameras">Cameras</a></li></ul></div></div></section></div><script>
// Sidebar category setup.
// Every element with class 'collapsible' is a category header; the element
// right after it holds that category's links. The first category containing
// the active nav item gets its 'hide' / 'rotate' classes toggled once, and
// every header gets a click handler that toggles the same two classes.
var coll = document.getElementsByClassName('collapsible');
var checkActiveCategory = true;
for (var i = 0; i < coll.length; i++) {
  var links = coll[i].nextElementSibling.getElementsByTagName('*');

  // Only keep searching until the active category has been found.
  if (checkActiveCategory) {
    for (var j = 0; j < links.length; j++) {
      if (!links[j].classList.contains('navListItemActive')) {
        continue;
      }
      coll[i].nextElementSibling.classList.toggle('hide');
      coll[i].childNodes[1].classList.toggle('rotate');
      checkActiveCategory = false;
      break;
    }
  }

  // `this` is the clicked header: toggle its arrow first, then its link list.
  coll[i].addEventListener('click', function() {
    this.childNodes[1].classList.toggle('rotate');
    this.nextElementSibling.classList.toggle('hide');
  });
}
// Once the DOM is parsed, wire up the mobile navigation togglers and make
// the on-page table of contents close itself when one of its links is used.
document.addEventListener('DOMContentLoaded', function() {
  createToggler('#navToggler', '#docsNav', 'docsSliderActive');
  createToggler('#tocToggler', 'body', 'tocActive');

  var headings = document.querySelector('.toc-headings');
  headings && headings.addEventListener('click', function(event) {
    // Walk up from the clicked node towards the TOC root looking for a link.
    // The `el &&` guard stops the walk if the parent chain ends before
    // reaching `headings`, instead of dereferencing null.
    var el = event.target;
    while (el && el !== headings) {
      if (el.tagName === 'A') {
        document.body.classList.remove('tocActive');
        break;
      } else {
        el = el.parentNode;
      }
    }
  }, false);

  /**
   * Makes a click on one element toggle a CSS class on another.
   *
   * @param {string} togglerSelector - selector of the element to click.
   * @param {string} targetSelector - selector of the element whose class list changes.
   * @param {string} className - class toggled on the target at each click.
   *
   * Does nothing when either selector matches no element (querySelector
   * returns null in that case), so a click can never hit a null target.
   */
  function createToggler(togglerSelector, targetSelector, className) {
    var toggler = document.querySelector(togglerSelector);
    var target = document.querySelector(targetSelector);
    if (!toggler || !target) {
      return;
    }
    toggler.onclick = function(event) {
      event.preventDefault();
      target.classList.toggle(className);
    };
  }
});
</script></nav></div><div class="container mainContainer docsContainer"><div class="wrapper"><div class="post"><header class="postHeader"></header><article><div><span><h1><a class="anchor" aria-hidden="true" id="cameras"></a><a href="#cameras" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Cameras</h1>
<h2><a class="anchor" aria-hidden="true" id="camera-coordinate-systems"></a><a href="#camera-coordinate-systems" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Camera Coordinate Systems</h2>
<p>When working with 3D data, there are 4 coordinate systems users need to know:</p>
<ul>
<li><strong>World coordinate system</strong>
This is the system in which the object/scene lives - the world.</li>
<li><strong>Camera view coordinate system</strong>
This is the system that has its origin on the image plane and the <code>Z</code>-axis perpendicular to the image plane. In PyTorch3D, we assume that <code>+X</code> points left, and <code>+Y</code> points up and <code>+Z</code> points out from the image plane. The transformation from world to view happens after applying a rotation (<code>R</code>) and translation (<code>T</code>).</li>
<li><strong>NDC coordinate system</strong>
This is the normalized coordinate system that confines in a volume the rendered part of the object/scene. It is also known as the view volume. Under the PyTorch3D convention, <code>(+1, +1, znear)</code> is the top left near corner, and <code>(-1, -1, zfar)</code> is the bottom right far corner of the volume. The transformation from view to NDC happens after applying the camera projection matrix (<code>P</code>).</li>
<li><strong>Screen coordinate system</strong>
This is another representation of the view volume with the <code>XY</code> coordinates defined in pixel space instead of a normalized space.</li>
</ul>
<p>An illustration of the 4 coordinate systems is shown below
<img src="https://user-images.githubusercontent.com/4369065/90317960-d9b8db80-dee1-11ea-8088-39c414b1e2fa.png" alt="cameras"></p>
<h2><a class="anchor" aria-hidden="true" id="defining-cameras-in-pytorch3d"></a><a href="#defining-cameras-in-pytorch3d" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Defining Cameras in PyTorch3D</h2>
<p>Cameras in PyTorch3D transform an object/scene from world to NDC by first transforming the object/scene to view (via transforms <code>R</code> and <code>T</code>) and then projecting the 3D object/scene to NDC (via the projection matrix <code>P</code>, also known as the camera matrix). Thus, the camera parameters in <code>P</code> are assumed to be in NDC space. If the user has camera parameters in screen space, which is a common use case, the parameters should be transformed to NDC (see below for an example).</p>
<p>We describe the camera types in PyTorch3D and the convention for the camera parameters provided at construction time.</p>
<h3><a class="anchor" aria-hidden="true" id="camera-types"></a><a href="#camera-types" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>Camera Types</h3>
<p>All cameras inherit from <code>CamerasBase</code> which is a base class for all cameras. PyTorch3D provides four different camera types. The <code>CamerasBase</code> defines methods that are common to all camera models:</p>
<ul>
<li><code>get_camera_center</code> that returns the optical center of the camera in world coordinates</li>
<li><code>get_world_to_view_transform</code> which returns a 3D transform from world coordinates to the camera view coordinates (R, T)</li>
<li><code>get_full_projection_transform</code> which composes the projection transform (P) with the world-to-view transform (R, T)</li>
<li><code>transform_points</code> which takes a set of input points in world coordinates and projects to NDC coordinates ranging from [-1, -1, znear] to [+1, +1, zfar].</li>
<li><code>transform_points_screen</code> which takes a set of input points in world coordinates and projects them to the screen coordinates ranging from [0, 0, znear] to [W-1, H-1, zfar]</li>
</ul>
<p>Users can easily customize their own cameras. For each new camera, users should implement the <code>get_projection_transform</code> routine that returns the mapping <code>P</code> from camera view coordinates to NDC coordinates.</p>
<h4><a class="anchor" aria-hidden="true" id="fovperspectivecameras-fovorthographiccameras"></a><a href="#fovperspectivecameras-fovorthographiccameras" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>FoVPerspectiveCameras, FoVOrthographicCameras</h4>
<p>These two cameras follow the OpenGL convention for perspective and orthographic cameras respectively. The user provides the near <code>znear</code> and far <code>zfar</code> field which confines the view volume in the <code>Z</code> axis. The view volume in the <code>XY</code> plane is defined by field of view angle (<code>fov</code>) in the case of <code>FoVPerspectiveCameras</code> and by <code>min_x, min_y, max_x, max_y</code> in the case of <code>FoVOrthographicCameras</code>.</p>
<h4><a class="anchor" aria-hidden="true" id="perspectivecameras-orthographiccameras"></a><a href="#perspectivecameras-orthographiccameras" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>PerspectiveCameras, OrthographicCameras</h4>
<p>These two cameras follow the Multi-View Geometry convention for cameras. The user provides the focal length (<code>fx</code>, <code>fy</code>) and the principal point (<code>px</code>, <code>py</code>). For example, <code>camera = PerspectiveCameras(focal_length=((fx, fy),), principal_point=((px, py),))</code></p>
<p>As mentioned above, the focal length and principal point are used to convert a point <code>(X, Y, Z)</code> from view coordinates to NDC coordinates, as follows</p>
<pre><code class="hljs"><span class="hljs-comment"># for perspective</span>
<span class="hljs-attr">x_ndc</span> = fx * X / Z + px
<span class="hljs-attr">y_ndc</span> = fy * Y / Z + py
<span class="hljs-attr">z_ndc</span> = <span class="hljs-number">1</span> / Z
<span class="hljs-comment"># for orthographic</span>
<span class="hljs-attr">x_ndc</span> = fx * X + px
<span class="hljs-attr">y_ndc</span> = fy * Y + py
<span class="hljs-attr">z_ndc</span> = Z
</code></pre>
<p>Commonly, users have access to the focal length (<code>fx_screen</code>, <code>fy_screen</code>) and the principal point (<code>px_screen</code>, <code>py_screen</code>) in screen space. In that case, to construct the camera the user needs to additionally provide the <code>image_size = ((image_width, image_height),)</code>. More precisely, <code>camera = PerspectiveCameras(focal_length=((fx_screen, fy_screen),), principal_point=((px_screen, py_screen),), image_size = ((image_width, image_height),))</code>. Internally, the camera parameters are converted from screen to NDC as follows:</p>
<pre><code class="hljs"><span class="hljs-attr">fx</span> = fx_screen * <span class="hljs-number">2.0</span> / image_width
<span class="hljs-attr">fy</span> = fy_screen * <span class="hljs-number">2.0</span> / image_height
<span class="hljs-attr">px</span> = - (px_screen - image_width / <span class="hljs-number">2.0</span>) * <span class="hljs-number">2.0</span> / image_width
<span class="hljs-attr">py</span> = - (py_screen - image_height / <span class="hljs-number">2.0</span>) * <span class="hljs-number">2.0</span>/ image_height
</code></pre>
</span></div></article></div><div class="docLastUpdate"><em>Last updated by Nikhila Ravi</em></div><div class="docs-prevnext"><a class="docs-prev button" href="/docs/renderer_getting_started"><span class="arrow-prev"></span><span>Getting Started</span></a></div></div></div><nav class="onPageNav"><ul class="toc-headings"><li><a href="#camera-coordinate-systems">Camera Coordinate Systems</a></li><li><a href="#defining-cameras-in-pytorch3d">Defining Cameras in PyTorch3D</a><ul class="toc-headings"><li><a href="#camera-types">Camera Types</a></li></ul></li></ul></nav></div><footer class="nav-footer" id="footer"><section class="sitemap"><div class="footerSection"><div class="social"><a class="github-button" href="https://github.com/facebookresearch/pytorch3d" data-count-href="https://github.com/facebookresearch/pytorch3d/stargazers" data-show-count="true" data-count-aria-label="# stargazers on GitHub" aria-label="Star PyTorch3D on GitHub">pytorch3d</a></div></div></section><a href="https://opensource.facebook.com/" target="_blank" rel="noreferrer noopener" class="fbOpenSource"><img src="/img/oss_logo.png" alt="Facebook Open Source" width="170" height="45"/></a><section class="copyright">Copyright © 2020 Facebook Inc<br/>Legal:<a href="https://opensource.facebook.com/legal/privacy/" target="_blank" rel="noreferrer noopener">Privacy</a><a href="https://opensource.facebook.com/legal/terms/" target="_blank" rel="noreferrer noopener">Terms</a></section></footer></div></body></html>