<h1 id="rendering-overview">Rendering Overview</h1>
<p>Differentiable rendering is a relatively new and exciting research area in computer vision, bridging the gap between 2D and 3D by allowing 2D image pixels to be related back to 3D properties of a scene.</p>
<p>For example, by rendering an image from a 3D shape predicted by a neural network, it is possible to compute a 2D loss with a reference image. Inverting the rendering step means we can relate the 2D loss from the pixels back to the 3D properties of the shape such as the positions of mesh vertices, enabling 3D shapes to be learnt without any explicit 3D supervision.</p>
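<p>As a minimal sketch of this loop, assuming a sphere template, a fixed camera and a dummy reference silhouette as stand-ins for a real setup:</p>
<pre><code>import torch
from pytorch3d.utils import ico_sphere
from pytorch3d.renderer import (
    FoVPerspectiveCameras, RasterizationSettings, MeshRenderer,
    MeshRasterizer, SoftSilhouetteShader, look_at_view_transform,
)

# Template mesh plus a learnable per-vertex offset: the "3D properties".
src_mesh = ico_sphere(level=2)
deform = torch.zeros_like(src_mesh.verts_packed(), requires_grad=True)

R, T = look_at_view_transform(dist=3.0, elev=0.0, azim=0.0)
cameras = FoVPerspectiveCameras(R=R, T=T)
raster_settings = RasterizationSettings(
    image_size=128, blur_radius=1e-4, faces_per_pixel=50
)
renderer = MeshRenderer(
    rasterizer=MeshRasterizer(cameras=cameras, raster_settings=raster_settings),
    shader=SoftSilhouetteShader(),
)

# Dummy reference image; in practice this is an observed silhouette.
reference = torch.ones(1, 128, 128)

silhouette = renderer(src_mesh.offset_verts(deform))[..., 3]  # alpha channel
loss = ((silhouette - reference) ** 2).mean()
loss.backward()  # the 2D loss produces gradients on the 3D vertex offsets
</code></pre>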
<p>We extensively researched existing codebases for differentiable rendering and found that:</p>
<ul>
<li>the rendering pipeline is complex, with more than seven separate components that need to interoperate and be differentiable</li>
<li>popular existing approaches [<a href="#1">1</a>, <a href="#2">2</a>] are based on the same core implementation, which bundles many of the key components into large CUDA kernels that require significant expertise to understand and leave limited scope for extension</li>
<li>existing methods either do not support batching or assume that meshes in a batch have the same number of vertices and faces</li>
<li>existing projects provide only CUDA implementations, so they cannot be used without a GPU</li>
</ul>
<p>In order to experiment with different approaches, we wanted a modular implementation that is easy to use and extend, and supports <a href="/docs/batching">heterogeneous batching</a>. Taking inspiration from existing work [<a href="#1">1</a>, <a href="#2">2</a>], we have created a new, modular, differentiable renderer with <strong>parallel implementations in PyTorch, C++ and CUDA</strong>, as well as comprehensive documentation and tests, with the aim of helping to further research in this field.</p>
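<p>For example, a heterogeneous batch can be built directly from per-mesh tensors of different sizes (a minimal sketch with made-up toy geometry):</p>
<pre><code>import torch
from pytorch3d.structures import Meshes

# Two meshes with different numbers of vertices and faces in one batch.
verts0 = torch.rand(4, 3)                      # 4 vertices
faces0 = torch.tensor([[0, 1, 2], [0, 2, 3]])  # 2 faces
verts1 = torch.rand(3, 3)                      # 3 vertices
faces1 = torch.tensor([[0, 1, 2]])             # 1 face

batch = Meshes(verts=[verts0, verts1], faces=[faces0, faces1])
print(batch.num_verts_per_mesh())  # tensor([4, 3])
print(batch.num_faces_per_mesh())  # tensor([2, 1])
</code></pre>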
<p>Our implementation decouples the rasterization and shading steps of rendering. The core rasterization step (based on <a href="#2">[2]</a>) returns several intermediate variables and has an optimized implementation in CUDA. The rest of the pipeline is implemented purely in PyTorch, and is designed to be customized and extended. With this approach, the PyTorch3D differentiable renderer can be imported as a library.</p>
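<p>Because of this decoupling, the rasterizer can be used on its own and its outputs shaded with ordinary PyTorch code. A minimal sketch, assuming a sphere mesh and a trivial depth "shader" purely for illustration:</p>
<pre><code>import torch
from pytorch3d.utils import ico_sphere
from pytorch3d.renderer import (
    FoVPerspectiveCameras, RasterizationSettings,
    MeshRasterizer, look_at_view_transform,
)

R, T = look_at_view_transform(dist=3.0, elev=0.0, azim=0.0)
cameras = FoVPerspectiveCameras(R=R, T=T)
raster_settings = RasterizationSettings(
    image_size=256, blur_radius=0.0, faces_per_pixel=1
)
rasterizer = MeshRasterizer(cameras=cameras, raster_settings=raster_settings)

# The rasterizer returns the intermediate variables listed in the memory
# note below: fragments.pix_to_face, fragments.zbuf, fragments.dists and
# fragments.bary_coords.
fragments = rasterizer(ico_sphere(level=2))

# "Shading" is then just differentiable PyTorch, e.g. a depth image:
depth = fragments.zbuf[..., 0]  # (N, H, W); -1 where no face covers the pixel
</code></pre>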
<h2><a class="anchor" aria-hidden="true" id="uget-startedu"></a><a href="#uget-startedu" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a><u>Get started</u></h2>
|
||
<p>To learn more about the implementation and start using the renderer, refer to <a href="/docs/renderer_getting_started">getting started with renderer</a>, which also contains the <a href="/docs/assets/architecture_renderer.jpg">architecture overview</a> and <a href="/docs/assets/transforms_overview.jpg">coordinate transformation conventions</a>.</p>
<h2><a class="anchor" aria-hidden="true" id="utech-reportu"></a><a href="#utech-reportu" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a><u>Tech Report</u></h2>
|
||
<p>For an in-depth explanation of the renderer design, key features and benchmarks, please refer to the PyTorch3D Technical Report on arXiv: <a href="https://arxiv.org/abs/2007.08501">Accelerating 3D Deep Learning with PyTorch3D</a>. For the Pulsar backend, see <a href="https://arxiv.org/abs/2004.07484">Fast Differentiable Raycasting for Neural Rendering using Sphere-based Representations</a>.</p>
<hr>
<p><strong>NOTE: CUDA Memory usage</strong></p>
<p>The main comparison in the Technical Report is with SoftRasterizer [<a href="#2">2</a>]. The SoftRasterizer forward CUDA kernel outputs only one <code>(N, H, W, 4)</code> FloatTensor, whereas the PyTorch3D rasterizer forward CUDA kernel outputs four tensors:</p>
<ul>
<li><code>pix_to_face</code>, LongTensor <code>(N, H, W, K)</code></li>
<li><code>zbuf</code>, FloatTensor <code>(N, H, W, K)</code></li>
<li><code>dist</code>, FloatTensor <code>(N, H, W, K)</code></li>
<li><code>bary_coords</code>, FloatTensor <code>(N, H, W, K, 3)</code></li>
</ul>
<p>where <strong>N</strong> = batch size, <strong>H/W</strong> are the image height and width, and <strong>K</strong> is the number of faces per pixel. The PyTorch3D backward pass returns gradients for <code>zbuf</code>, <code>dist</code> and <code>bary_coords</code>.</p>
<p>Returning intermediate variables from rasterization has an associated memory cost. We can calculate the theoretical lower bound on the memory usage for the forward and backward pass as follows:</p>
<pre><code class="hljs"># Assume <span class="hljs-number">4</span> bytes per <span class="hljs-built_in">float</span>, <span class="hljs-keyword">and</span> <span class="hljs-number">8</span> bytes <span class="hljs-keyword">for</span> long
|
||
|
||
memory_forward_pass = ((N * H * W * K) * <span class="hljs-number">2</span> + (N * H * W * K * <span class="hljs-number">3</span>)) * <span class="hljs-number">4</span> + (N * H * W * K) * <span class="hljs-number">8</span>
|
||
memory_backward_pass = ((N * H * W * K) * <span class="hljs-number">2</span> + (N * H * W * K * <span class="hljs-number">3</span>)) * <span class="hljs-number">4</span>
|
||
|
||
total_memory = memory_forward_pass + memory_backward_pass
|
||
= (N * H * W * K) * (<span class="hljs-number">5</span> * <span class="hljs-number">4</span> * <span class="hljs-number">2</span> + <span class="hljs-number">8</span>)
|
||
= (N * H * W * K) * <span class="hljs-number">48</span>
|
||
</code></pre>
<p>We need 48 bytes per face per pixel of the rasterized output. To remain within memory limits, we can vary the batch size (<strong>N</strong>), image size (<strong>H/W</strong>) and faces per pixel (<strong>K</strong>). For example, for a fixed batch size, if using a larger image size, try reducing the number of faces per pixel.</p>
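<p>As a quick worked example of this bound (the configuration numbers are illustrative, not from the report):</p>
<pre><code># Lower bound for an assumed config: batch of 8, 256 x 256 images, K = 100.
N, H, W, K = 8, 256, 256, 100
total_bytes = N * H * W * K * 48
print(total_bytes / 2**30)  # ~2.34 GiB, before any other allocations
</code></pre>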
<hr>
<h3><a class="anchor" aria-hidden="true" id="references"></a><a href="#references" aria-hidden="true" class="hash-link"><svg class="hash-link-icon" aria-hidden="true" height="16" version="1.1" viewBox="0 0 16 16" width="16"><path fill-rule="evenodd" d="M4 9h1v1H4c-1.5 0-3-1.69-3-3.5S2.55 3 4 3h4c1.45 0 3 1.69 3 3.5 0 1.41-.91 2.72-2 3.25V8.59c.58-.45 1-1.27 1-2.09C10 5.22 8.98 4 8 4H4c-.98 0-2 1.22-2 2.5S3 9 4 9zm9-3h-1v1h1c1 0 2 1.22 2 2.5S13.98 12 13 12H9c-.98 0-2-1.22-2-2.5 0-.83.42-1.64 1-2.09V6.25c-1.09.53-2 1.84-2 3.25C6 11.31 7.55 13 9 13h4c1.45 0 3-1.69 3-3.5S14.5 6 13 6z"></path></svg></a>References</h3>
|
||
<p><a id="1">[1]</a> Kato et al, 'Neural 3D Mesh Renderer', CVPR 2018</p>
|
||
<p><a id="2">[2]</a> Liu et al, 'Soft Rasterizer: A Differentiable Renderer for Image-based 3D Reasoning', ICCV 2019</p>
|
||
<p><a id="3">[3]</a> Loper et al, 'OpenDR: An Approximate Differentiable Renderer', ECCV 2014</p>
|
||
<p><a id="4">[4]</a> De La Gorce et al, 'Model-based 3D Hand Pose Estimation from Monocular Video', PAMI 2011</p>
|
||
<p><a id="5">[5]</a> Li et al, 'Differentiable Monte Carlo Ray Tracing through Edge Sampling', SIGGRAPH Asia 2018</p>
|
||
<p><a id="6">[6]</a> Yifan et al, 'Differentiable Surface Splatting for Point-based Geometry Processing', SIGGRAPH Asia 2019</p>
|
||
<p><a id="7">[7]</a> Loubet et al, 'Reparameterizing Discontinuous Integrands for Differentiable Rendering', SIGGRAPH Asia 2019</p>
|
||
<p><a id="8">[8]</a> Chen et al, 'Learning to Predict 3D Objects with an Interpolation-based Differentiable Renderer', NeurIPS 2019</p>
|
||