Initial commit

fbshipit-source-id: ad58e416e3ceeca85fae0583308968d04e78fe0d
2026-02-06 14:02:19 +08:00 · 2020-01-23 11:53:41 -08:00
commit dbf06b504b
211 changed files with 47362 additions and 0 deletions
--- a/docs/.gitignore
+++ b/docs/.gitignore
@@ -0,0 +1,7 @@
+source
+_build
+_static
+_template
+*-checkpoint.ipynb
+.ipynb_checkpoints
+.ipynb_checkpoints/**
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -0,0 +1,21 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# Minimal makefile for Sphinx documentation
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    =
+SPHINXBUILD   = sphinx-build
+SOURCEDIR     = .
+BUILDDIR      = _build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
--- a/docs/README.md
+++ b/docs/README.md
@@ -0,0 +1,78 @@
+
+## Setup
+
+### Install dependencies
+
+```
+pip install -U recommonmark mock sphinx sphinx_rtd_theme sphinx_markdown_tables
+```
+
+### Add symlink to the root README.md
+
+We want to include the root readme as an overview. Before generating the docs create a symlink to the root readme.
+
+```
+cd docs        
+ln -s ../README.md  overview.md
+```
+
+In `conf.py` for deployment this is done using `subprocess.call`.
+
+### Add a new file
+
+Add a new `.md` or `.rst` file and add the name to the doc tree in `index.rst` e.g
+
+```
+.. toctree::
+   :maxdepth: 1
+   :caption: Intro Documentation
+
+   overview
+```
+
+To autogenerate docs from docstrings in the source code, add the import path for the function e.g.
+
+```
+Chamfer Loss
+--------------------
+
+.. autoclass:: loss.chamfer.chamfer_distance
+    :members:
+    :undoc-members:
+
+    .. automethod:: __init__
+
+```
+
+### Build
+
+From `pytorch3d/docs` run:
+
+```
+> make html
+```
+
+The website is generated in `_build/html`.
+
+### Common Issues
+
+Sphinx can be fussy, and sometimes about things you weren’t expecting. For example, you might encounter something like:
+
+```
+WARNING: toctree contains reference to nonexisting document u'overview'
+...
+checking consistency...
+<pytorch3d>/docs/overview.rst::
+WARNING: document isn't included in any toctree
+```
+
+You might have indented `overview` in the `.. toctree::` directive in `index.rst` with four spaces, when Sphinx is expecting three.
+
+
+### View
+
+Start a python simple server:
+
+```
+> python -m http.server
+```
+
+Navigate to: `http://0.0.0.0:8000/`
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -0,0 +1,200 @@
+# -*- coding: utf-8 -*-
+# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+
+# flake8: noqa
+
+# Configuration file for the Sphinx documentation builder.
+#
+# This file only contains a selection of the most common options. For a full
+# list see the documentation:
+# https://www.sphinx-doc.org/en/master/usage/configuration.html
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+#
+import os
+import sys
+
+import pytorch3d  # isort: skip
+
+import mock
+from recommonmark.parser import CommonMarkParser
+from recommonmark.states import DummyStateMachine
+from sphinx.builders.html import StandaloneHTMLBuilder
+from sphinx.ext.autodoc import between
+
+# Monkey patch to fix recommonmark 0.4 doc reference issues.
+orig_run_role = DummyStateMachine.run_role
+
+
+def run_role(self, name, options=None, content=None):
+    if name == "doc":
+        name = "any"
+    return orig_run_role(self, name, options, content)
+
+
+DummyStateMachine.run_role = run_role
+
+
+StandaloneHTMLBuilder.supported_image_types = [
+    "image/svg+xml",
+    "image/gif",
+    "image/png",
+    "image/jpeg",
+]
+
+# -- Path setup --------------------------------------------------------------
+
+
+sys.path.insert(0, os.path.abspath("../"))
+sys.path.insert(0, os.path.abspath("../pytorch3d"))
+sys.path.insert(0, os.path.abspath("../../"))
+
+DEPLOY = os.environ.get("READTHEDOCS") == "True"
+needs_sphinx = "1.7"
+
+
+# The short X.Y version
+version = pytorch3d.__version__
+# The full version, including alpha/beta/rc tags
+release = version
+
+try:
+    import torch  # noqa
+except ImportError:
+    for m in [
+        "torch",
+        "torchvision",
+        "torch.nn",
+        "torch.autograd",
+        "torch.autograd.function",
+        "torch.nn.modules",
+        "torch.nn.modules.utils",
+        "torch.utils",
+        "torch.utils.data",
+        "torchvision",
+        "torchvision.ops",
+    ]:
+        sys.modules[m] = mock.Mock(name=m)
+
+for m in ["cv2", "scipy", "numpy", "pytorch3d._C", "np.eye", "np.zeros"]:
+    sys.modules[m] = mock.Mock(name=m)
+
+# -- Project information -----------------------------------------------------
+
+project = "PyTorch3D"
+copyright = "2019, facebookresearch"
+author = "facebookresearch"
+
+# The full version, including alpha/beta/rc tags
+# NOTE(review): this reassigns `release`, replacing the `release = version`
+# value (taken from pytorch3d.__version__) set earlier in this file —
+# confirm which of the two is intended.
+release = "v0.1"
+
+# -- General configuration ---------------------------------------------------
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+
+extensions = [
+    "sphinx_markdown_tables",
+    "sphinx.ext.autodoc",
+    "sphinx.ext.mathjax",
+    "sphinx.ext.napoleon",
+    "sphinx.ext.intersphinx",
+    "sphinx.ext.todo",
+    "sphinx.ext.coverage",
+    "sphinx.ext.viewcode",
+    "sphinx.ext.githubpages",
+]
+
+# -- Configurations for plugins ------------
+napoleon_google_docstring = True
+napoleon_include_init_with_doc = True
+napoleon_include_special_with_doc = True
+napoleon_numpy_docstring = False
+# napoleon_use_param = False
+napoleon_use_rtype = False
+autodoc_inherit_docstrings = False
+autodoc_member_order = "bysource"
+
+source_parsers = {".md": CommonMarkParser}
+
+
+# The suffix(es) of source filenames.
+# You can specify multiple suffix as a list of string:
+#
+source_suffix = [".rst", ".md"]
+
+# The master toctree document.
+master_doc = "index"
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ["_templates"]
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+# This pattern also affects html_static_path and html_extra_path.
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "build", "README.md"]
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = "sphinx"
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = "sphinx_rtd_theme"
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ["_static"]
+
+html_theme_options = {"collapse_navigation": True}
+
+
+def url_resolver(url):
+    if ".html" not in url:
+        url = url.replace("../", "")
+        return (
+            "https://github.com/facebookresearch/pytorch3d/blob/master/" + url
+        )
+    else:
+        if DEPLOY:
+            return "http://pytorch3d.readthedocs.io/" + url
+        else:
+            return "/" + url
+
+
+def setup(app):
+    # Add symlink to root README
+    if DEPLOY:
+        import subprocess
+
+        subprocess.call(["ln", "-s", "../README.md", "overview.md"])
+
+    from recommonmark.transform import AutoStructify
+
+    app.add_config_value(
+        "recommonmark_config",
+        {
+            "url_resolver": url_resolver,
+            "auto_toc_tree_section": "Contents",
+            "enable_math": True,
+            "enable_inline_math": True,
+            "enable_eval_rst": True,
+            "enable_auto_toc_tree": True,
+        },
+        True,
+    )
+
+    # Register a sphinx.ext.autodoc.between listener to ignore everything
+    # between lines that contain the word IGNORE
+    app.connect(
+        "autodoc-process-docstring", between("^.*IGNORE.*$", exclude=True)
+    )
+    app.add_transform(AutoStructify)
+
+    return app
--- a/docs/figs/architecture_overview.png
+++ b/docs/figs/architecture_overview.png
--- a/docs/figs/batch_intro.png
+++ b/docs/figs/batch_intro.png
--- a/docs/figs/batch_modes.gif
+++ b/docs/figs/batch_modes.gif
--- a/docs/figs/fullset_batch_size_16.png
+++ b/docs/figs/fullset_batch_size_16.png
--- a/docs/figs/meshrcnn.png
+++ b/docs/figs/meshrcnn.png
--- a/docs/figs/opengl_coordframes.png
+++ b/docs/figs/opengl_coordframes.png
--- a/docs/figs/p3d_naive_vs_coarse.png
+++ b/docs/figs/p3d_naive_vs_coarse.png
--- a/docs/figs/p3d_vs_softras.png
+++ b/docs/figs/p3d_vs_softras.png
--- a/docs/figs/transformations_overview.png
+++ b/docs/figs/transformations_overview.png
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -0,0 +1,17 @@
+Welcome to PyTorch3D's documentation!
+=====================================
+
+PyTorch3D is a library of reusable components for Deep Learning with 3D data.
+
+Table of Contents
+=================
+
+.. toctree::
+   :maxdepth: 2
+
+   overview
+
+.. toctree::
+   :maxdepth: 2
+
+   modules/index
--- a/docs/modules/index.rst
+++ b/docs/modules/index.rst
@@ -0,0 +1,13 @@
+API Documentation
+==================
+
+.. toctree::
+
+    structures
+    io
+    loss
+    ops
+    renderer/index
+    transforms
+    utils 
+    
--- a/docs/modules/io.rst
+++ b/docs/modules/io.rst
@@ -0,0 +1,7 @@
+pytorch3d.io 
+===========================
+
+.. automodule:: pytorch3d.io
+    :members:
+    :undoc-members:
+    :show-inheritance:
--- a/docs/modules/loss.rst
+++ b/docs/modules/loss.rst
@@ -0,0 +1,9 @@
+pytorch3d.loss
+====================
+
+Loss functions for meshes and point clouds.
+
+.. automodule:: pytorch3d.loss
+    :members:
+    :undoc-members:
+    :show-inheritance:
--- a/docs/modules/ops.rst
+++ b/docs/modules/ops.rst
@@ -0,0 +1,6 @@
+pytorch3d.ops 
+===========================
+
+.. automodule:: pytorch3d.ops
+    :members:
+    :undoc-members:
--- a/docs/modules/renderer/blending.rst
+++ b/docs/modules/renderer/blending.rst
@@ -0,0 +1,7 @@
+blending
+===========================
+
+.. automodule:: pytorch3d.renderer.blending
+    :members:
+    :undoc-members:
+    :show-inheritance:
--- a/docs/modules/renderer/cameras.rst
+++ b/docs/modules/renderer/cameras.rst
@@ -0,0 +1,7 @@
+cameras
+===========================
+
+.. automodule:: pytorch3d.renderer.cameras
+    :members:
+    :undoc-members:
+    :show-inheritance:
--- a/docs/modules/renderer/index.rst
+++ b/docs/modules/renderer/index.rst
@@ -0,0 +1,15 @@
+pytorch3d.renderer 
+===========================
+
+.. toctree::
+
+    rasterizer
+    cameras
+    lighting
+    materials
+    texturing
+    blending
+    shading
+    shader
+    renderer
+    utils
--- a/docs/modules/renderer/lighting.rst
+++ b/docs/modules/renderer/lighting.rst
@@ -0,0 +1,6 @@
+lighting
+===========================
+
+.. automodule:: pytorch3d.renderer.lighting
+    :members:
+    :undoc-members:
--- a/docs/modules/renderer/materials.rst
+++ b/docs/modules/renderer/materials.rst
@@ -0,0 +1,7 @@
+materials
+===========================
+
+.. automodule:: pytorch3d.renderer.materials
+    :members:
+    :undoc-members:
+    :show-inheritance:
--- a/docs/modules/renderer/rasterizer.rst
+++ b/docs/modules/renderer/rasterizer.rst
@@ -0,0 +1,10 @@
+rasterizer
+===========================
+
+.. automodule:: pytorch3d.renderer.mesh.rasterize_meshes
+    :members:
+    :undoc-members:
+    
+.. automodule:: pytorch3d.renderer.mesh.rasterizer
+    :members:
+    :undoc-members:
--- a/docs/modules/renderer/shader.rst
+++ b/docs/modules/renderer/shader.rst
@@ -0,0 +1,7 @@
+shader
+===========================
+
+.. automodule:: pytorch3d.renderer.mesh.shader
+    :members:
+    :undoc-members:
+    
--- a/docs/modules/renderer/shading.rst
+++ b/docs/modules/renderer/shading.rst
@@ -0,0 +1,7 @@
+shading
+===========================
+
+.. automodule:: pytorch3d.renderer.mesh.shading
+    :members:
+    :undoc-members:
+    
--- a/docs/modules/renderer/texturing.rst
+++ b/docs/modules/renderer/texturing.rst
@@ -0,0 +1,7 @@
+texturing
+===========================
+
+.. automodule:: pytorch3d.renderer.mesh.texturing
+    :members:
+    :undoc-members:
+    
--- a/docs/modules/renderer/utils.rst
+++ b/docs/modules/renderer/utils.rst
@@ -0,0 +1,6 @@
+utils
+===========================
+
+.. automodule:: pytorch3d.renderer.utils
+    :members:
+    :undoc-members:
--- a/docs/modules/structures.rst
+++ b/docs/modules/structures.rst
@@ -0,0 +1,8 @@
+pytorch3d.structures 
+====================
+
+.. automodule:: pytorch3d.structures
+    :members:
+    :undoc-members:
+
+
--- a/docs/modules/transforms.rst
+++ b/docs/modules/transforms.rst
@@ -0,0 +1,7 @@
+pytorch3d.transforms 
+===========================
+
+.. automodule:: pytorch3d.transforms
+    :members:
+    :undoc-members:
+    :show-inheritance:
--- a/docs/modules/utils.rst
+++ b/docs/modules/utils.rst
@@ -0,0 +1,7 @@
+pytorch3d.utils
+====================
+
+.. automodule:: pytorch3d.utils
+    :members:
+    :undoc-members:
+    :show-inheritance:
--- a/docs/notes/batching.md
+++ b/docs/notes/batching.md
@@ -0,0 +1,27 @@
+# Batching
+
+In deep learning, every optimization step operates on multiple input examples for robust training. Thus, efficient batching is crucial. For image inputs, batching is straightforward; N images are resized to the same height and width and stacked as a 4-dimensional tensor of shape `N x 3 x H x W`. For meshes, batching is less straightforward.
+
+<img src="../figs/batch_intro.png" alt="batch_intro" align="middle"/>
+
+## Batch modes for meshes
+
+Assume you want to construct a batch containing two meshes, with `mesh1 = (v1: V1 x 3, f1: F1 x 3)` containing `V1` vertices and `F1` faces, and `mesh2 = (v2: V2 x 3, f2: F2 x 3)` with `V2 (!= V1)` vertices and `F2 (!= F1)` faces. The [Meshes][meshes] data structure provides three different ways to batch *heterogeneous* meshes. If `meshes = Meshes(verts = [v1, v2], faces = [f1, f2])` is an instantiation of the data structure, then
+
+* List: Returns the examples in the batch as a list of tensors. Specifically, `meshes.verts_list()` returns the list of vertices `[v1, v2]`. Similarly, `meshes.faces_list()` returns the list of faces `[f1, f2]`.
+* Padded: The padded representation constructs a tensor by padding the extra values. Specifically, `meshes.verts_padded()` returns a tensor of shape `2 x max(V1, V2) x 3` and pads the extra vertices with `0`s. Similarly, `meshes.faces_padded()` returns a tensor of shape `2 x max(F1, F2) x 3` and pads the extra faces with `-1`s.
+* Packed: The packed representation concatenates the examples in the batch into a tensor. In particular, `meshes.verts_packed()` returns a tensor of shape `(V1 + V2) x 3`. Similarly, `meshes.faces_packed()` returns a tensor of shape `(F1 + F2) x 3` for the faces. In the packed mode, auxiliary variables are computed that enable efficient conversion between packed and padded or list modes.
+
+<img src="../figs/batch_modes.gif" alt="batch_modes" height="450" align="middle" />
+
+## Use cases for batch modes
+
+The need for different mesh batch modes is inherent to the way pytorch operators are implemented. To fully utilize the optimized pytorch ops, the [Meshes][meshes] data structure allows for efficient conversion between the different batch modes. This is crucial when aiming for a fast and efficient training cycle. An example of this is [Mesh R-CNN][meshrcnn]. Here, in the same forward pass different parts of the network assume different inputs, which are computed by converting between the different batch modes. In particular, [vert_align][vert_align] assumes a *padded* input tensor while immediately after [graph_conv][graphconv] assumes a *packed* input tensor.
+
+<img src="../figs/meshrcnn.png" alt="meshrcnn" width="700" align="middle" />
+
+
+[meshes]: https://github.com/facebookresearch/pytorch3d/blob/master/pytorch3d/structures/meshes.py
+[graphconv]: https://github.com/facebookresearch/pytorch3d/blob/master/pytorch3d/ops/graph_conv.py
+[vert_align]: https://github.com/facebookresearch/pytorch3d/blob/master/pytorch3d/ops/vert_align.py
+[meshrcnn]: https://github.com/facebookresearch/meshrcnn
--- a/docs/notes/meshes_io.md
+++ b/docs/notes/meshes_io.md
@@ -0,0 +1,67 @@
+# Meshes and IO
+
+The Meshes object represents a batch of triangulated meshes, and is central to
+much of the functionality of pytorch3d. There is no insistence that each mesh in
+the batch has the same number of vertices or faces. When available, it can store
+other data which pertains to the mesh, for example face normals, face areas
+and textures.
+
+Two common file formats for storing single meshes are ".obj" and ".ply" files,
+and pytorch3d has functions for reading these.
+
+## OBJ
+
+Obj files have a standard way to store extra information about a mesh. Given an
+obj file, it can be read with
+
+```
+  verts, faces, aux = load_obj(filename)
+```
+
+which sets `verts` to be a (V,3)-tensor of vertices and `faces.verts_idx` to be
+an (F,3)-tensor of the vertex-indices of each of the corners of the faces.
+Faces which are not triangles will be split into triangles. `aux` is an object
+which may contain normals, uv coordinates, material colors and textures if they
+are present, and `faces` may additionally contain indices into these normals,
+textures and materials in its NamedTuple structure. A Meshes object containing a
+single mesh can be created from just the vertices and faces using
+```
+    meshes = Meshes(verts=[verts], faces=[faces.verts_idx])
+```
+
+If there is texture information in the `.obj` it can be used to initialize a
+`Textures` class which is passed into the `Meshes` constructor.  Currently we
+support loading of texture maps for meshes which have one texture map for the
+entire mesh e.g.
+
+```
+verts_uvs = aux.verts_uvs[None, ...]  # (1, V, 2)
+faces_uvs = faces.textures_idx[None, ...]  # (1, F, 3)
+tex_maps = aux.texture_images
+
+# tex_maps is a dictionary of {material name: texture image}.
+# Take the first image:
+texture_image = list(tex_maps.values())[0]
+texture_image = texture_image[None, ...]  # (1, H, W, 3)
+
+# Create a textures object
+tex = Textures(verts_uvs=verts_uvs, faces_uvs=faces_uvs, maps=texture_image)
+
+# Initialise the mesh with textures
+meshes = Meshes(verts=[verts], faces=[faces.verts_idx], textures=tex)
+```
+## PLY
+
+Ply files are flexible in the way they store additional information; pytorch3d
+provides a function just to read the vertices and faces from a ply file.
+The call
+```
+    verts, faces = load_ply(filename)
+```
+sets `verts` to be a (V,3)-tensor of vertices and `faces` to be an (F,3)-
+tensor of the vertex-indices of each of the corners of the faces. Faces which
+are not triangles will be split into triangles. A Meshes object containing a
+single mesh can be created from this data using
+```
+    meshes = Meshes(verts=[verts], faces=[faces])
+```
--- a/docs/notes/renderer.md
+++ b/docs/notes/renderer.md
@@ -0,0 +1,115 @@
+# Differentiable Rendering
+
+Differentiable rendering is a relatively new and exciting research area in computer vision, bridging the gap between 2D and 3D by allowing 2D image pixels to be related back to 3D properties of a scene.
+
+For example, by rendering an image from a 3D shape predicted by a neural network, it is possible to compute a 2D loss with a reference image. Inverting the rendering step means we can relate the 2D loss from the pixels back to the 3D properties of the shape such as the positions of mesh vertices, enabling 3D shapes to be learnt without any explicit 3D supervision.
+
+We extensively researched existing codebases for differentiable rendering and found that:
+- the rendering pipeline is complex with more than 7 separate components which need to interoperate and be differentiable
+- popular existing approaches [[1](#1), [2](#2)] are based on the same core implementation which bundles many of the key components into large CUDA kernels which require significant expertise to understand, and has limited scope for extensions  
+- existing methods either do not support batching or assume that meshes in a batch have the same number of vertices and faces
+- existing projects only provide CUDA implementations so they cannot be used without GPUs
+
+In order to experiment with different approaches, we wanted a modular implementation that is easy to use and extend, and supports [heterogeneous batching](batching.md).
+
+Taking inspiration from existing work [[1](#1), [2](#2)], we have created a new, modular, differentiable renderer with **parallel implementations in PyTorch, C++ and CUDA**, as well as comprehensive documentation and tests, with the aim of helping to further research in this field.
+
+Our implementation decouples the rasterization and shading steps of rendering. The core rasterization step (based on [[2]](#2)) returns several intermediate variables and has an optimized implementation in CUDA. The rest of the pipeline is implemented purely in PyTorch, and is designed to be customized and extended. With this approach, the PyTorch3d differentiable renderer can be imported as a library.
+
+## <u>Get started</u>
+
+To learn more about the implementation and start using the renderer refer to [renderer_getting_started.md](renderer_getting_started.md), which also contains the [architecture overview](../figs/architecture_overview.png) and [coordinate transformation conventions](../figs/transformations_overview.png).
+
+
+## <u>Key features</u>
+
+### 1. CUDA support for fast rasterization of large meshes
+
+We implemented modular CUDA kernels for the forward and backward pass of rasterization, adapting a traditional graphics approach known as "coarse-to-fine" rasterization.
+
+First, the image is divided into a coarse grid and mesh faces are allocated to the grid cell in which they occur. This is followed by a refinement step which does pixel wise rasterization of the reduced subset of faces per grid cell. The grid cell size is a parameter which can be varied (`bin_size`).
+
+We additionally introduce a parameter `faces_per_pixel` which allows users to specify the top K faces which should be returned per pixel in the image (as opposed to traditional rasterization which returns only the index of the closest face in the mesh per pixel). The top K face properties can then be aggregated using different methods (such as the sigmoid/softmax approach proposed by Liu et al. in SoftRasterizer [[2]](#2)).
+
+We compared PyTorch3d with SoftRasterizer to measure the effect of both these design changes on the speed of rasterization. We selected a set of meshes of different sizes from ShapeNetV1 core, and rasterized one mesh in each batch to produce images of different sizes. We report the speed of the forward and backward passes.
+
+**Fig 1: PyTorch3d Naive vs Coarse-to-fine**
+
+This figure shows how the coarse-to-fine strategy for rasterization results in significant speed up compared to naive rasterization for large image size and large mesh sizes.
+
+<img src="../figs/p3d_naive_vs_coarse.png" width="1000">
+
+
+For small mesh and image sizes, the naive approach is slightly faster. We advise that you understand the data you are using and choose the rasterization setting which suits your performance requirements. It is easy to switch between the naive and coarse-to-fine options by adjusting the `bin_size` value when initializing the [rasterization settings](https://github.com/facebookresearch/pytorch3d/blob/master/pytorch3d/renderer/mesh/rasterizer.py#L26).
+
+Setting `bin_size = 0` will enable naive rasterization. If `bin_size > 0`, the coarse-to-fine approach is used. The default is `bin_size = None` in which case we set the bin size based on [heuristics](https://github.com/facebookresearch/pytorch3d/blob/master/pytorch3d/renderer/mesh/rasterize_meshes.py#L92).
+
+**Fig 2: PyTorch3d Coarse-to-fine vs SoftRasterizer**
+
+This figure shows the effect of the _combination_ of coarse-to-fine rasterization and caching the faces rasterized per pixel returned from the forward pass. For large meshes and image sizes, we again observe that the PyTorch3d rasterizer is significantly faster, noting that the speed is dominated by the forward pass and the backward pass is very fast.
+
+In the SoftRasterizer implementation, in both the forward and backward pass, there is a loop over every single face in the mesh for every pixel in the image. Therefore, the time for the full forward plus backward pass is ~2x the time for the forward pass. For small mesh and image sizes, the SoftRasterizer approach is slightly faster.
+
+<img src="../figs/p3d_vs_softras.png" width="1000">
+
+
+
+### 2. Support for Heterogeneous Batches
+
+PyTorch3d supports efficient rendering of batches of meshes where each mesh has different numbers of vertices and faces. This is done without using padded inputs.
+
+We again compare with SoftRasterizer which only supports batches of homogeneous meshes and test two cases: 1) a for loop over meshes in the batch, 2) padded inputs, and compare with the native heterogeneous batching support in PyTorch3d.
+
+We group meshes from ShapeNet into bins based on the number of faces in the mesh, and sample to compose a batch. We then render images of fixed size and measure the speed of the forward and backward passes.
+
+We tested with a range of increasingly large meshes and bin sizes.
+
+**Fig 3: PyTorch3d heterogeneous batching compared with SoftRasterizer**
+
+<img src="../figs/fullset_batch_size_16.png" width="700"/>
+
+This shows that for large meshes and large bin width (i.e. more variation in mesh size in the batch) the heterogeneous batching approach in PyTorch3d is faster than either of the workarounds with SoftRasterizer.
+
+(settings: batch size = 16, mesh sizes in bins ranging from 500-350k faces, image size = 64, faces per pixel = 100)
+
+---
+
+**NOTE: CUDA Memory usage**
+
+The SoftRasterizer forward CUDA kernel only outputs one `(N, H, W, 4)` FloatTensor compared with the PyTorch3d rasterizer forward CUDA kernel which outputs 4 tensors:
+
+  - `pix_to_face`, LongTensor `(N, H, W, K)`  
+  - `zbuf`, FloatTensor `(N, H, W, K)`
+  - `dist`, FloatTensor `(N, H, W, K)`
+  - `bary_coords`, FloatTensor `(N, H, W, K, 3)`
+
+where **N** = batch size, **H/W** are image height/width, **K** is the faces per pixel. The PyTorch3d backward pass returns gradients for `zbuf`, `dist` and `bary_coords`.
+
+Returning intermediate variables from rasterization has an associated memory cost. We can calculate the theoretical lower bound on the memory usage for the forward and backward pass as follows:
+
+```
+# Assume 4 bytes per float, and 8 bytes for long
+
+memory_forward_pass = ((N * H * W * K) * 2 + (N * H * W * K * 3)) * 4 + (N * H * W * K) * 8
+memory_backward_pass = ((N * H * W * K) * 2 + (N * H * W * K * 3)) * 4
+
+total_memory = memory_forward_pass + memory_backward_pass
+             = (N * H * W * K) * (5 * 4 * 2 + 8)
+             = (N * H * W * K) * 48
+```
+
+We need 48 bytes per face per pixel of the rasterized output. In order to remain within bounds for memory usage we can vary the batch size (**N**), image size (**H/W**) and faces per pixel (**K**).  For example, for a fixed batch size, if using a larger image size, try reducing the faces per pixel.
+
+---
+
+
+### 3. Modular design for easy experimentation and extensibility.
+
+We redesigned the rendering pipeline from the ground up to be modular and extensible and challenged many of the limitations in existing libraries. Refer to [renderer_getting_started.md](renderer_getting_started.md) for a detailed description of the architecture.
+
+
+### References
+
+<a id="1">[1]</a> Kato et al, 'Neural 3D Mesh Renderer', CVPR 2018
+
+<a id="2">[2]</a> Liu et al, 'Soft Rasterizer: A Differentiable Renderer for Image-based 3D Reasoning', ICCV 2019
--- a/docs/notes/renderer_getting_started.md
+++ b/docs/notes/renderer_getting_started.md
@@ -0,0 +1,81 @@
+# Renderer Getting Started
+
+### Architecture Overview
+
+The renderer is designed to be modular, extensible and support batching and gradients for all inputs. The following figure describes all the components of the rendering pipeline.
+
+<img src="../figs/architecture_overview.png" width="1000">
+
+##### Fragments
+
+The **rasterizer** returns 4 output tensors in a named tuple.
+
+- **`pix_to_face`**: LongTensor of shape `(N, image_size, image_size, faces_per_pixel)` specifying the indices of the faces (in the packed faces) which overlap each pixel in the image.
+- **`zbuf`**: FloatTensor of shape `(N, image_size, image_size, faces_per_pixel)` giving the z-coordinates of the nearest faces at each pixel in world coordinates, sorted in ascending z-order.
+- **`bary_coords`**: FloatTensor of shape `(N, image_size, image_size, faces_per_pixel, 3)`
+  giving the barycentric coordinates in NDC units of the nearest faces at each pixel, sorted in ascending z-order.
+- **`pix_dists`**: FloatTensor of shape `(N, image_size, image_size, faces_per_pixel)` giving the signed Euclidean distance (in NDC units) in the x/y plane of each point closest to the pixel.
+
+
+See the renderer API reference for more details about each component in the pipeline.
+
+---
+
+**NOTE:**
+
+The differentiable renderer API is experimental and subject to change!
+
+---
+
+### Coordinate transformation conventions
+
+Rendering requires transformations between several different coordinate frames: world space, view/camera space, NDC space and screen space. At each step it is important to know where the camera is located, how the x,y,z axes are aligned and the possible range of values. The following figure outlines the conventions used in PyTorch3d.
+
+<img src="../figs/transformations_overview.png" width="1000">
+
+
+
+---
+
+**NOTE: PyTorch3d vs OpenGL**
+
+While we tried to emulate several aspects of OpenGL, the NDC coordinate system in PyTorch3d is **right-handed** compared with a **left-handed** NDC coordinate system in OpenGL (the projection matrix switches the handedness).
+
+In OpenGL, the camera at the origin is looking along `-z` axis in camera space, but it is looking along the `+z` axis in NDC space.
+
+<img align="center" src="../figs/opengl_coordframes.png" width="300">
+
+---
+### A simple renderer
+
+A renderer in PyTorch3d is composed of a **rasterizer** and a **shader**. Create a renderer in a few simple steps:
+
+```
+# Imports
+from pytorch3d.renderer import (
+    OpenGLPerspectiveCameras, look_at_view_transform,
+    RasterizationSettings, BlendParams,
+    MeshRenderer, MeshRasterizer, PhongShader
+)
+
+# Initialize an OpenGL perspective camera.
+R, T = look_at_view_transform(2.7, 10, 20)
+cameras = OpenGLPerspectiveCameras(device=device, R=R, T=T)
+
+# Define the settings for rasterization and shading. Here we set the output image to be of size
+# 512x512. As we are rendering images for visualization purposes only we will set faces_per_pixel=1
+# and blur_radius=0.0. Refer to rasterize_meshes.py for explanations of these parameters.
+raster_settings = RasterizationSettings(
+    image_size=512,
+    blur_radius=0.0,
+    faces_per_pixel=1,
+    bin_size=0
+)
+
+# Create a phong renderer by composing a rasterizer and a shader. Here we can use a predefined
+# PhongShader, passing in the device on which to initialize the default parameters
+renderer = MeshRenderer(
+    rasterizer=MeshRasterizer(cameras=cameras, raster_settings=raster_settings),
+    shader=PhongShader(device=device, cameras=cameras)
+)
+```
--- a/docs/tutorials/bundle_adjustment.ipynb
+++ b/docs/tutorials/bundle_adjustment.ipynb
--- a/docs/tutorials/camera_position_optimization_with_differentiable_rendering.ipynb
+++ b/docs/tutorials/camera_position_optimization_with_differentiable_rendering.ipynb
--- a/docs/tutorials/data/bundle_adjustment_final.png
+++ b/docs/tutorials/data/bundle_adjustment_final.png
--- a/docs/tutorials/data/bundle_adjustment_initialization.png
+++ b/docs/tutorials/data/bundle_adjustment_initialization.png
--- a/docs/tutorials/data/camera_graph.pth
+++ b/docs/tutorials/data/camera_graph.pth
--- a/docs/tutorials/data/cow_mesh/README.md
+++ b/docs/tutorials/data/cow_mesh/README.md
@@ -0,0 +1,6 @@
+
+# Acknowledgements
+
+Thank you to Keenan Crane for allowing the cow mesh model to be used freely in the public domain.
+
+###### Source: http://www.cs.cmu.edu/~kmcrane/Projects/ModelRepository/
--- a/docs/tutorials/data/cow_mesh/cow.mtl
+++ b/docs/tutorials/data/cow_mesh/cow.mtl
@@ -0,0 +1,9 @@
+newmtl material_1
+map_Kd cow_texture.png
+
+# Test colors
+
+Ka 1.000 1.000 1.000  # white
+Kd 1.000 1.000 1.000  # white
+Ks 0.000 0.000 0.000  # black
+Ns 10.0
--- a/docs/tutorials/data/cow_mesh/cow.obj
+++ b/docs/tutorials/data/cow_mesh/cow.obj
--- a/docs/tutorials/data/cow_mesh/cow_texture.png
+++ b/docs/tutorials/data/cow_mesh/cow_texture.png
--- a/docs/tutorials/data/teapot.obj
+++ b/docs/tutorials/data/teapot.obj
--- a/docs/tutorials/deform_source_mesh_to_target_mesh.ipynb
+++ b/docs/tutorials/deform_source_mesh_to_target_mesh.ipynb
--- a/docs/tutorials/render_textured_meshes.ipynb
+++ b/docs/tutorials/render_textured_meshes.ipynb
--- a/docs/tutorials/utils/__init__.py
+++ b/docs/tutorials/utils/__init__.py
@@ -0,0 +1,8 @@
+# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
+
+from .camera_visualisation import (
+    get_camera_wireframe,
+    plot_camera_scene,
+    plot_cameras,
+)
+from .plot_image_grid import image_grid
--- a/docs/tutorials/utils/camera_visualisation.py
+++ b/docs/tutorials/utils/camera_visualisation.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
+
+import matplotlib.pyplot as plt
+import torch
+from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 unused import
+
+
+def get_camera_wireframe(scale: float = 0.3):
+    """
+    Returns the polyline of a camera symbol as a (12, 3) float tensor,
+    with every coordinate multiplied by `scale`.
+    """
+    # Rows 0-3: rectangle corners at z=2; row 4: origin; row 5: (0, 0, 3).
+    vertices = torch.tensor([
+        [-1.0, 0.75, 2.0], [1.0, 0.75, 2.0], [-1.0, -0.75, 2.0],
+        [1.0, -0.75, 2.0], [0.0, 0.0, 0.0], [0.0, 0.0, 3.0],
+    ], dtype=torch.float32)
+    # A single stroke: around the rectangle, then spokes through the origin.
+    stroke = [0, 1, 3, 2, 0, 4, 1, 3, 4, 2, 4, 5]
+    return vertices[stroke] * scale
+
+
+def plot_cameras(ax, cameras, color: str = "blue"):
+    """
+    Plots a set of `cameras` objects into the matplotlib axis `ax` with color
+    `color` and returns the line handles. Runs on whatever `cameras.device` is.
+    """
+    cam_wires_canonical = get_camera_wireframe().to(cameras.device)[None]
+    cam_trans = cameras.get_world_to_view_transform().inverse()
+    cam_wires_trans = cam_trans.transform_points(cam_wires_canonical)
+    plot_handles = []
+    for wire in cam_wires_trans:
+        # the Z and Y axes are flipped intentionally here!
+        x_, z_, y_ = wire.detach().cpu().numpy().T.astype(float)
+        (h,) = ax.plot(x_, y_, z_, color=color, linewidth=0.3)
+        plot_handles.append(h)
+    return plot_handles
+
+
+def plot_camera_scene(cameras, cameras_gt, status: str):
+    """
+    Plots predicted `cameras` against ground truth `cameras_gt` in a new 3D
+    figure titled `status`, shows it, and returns the figure. The matplotlib
+    y and z axes are swapped (and labelled so) to match `plot_cameras`.
+    """
+    fig = plt.figure()
+    # Newer Matplotlib rejects fig.gca(projection=...); add_subplot is portable.
+    ax = fig.add_subplot(111, projection="3d")
+    ax.set_title(status)
+    handle_cam = plot_cameras(ax, cameras, color="#FF7D1E")
+    handle_cam_gt = plot_cameras(ax, cameras_gt, color="#812CE5")
+    plot_radius = 3
+    ax.set_xlim3d([-plot_radius, plot_radius])
+    ax.set_ylim3d([3 - plot_radius, 3 + plot_radius])
+    ax.set_zlim3d([-plot_radius, plot_radius])
+    ax.set_xlabel("x")
+    ax.set_ylabel("z")
+    ax.set_zlabel("y")
+    labels_handles = {
+        "Estimated cameras": handle_cam[0],
+        "GT cameras": handle_cam_gt[0],
+    }
+    ax.legend(
+        list(labels_handles.values()),
+        list(labels_handles.keys()),
+        loc="upper center",
+        bbox_to_anchor=(0.5, 0),
+    )
+    plt.show()
+    return fig
--- a/docs/tutorials/utils/plot_image_grid.py
+++ b/docs/tutorials/utils/plot_image_grid.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
+
+import matplotlib.pyplot as plt
+
+
+def image_grid(
+    images,
+    rows=None,
+    cols=None,
+    fill: bool = True,
+    show_axes: bool = False,
+    rgb: bool = True,
+):
+    """
+    A util function for plotting a grid of images, one image per axes.
+
+    Args:
+        images: (N, H, W, 4) array of RGBA images
+        rows: number of rows in the grid; N when both rows and cols are None
+        cols: number of columns in the grid; 1 when both are None
+        fill: boolean indicating if the space between images should be filled
+        show_axes: boolean indicating if the axes of the plots should be visible
+        rgb: boolean, If True, only RGB channels are plotted.
+            If False, only the alpha channel is plotted.
+
+    Returns:
+        None
+
+    Raises:
+        ValueError: if exactly one of rows and cols is given.
+    """
+    if (rows is None) != (cols is None):
+        raise ValueError("Specify either both rows and cols or neither.")
+
+    # Default layout: one image per row in a single column.
+    if rows is None:
+        rows = len(images)
+        cols = 1
+
+    gridspec_kw = {"wspace": 0.0, "hspace": 0.0} if fill else {}
+    # squeeze=False always yields a 2D axes array; without it a 1x1 grid
+    # comes back as a bare Axes object, which has no .ravel().
+    fig, axarr = plt.subplots(
+        rows, cols, gridspec_kw=gridspec_kw, figsize=(15, 9), squeeze=False
+    )
+    # Stretch the grid over the whole figure, leaving no outer margin.
+    fig.subplots_adjust(left=0, bottom=0, right=1, top=1)
+
+    for ax, im in zip(axarr.ravel(), images):
+        # RGB channels when rgb is set, otherwise the alpha channel alone.
+        ax.imshow(im[..., :3] if rgb else im[..., 3])
+        if not show_axes:
+            ax.set_axis_off()