mirror of
https://github.com/facebookresearch/pytorch3d.git
synced 2025-12-21 23:00:34 +08:00
Update latest version of site
This commit is contained in:
File diff suppressed because one or more lines are too long
@@ -1,4 +1,4 @@
|
||||
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
@@ -37,14 +37,19 @@
|
||||
|
||||
# If `torch`, `torchvision` and `pytorch3d` are not installed, run the following cell:
|
||||
|
||||
# In[1]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
get_ipython().system('pip install torch torchvision')
|
||||
get_ipython().system("pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'")
|
||||
import sys
|
||||
import torch
|
||||
if torch.__version__=='1.6.0+cu101' and sys.platform.startswith('linux'):
|
||||
get_ipython().system('pip install pytorch3d')
|
||||
else:
|
||||
get_ipython().system("pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'")
|
||||
|
||||
|
||||
# In[3]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# imports
|
||||
@@ -64,11 +69,16 @@ sys.path.append(os.path.abspath(''))
|
||||
|
||||
# set for reproducibility
|
||||
torch.manual_seed(42)
|
||||
if torch.cuda.is_available():
|
||||
device = torch.device("cuda:0")
|
||||
else:
|
||||
device = torch.device("cpu")
|
||||
print("WARNING: CPU only, this will be slow!")
|
||||
|
||||
|
||||
# If using **Google Colab**, fetch the utils file for plotting the camera scene, and the ground truth camera positions:
|
||||
|
||||
# In[2]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
get_ipython().system('wget https://raw.githubusercontent.com/facebookresearch/pytorch3d/master/docs/tutorials/utils/camera_visualization.py')
|
||||
@@ -97,16 +107,16 @@ camera_graph_file = './data/camera_graph.pth'
|
||||
|
||||
# create the relative cameras
|
||||
cameras_relative = SfMPerspectiveCameras(
|
||||
R = R_relative.cuda(),
|
||||
T = T_relative.cuda(),
|
||||
device = "cuda",
|
||||
R = R_relative.to(device),
|
||||
T = T_relative.to(device),
|
||||
device = device,
|
||||
)
|
||||
|
||||
# create the absolute ground truth cameras
|
||||
cameras_absolute_gt = SfMPerspectiveCameras(
|
||||
R = R_absolute_gt.cuda(),
|
||||
T = T_absolute_gt.cuda(),
|
||||
device = "cuda",
|
||||
R = R_absolute_gt.to(device),
|
||||
T = T_absolute_gt.to(device),
|
||||
device = device,
|
||||
)
|
||||
|
||||
# the number of absolute camera positions
|
||||
@@ -152,7 +162,7 @@ def get_relative_camera(cams, edges):
|
||||
SfMPerspectiveCameras(
|
||||
R = cams.R[edges[:, i]],
|
||||
T = cams.T[edges[:, i]],
|
||||
device = "cuda",
|
||||
device = device,
|
||||
).get_world_to_view_transform()
|
||||
for i in (0, 1)
|
||||
]
|
||||
@@ -165,7 +175,7 @@ def get_relative_camera(cams, edges):
|
||||
cams_relative = SfMPerspectiveCameras(
|
||||
R = matrix_rel[:, :3, :3],
|
||||
T = matrix_rel[:, 3, :3],
|
||||
device = "cuda",
|
||||
device = device,
|
||||
)
|
||||
return cams_relative
|
||||
|
||||
@@ -180,12 +190,12 @@ def get_relative_camera(cams, edges):
|
||||
# `R_absolute = so3_exponential_map(log_R_absolute)`
|
||||
#
|
||||
|
||||
# In[8]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# initialize the absolute log-rotations/translations with random entries
|
||||
log_R_absolute_init = torch.randn(N, 3).float().cuda()
|
||||
T_absolute_init = torch.randn(N, 3).float().cuda()
|
||||
log_R_absolute_init = torch.randn(N, 3, dtype=torch.float32, device=device)
|
||||
T_absolute_init = torch.randn(N, 3, dtype=torch.float32, device=device)
|
||||
|
||||
# furthermore, we know that the first camera is a trivial one
|
||||
# (see the description above)
|
||||
@@ -201,7 +211,7 @@ T_absolute.requires_grad = True
|
||||
# the mask that specifies which cameras are going to be optimized
|
||||
# (since we know the first camera is already correct,
|
||||
# we only optimize over the 2nd-to-last cameras)
|
||||
camera_mask = torch.ones(N, 1).float().cuda()
|
||||
camera_mask = torch.ones(N, 1, dtype=torch.float32, device=device)
|
||||
camera_mask[0] = 0.
|
||||
|
||||
# init the optimizer
|
||||
@@ -222,7 +232,7 @@ for it in range(n_iter):
|
||||
cameras_absolute = SfMPerspectiveCameras(
|
||||
R = R_absolute,
|
||||
T = T_absolute * camera_mask,
|
||||
device = "cuda",
|
||||
device = device,
|
||||
)
|
||||
|
||||
# compute the relative cameras as a composition of the absolute cameras
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -1,4 +1,4 @@
|
||||
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
@@ -24,20 +24,25 @@
|
||||
|
||||
# If `torch`, `torchvision` and `pytorch3d` are not installed, run the following cell:
|
||||
|
||||
# In[1]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
get_ipython().system('pip install torch torchvision')
|
||||
get_ipython().system("pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'")
|
||||
import sys
|
||||
import torch
|
||||
if torch.__version__=='1.6.0+cu101' and sys.platform.startswith('linux'):
|
||||
get_ipython().system('pip install pytorch3d')
|
||||
else:
|
||||
get_ipython().system("pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'")
|
||||
|
||||
|
||||
# In[2]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
import os
|
||||
import torch
|
||||
import numpy as np
|
||||
from tqdm import tqdm_notebook
|
||||
from tqdm.notebook import tqdm
|
||||
import imageio
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
@@ -48,16 +53,16 @@ from skimage import img_as_ubyte
|
||||
from pytorch3d.io import load_obj
|
||||
|
||||
# datastructures
|
||||
from pytorch3d.structures import Meshes, Textures
|
||||
from pytorch3d.structures import Meshes
|
||||
|
||||
# 3D transformations functions
|
||||
from pytorch3d.transforms import Rotate, Translate
|
||||
|
||||
# rendering components
|
||||
from pytorch3d.renderer import (
|
||||
OpenGLPerspectiveCameras, look_at_view_transform, look_at_rotation,
|
||||
FoVPerspectiveCameras, look_at_view_transform, look_at_rotation,
|
||||
RasterizationSettings, MeshRenderer, MeshRasterizer, BlendParams,
|
||||
SoftSilhouetteShader, HardPhongShader, PointLights
|
||||
SoftSilhouetteShader, HardPhongShader, PointLights, TexturesVertex,
|
||||
)
|
||||
|
||||
|
||||
@@ -67,19 +72,22 @@ from pytorch3d.renderer import (
|
||||
|
||||
# If you are running this notebook locally after cloning the PyTorch3D repository, the mesh will already be available. **If using Google Colab, fetch the mesh and save it at the path `data/`**:
|
||||
|
||||
# In[2]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
get_ipython().system('mkdir -p data')
|
||||
get_ipython().system('wget -P data https://dl.fbaipublicfiles.com/pytorch3d/data/teapot/teapot.obj')
|
||||
|
||||
|
||||
# In[3]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Set the cuda device
|
||||
device = torch.device("cuda:0")
|
||||
torch.cuda.set_device(device)
|
||||
if torch.cuda.is_available():
|
||||
device = torch.device("cuda:0")
|
||||
torch.cuda.set_device(device)
|
||||
else:
|
||||
device = torch.device("cpu")
|
||||
|
||||
# Load the obj and ignore the textures and materials.
|
||||
verts, faces_idx, _ = load_obj("./data/teapot.obj")
|
||||
@@ -87,7 +95,7 @@ faces = faces_idx.verts_idx
|
||||
|
||||
# Initialize each vertex to be white in color.
|
||||
verts_rgb = torch.ones_like(verts)[None] # (1, V, 3)
|
||||
textures = Textures(verts_rgb=verts_rgb.to(device))
|
||||
textures = TexturesVertex(verts_features=verts_rgb.to(device))
|
||||
|
||||
# Create a Meshes object for the teapot. Here we have only one mesh in the batch.
|
||||
teapot_mesh = Meshes(
|
||||
@@ -107,11 +115,11 @@ teapot_mesh = Meshes(
|
||||
#
|
||||
# For optimizing the camera position we will use a renderer which produces a **silhouette** of the object only and does not apply any **lighting** or **shading**. We will also initialize another renderer which applies full **phong shading** and use this for visualizing the outputs.
|
||||
|
||||
# In[4]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Initialize an OpenGL perspective camera.
|
||||
cameras = OpenGLPerspectiveCameras(device=device)
|
||||
# Initialize a perspective camera.
|
||||
cameras = FoVPerspectiveCameras(device=device)
|
||||
|
||||
# To blend the 100 faces we set a few parameters which control the opacity and the sharpness of
|
||||
# edges. Refer to blending.py for more details.
|
||||
@@ -126,8 +134,6 @@ raster_settings = RasterizationSettings(
|
||||
image_size=256,
|
||||
blur_radius=np.log(1. / 1e-4 - 1.) * blend_params.sigma,
|
||||
faces_per_pixel=100,
|
||||
bin_size = None, # this setting controls whether naive or coarse-to-fine rasterization is used
|
||||
max_faces_per_bin = None # this setting is for coarse rasterization
|
||||
)
|
||||
|
||||
# Create a silhouette mesh renderer by composing a rasterizer and a shader.
|
||||
@@ -145,7 +151,6 @@ raster_settings = RasterizationSettings(
|
||||
image_size=256,
|
||||
blur_radius=0.0,
|
||||
faces_per_pixel=1,
|
||||
bin_size=0
|
||||
)
|
||||
# We can add a point light in front of the object.
|
||||
lights = PointLights(device=device, location=((2.0, 2.0, -2.0),))
|
||||
@@ -154,7 +159,7 @@ phong_renderer = MeshRenderer(
|
||||
cameras=cameras,
|
||||
raster_settings=raster_settings
|
||||
),
|
||||
shader=HardPhongShader(device=device, lights=lights)
|
||||
shader=HardPhongShader(device=device, cameras=cameras, lights=lights)
|
||||
)
|
||||
|
||||
|
||||
@@ -166,7 +171,7 @@ phong_renderer = MeshRenderer(
|
||||
#
|
||||
# We defined a camera which is positioned on the positive z axis hence sees the spout to the right.
|
||||
|
||||
# In[5]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Select the viewpoint using spherical angles
|
||||
@@ -197,7 +202,7 @@ plt.grid(False)
|
||||
#
|
||||
# Here we create a simple model class and initialize a parameter for the camera position.
|
||||
|
||||
# In[17]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
class Model(nn.Module):
|
||||
@@ -234,7 +239,7 @@ class Model(nn.Module):
|
||||
#
|
||||
# Now we can create an instance of the **model** above and set up an **optimizer** for the camera position parameter.
|
||||
|
||||
# In[18]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# We will save images periodically and compose them into a GIF.
|
||||
@@ -250,7 +255,7 @@ optimizer = torch.optim.Adam(model.parameters(), lr=0.05)
|
||||
|
||||
# ### Visualize the starting position and the reference position
|
||||
|
||||
# In[19]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
plt.figure(figsize=(10, 10))
|
||||
@@ -264,17 +269,17 @@ plt.title("Starting position")
|
||||
plt.subplot(1, 2, 2)
|
||||
plt.imshow(model.image_ref.cpu().numpy().squeeze())
|
||||
plt.grid(False)
|
||||
plt.title("Reference silhouette")
|
||||
plt.title("Reference silhouette");
|
||||
|
||||
|
||||
# ## 4. Run the optimization
|
||||
#
|
||||
# We run several iterations of the forward and backward pass and save outputs every 10 iterations. When this has finished take a look at `./teapot_optimization_demo.gif` for a cool gif of the optimization process!
|
||||
|
||||
# In[20]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
loop = tqdm_notebook(range(200))
|
||||
loop = tqdm(range(200))
|
||||
for i in loop:
|
||||
optimizer.zero_grad()
|
||||
loss, _ = model()
|
||||
|
||||
541
files/dataloaders_ShapeNetCore_R2N2.ipynb
Normal file
541
files/dataloaders_ShapeNetCore_R2N2.ipynb
Normal file
@@ -0,0 +1,541 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Dataloaders for ShapeNetCore and R2N2\n",
|
||||
"This tutorial shows how to:\n",
|
||||
"- Load models from ShapeNetCore and R2N2 using PyTorch3D's data loaders.\n",
|
||||
"- Pass the loaded datasets to `torch.utils.data.DataLoader`.\n",
|
||||
"- Render ShapeNetCore models with PyTorch3D's renderer.\n",
|
||||
"- Render R2N2 models with the same orientations as the original renderings in the dataset.\n",
|
||||
"- Visualize R2N2 model voxels."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 0. Install and import modules"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If `torch`, `torchvision` and `pytorch3d` are not installed, run the following cell:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install torch torchvision\n",
|
||||
"import sys\n",
|
||||
"import torch\n",
|
||||
"if torch.__version__=='1.6.0+cu101' and sys.platform.startswith('linux'):\n",
|
||||
" !pip install pytorch3d\n",
|
||||
"else:\n",
|
||||
" !pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import torch\n",
|
||||
"\n",
|
||||
"from pytorch3d.datasets import (\n",
|
||||
" R2N2,\n",
|
||||
" ShapeNetCore,\n",
|
||||
" collate_batched_meshes,\n",
|
||||
" render_cubified_voxels,\n",
|
||||
")\n",
|
||||
"from pytorch3d.renderer import (\n",
|
||||
" OpenGLPerspectiveCameras,\n",
|
||||
" PointLights,\n",
|
||||
" RasterizationSettings,\n",
|
||||
" TexturesVertex,\n",
|
||||
" look_at_view_transform,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"from pytorch3d.structures import Meshes\n",
|
||||
"from torch.utils.data import DataLoader\n",
|
||||
"\n",
|
||||
"# add path for demo utils functions \n",
|
||||
"import sys\n",
|
||||
"import os\n",
|
||||
"sys.path.append(os.path.abspath(''))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If using **Google Colab**, fetch the utils file for plotting image grids:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!wget https://raw.githubusercontent.com/facebookresearch/pytorch3d/master/docs/tutorials/utils/plot_image_grid.py\n",
|
||||
"from plot_image_grid import image_grid"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"OR if running locally uncomment and run the following cell:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# from utils import image_grid"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 1. Load the datasets"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If you haven't already downloaded the ShapeNetCore dataset, first do that following the instructions here: https://www.shapenet.org/. ShapeNetCore is a subset of the ShapeNet dataset. In PyTorch3D we support both version 1 (57 categories) and version 2 (55 categories).\n",
|
||||
"\n",
|
||||
"Then modify `SHAPENET_PATH` below to you local path to the ShapeNetCore dataset folder. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Setup\n",
|
||||
"if torch.cuda.is_available():\n",
|
||||
" device = torch.device(\"cuda:0\")\n",
|
||||
" torch.cuda.set_device(device)\n",
|
||||
"else:\n",
|
||||
" device = torch.device(\"cpu\")\n",
|
||||
" \n",
|
||||
"SHAPENET_PATH = \"\"\n",
|
||||
"shapenet_dataset = ShapeNetCore(SHAPENET_PATH)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"The R2N2 dataset can be downloaded using the instructions here: http://3d-r2n2.stanford.edu/. Look at the links for `ShapeNetRendering` and `ShapeNetVox32`. The R2N2 dataset contains 13 categories that are a subset of the ShapeNetCore v.1\n",
|
||||
"dataset. The R2N2 dataset also contains its own 24 renderings of each object and voxelized models.\n",
|
||||
"\n",
|
||||
"Then modify `R2N2_PATH` and `SPLITS_PATH` below to your local R2N2 dataset folder path and splits file path respectively. Here we will load the `train` split of R2N2 and ask the voxels of each model to be returned."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"R2N2_PATH = \"\"\n",
|
||||
"SPLITS_PATH = \"None\"\n",
|
||||
"r2n2_dataset = R2N2(\"train\", SHAPENET_PATH, R2N2_PATH, SPLITS_PATH, return_voxels=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can retrieve a model by indexing into the loaded dataset. For both ShapeNetCore and R2N2, we can examine the category this model belongs to (in the form of a synset id, equivalend to wnid described in ImageNet's API: http://image-net.org/download-API), its model id, and its vertices and faces."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"shapenet_model = shapenet_dataset[6]\n",
|
||||
"print(\"This model belongs to the category \" + shapenet_model[\"synset_id\"] + \".\")\n",
|
||||
"print(\"This model has model id \" + shapenet_model[\"model_id\"] + \".\")\n",
|
||||
"model_verts, model_faces = shapenet_model[\"verts\"], shapenet_model[\"faces\"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can use its vertices and faces to form a `Meshes` object which is a PyTorch3D datastructure for working with batched meshes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"model_textures = TexturesVertex(verts_features=torch.ones_like(model_verts, device=device)[None])\n",
|
||||
"shapenet_model_mesh = Meshes(\n",
|
||||
" verts=[model_verts.to(device)], \n",
|
||||
" faces=[model_faces.to(device)],\n",
|
||||
" textures=model_textures\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"With R2N2, we can further examine R2N2's original renderings. For instance, if we would like to see the second and third views of the eleventh objects in the R2N2 dataset, we can do the following:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"r2n2_renderings = r2n2_dataset[10,[1,2]]\n",
|
||||
"image_grid(r2n2_renderings.numpy(), rows=1, cols=2, rgb=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Use the datasets with `torch.utils.data.DataLoader`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Training deep learning models, usually requires passing in batches of inputs. The `torch.utils.data.DataLoader` from Pytorch helps us do this. PyTorch3D provides a function `collate_batched_meshes` to group the input meshes into a single `Meshes` object which represents the batch. The `Meshes` datastructure can then be used directly by other PyTorch3D ops which might be part of the deep learning model (e.g. `graph_conv`).\n",
|
||||
"\n",
|
||||
"For R2N2, if all the models in the batch have the same number of views, the views, rotation matrices, translation matrices, intrinsic matrices and voxels will also be stacked into batched tensors.\n",
|
||||
"\n",
|
||||
"**NOTE**: All models in the `val` split of R2N2 have 24 views, but there are 8 models that split their 24 views between `train` and `test` splits, in which case `collate_batched_meshes` will only be able to join the matrices, views and voxels as lists. However, this can be avoided by laoding only one view of each model by setting `return_all_views = False`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"batch_size = 12\n",
|
||||
"r2n2_single_view = R2N2(\"train\", SHAPENET_PATH, R2N2_PATH, SPLITS_PATH, return_all_views=False, return_voxels=True)\n",
|
||||
"r2n2_loader = DataLoader(r2n2_single_view, batch_size=batch_size, collate_fn=collate_batched_meshes)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Let's visualize all the views (one for each model) in the batch:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"it = iter(r2n2_loader)\n",
|
||||
"r2n2_batch = next(it)\n",
|
||||
"batch_renderings = r2n2_batch[\"images\"] # (N, V, H, W, 3), and in this case V is 1.\n",
|
||||
"image_grid(batch_renderings.squeeze().numpy(), rows=3, cols=4, rgb=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Render ShapeNetCore models with PyTorch3D's differntiable renderer"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Both `ShapeNetCore` and `R2N2` dataloaders have customized `render` functions that support rendering models by specifying their model ids, categories or indices using PyTorch3D's differentiable renderer implementation."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Rendering settings.\n",
|
||||
"R, T = look_at_view_transform(1.0, 1.0, 90)\n",
|
||||
"cameras = OpenGLPerspectiveCameras(R=R, T=T, device=device)\n",
|
||||
"raster_settings = RasterizationSettings(image_size=512)\n",
|
||||
"lights = PointLights(location=torch.tensor([0.0, 1.0, -2.0], device=device)[None],device=device)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"First we will try to render three models by their model ids:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"images_by_model_ids = shapenet_dataset.render(\n",
|
||||
" model_ids=[\n",
|
||||
" \"13394ca47c89f91525a3aaf903a41c90\",\n",
|
||||
" \"14755c2ee8e693aba508f621166382b0\",\n",
|
||||
" \"156c4207af6d2c8f1fdc97905708b8ea\",\n",
|
||||
" ],\n",
|
||||
" device=device,\n",
|
||||
" cameras=cameras,\n",
|
||||
" raster_settings=raster_settings,\n",
|
||||
" lights=lights,\n",
|
||||
")\n",
|
||||
"image_grid(images_by_model_ids.cpu().numpy(), rows=1, cols=3, rgb=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Suppose we would like to render the first three models in the dataset, we can render models by their indices:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"images_by_idxs = shapenet_dataset.render(\n",
|
||||
" idxs=list(range(3)),\n",
|
||||
" device=device,\n",
|
||||
" cameras=cameras,\n",
|
||||
" raster_settings=raster_settings,\n",
|
||||
" lights=lights,\n",
|
||||
")\n",
|
||||
"image_grid(images_by_idxs.cpu().numpy(), rows=1, cols=3, rgb=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Alternatively, if we are not interested in any particular models but would like see random models from some specific categories, we can do that by specifying `categories` and `sample_nums`. For example, if we would like to render 2 models from the category \"faucet\" and 3 models from the category \"chair\", we can do the following:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"images_by_categories = shapenet_dataset.render(\n",
|
||||
" categories=[\"faucet\", \"chair\"],\n",
|
||||
" sample_nums=[2, 3],\n",
|
||||
" device=device,\n",
|
||||
" cameras=cameras,\n",
|
||||
" raster_settings=raster_settings,\n",
|
||||
" lights=lights,\n",
|
||||
")\n",
|
||||
"image_grid(images_by_categories.cpu().numpy(), rows=1, cols=5, rgb=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"If we are not interested in any particular categories and just would like to render some random models from the whole dataset, we can set the number of models to be rendered in `sample_nums` and not specify any `categories`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"random_model_images = shapenet_dataset.render(\n",
|
||||
" sample_nums=[3],\n",
|
||||
" device=device,\n",
|
||||
" cameras=cameras,\n",
|
||||
" raster_settings=raster_settings,\n",
|
||||
" lights=lights,\n",
|
||||
")\n",
|
||||
"image_grid(random_model_images.cpu().numpy(), rows=1, cols=5, rgb=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 4. Render R2N2 models with the same orientations as the original renderings in the dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"We can render R2N2 models the same way as we rendered ShapeNetCore models above. In addition, we can also render R2N2 models with the same orientations as the original renderings in the dataset. For this we will use R2N2's customized `render` function and a different type of PyTorch3D camera called `BlenderCamera`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this example, we will render the seventh model with the same orientations as its second and third views. First we will retrieve R2N2's original renderings to compare with the result."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"original_rendering = r2n2_dataset[6,[1,2]][\"images\"]\n",
|
||||
"image_grid(original_rendering.numpy(), rows=1, cols=2, rgb=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, we will visualize PyTorch3d's renderings:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"r2n2_oriented_images = r2n2_dataset.render(\n",
|
||||
" idxs=[6],\n",
|
||||
" view_idxs=[1,2],\n",
|
||||
" device=device,\n",
|
||||
" raster_settings=raster_settings,\n",
|
||||
" lights=lights,\n",
|
||||
")\n",
|
||||
"image_grid(r2n2_oriented_images.cpu().numpy(), rows=1, cols=2, rgb=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 5. Visualize R2N2 models' voxels"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"R2N2 dataloader also returns models' voxels. We can visualize them by utilizing R2N2's `render_vox_to_mesh` function. This will cubify the voxels to a Meshes object, which will then be rendered."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"In this example we will visualize the tenth model in the dataset with the same orientation of its second and third views. First we will retrieve R2N2's original renderings to compare with the result."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"r2n2_model = r2n2_dataset[9,[1,2]]\n",
|
||||
"original_rendering = r2n2_model[\"images\"]\n",
|
||||
"image_grid(original_rendering.numpy(), rows=1, cols=2, rgb=True)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Next, we will pass the voxels to `render_vox_to_mesh`:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"vox_render = render_cubified_voxels(r2n2_model[\"voxels\"], device=device)\n",
|
||||
"image_grid(vox_render.cpu().numpy(), rows=1, cols=2, rgb=True)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"anp_metadata": {
|
||||
"path": "fbsource/fbcode/vision/fair/pytorch3d/docs/tutorials/Dataloaders_ShapeNetCore_R2N2.ipynb"
|
||||
},
|
||||
"bento_stylesheets": {
|
||||
"bento/extensions/flow/main.css": true,
|
||||
"bento/extensions/kernel_selector/main.css": true,
|
||||
"bento/extensions/kernel_ui/main.css": true,
|
||||
"bento/extensions/new_kernel/main.css": true,
|
||||
"bento/extensions/system_usage/main.css": true,
|
||||
"bento/extensions/theme/main.css": true
|
||||
},
|
||||
"disseminate_notebook_info": {
|
||||
"backup_notebook_id": "669429066983805"
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "intro_to_cv",
|
||||
"language": "python",
|
||||
"name": "bento_kernel_intro_to_cv"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.5+"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
301
files/dataloaders_ShapeNetCore_R2N2.py
Normal file
301
files/dataloaders_ShapeNetCore_R2N2.py
Normal file
@@ -0,0 +1,301 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
|
||||
|
||||
|
||||
# # Dataloaders for ShapeNetCore and R2N2
|
||||
# This tutorial shows how to:
|
||||
# - Load models from ShapeNetCore and R2N2 using PyTorch3D's data loaders.
|
||||
# - Pass the loaded datasets to `torch.utils.data.DataLoader`.
|
||||
# - Render ShapeNetCore models with PyTorch3D's renderer.
|
||||
# - Render R2N2 models with the same orientations as the original renderings in the dataset.
|
||||
# - Visualize R2N2 model voxels.
|
||||
|
||||
# ## 0. Install and import modules
|
||||
|
||||
# If `torch`, `torchvision` and `pytorch3d` are not installed, run the following cell:
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
get_ipython().system('pip install torch torchvision')
|
||||
import sys
|
||||
import torch
|
||||
if torch.__version__=='1.6.0+cu101' and sys.platform.startswith('linux'):
|
||||
get_ipython().system('pip install pytorch3d')
|
||||
else:
|
||||
get_ipython().system("pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'")
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
from pytorch3d.datasets import (
|
||||
R2N2,
|
||||
ShapeNetCore,
|
||||
collate_batched_meshes,
|
||||
render_cubified_voxels,
|
||||
)
|
||||
from pytorch3d.renderer import (
|
||||
OpenGLPerspectiveCameras,
|
||||
PointLights,
|
||||
RasterizationSettings,
|
||||
TexturesVertex,
|
||||
look_at_view_transform,
|
||||
)
|
||||
|
||||
from pytorch3d.structures import Meshes
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
# add path for demo utils functions
|
||||
import sys
|
||||
import os
|
||||
sys.path.append(os.path.abspath(''))
|
||||
|
||||
|
||||
# If using **Google Colab**, fetch the utils file for plotting image grids:
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
get_ipython().system('wget https://raw.githubusercontent.com/facebookresearch/pytorch3d/master/docs/tutorials/utils/plot_image_grid.py')
|
||||
from plot_image_grid import image_grid
|
||||
|
||||
|
||||
# OR if running locally uncomment and run the following cell:
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# from utils import image_grid
|
||||
|
||||
|
||||
# ## 1. Load the datasets
|
||||
|
||||
# If you haven't already downloaded the ShapeNetCore dataset, first do that following the instructions here: https://www.shapenet.org/. ShapeNetCore is a subset of the ShapeNet dataset. In PyTorch3D we support both version 1 (57 categories) and version 2 (55 categories).
|
||||
#
|
||||
# Then modify `SHAPENET_PATH` below to your local path to the ShapeNetCore dataset folder.
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Setup
|
||||
if torch.cuda.is_available():
|
||||
device = torch.device("cuda:0")
|
||||
torch.cuda.set_device(device)
|
||||
else:
|
||||
device = torch.device("cpu")
|
||||
|
||||
SHAPENET_PATH = ""
|
||||
shapenet_dataset = ShapeNetCore(SHAPENET_PATH)
|
||||
|
||||
|
||||
# The R2N2 dataset can be downloaded using the instructions here: http://3d-r2n2.stanford.edu/. Look at the links for `ShapeNetRendering` and `ShapeNetVox32`. The R2N2 dataset contains 13 categories that are a subset of the ShapeNetCore v.1
|
||||
# dataset. The R2N2 dataset also contains its own 24 renderings of each object and voxelized models.
|
||||
#
|
||||
# Then modify `R2N2_PATH` and `SPLITS_PATH` below to your local R2N2 dataset folder path and splits file path respectively. Here we will load the `train` split of R2N2 and ask the voxels of each model to be returned.
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
R2N2_PATH = ""
|
||||
SPLITS_PATH = "None"
|
||||
r2n2_dataset = R2N2("train", SHAPENET_PATH, R2N2_PATH, SPLITS_PATH, return_voxels=True)
|
||||
|
||||
|
||||
# We can retrieve a model by indexing into the loaded dataset. For both ShapeNetCore and R2N2, we can examine the category this model belongs to (in the form of a synset id, equivalent to the wnid described in ImageNet's API: http://image-net.org/download-API), its model id, and its vertices and faces.
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
shapenet_model = shapenet_dataset[6]
|
||||
print("This model belongs to the category " + shapenet_model["synset_id"] + ".")
|
||||
print("This model has model id " + shapenet_model["model_id"] + ".")
|
||||
model_verts, model_faces = shapenet_model["verts"], shapenet_model["faces"]
|
||||
|
||||
|
||||
# We can use its vertices and faces to form a `Meshes` object which is a PyTorch3D datastructure for working with batched meshes.
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
model_textures = TexturesVertex(verts_features=torch.ones_like(model_verts, device=device)[None])
|
||||
shapenet_model_mesh = Meshes(
|
||||
verts=[model_verts.to(device)],
|
||||
faces=[model_faces.to(device)],
|
||||
textures=model_textures
|
||||
)
|
||||
|
||||
|
||||
# With R2N2, we can further examine R2N2's original renderings. For instance, if we would like to see the second and third views of the eleventh object in the R2N2 dataset, we can do the following:
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Indexing the R2N2 dataset with (model index, view indices) yields a dict for the model;
# the original renderings are stored under "images" (same access pattern as the later cells).
r2n2_renderings = r2n2_dataset[10,[1,2]]["images"]
|
||||
image_grid(r2n2_renderings.numpy(), rows=1, cols=2, rgb=True)
|
||||
|
||||
|
||||
# ## 2. Use the datasets with `torch.utils.data.DataLoader`
|
||||
|
||||
# Training deep learning models usually requires passing in batches of inputs. The `torch.utils.data.DataLoader` from PyTorch helps us do this. PyTorch3D provides a function `collate_batched_meshes` to group the input meshes into a single `Meshes` object which represents the batch. The `Meshes` datastructure can then be used directly by other PyTorch3D ops which might be part of the deep learning model (e.g. `graph_conv`).
|
||||
#
|
||||
# For R2N2, if all the models in the batch have the same number of views, the views, rotation matrices, translation matrices, intrinsic matrices and voxels will also be stacked into batched tensors.
|
||||
#
|
||||
# **NOTE**: All models in the `val` split of R2N2 have 24 views, but there are 8 models that split their 24 views between `train` and `test` splits, in which case `collate_batched_meshes` will only be able to join the matrices, views and voxels as lists. However, this can be avoided by loading only one view of each model by setting `return_all_views = False`.
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
batch_size = 12
|
||||
r2n2_single_view = R2N2("train", SHAPENET_PATH, R2N2_PATH, SPLITS_PATH, return_all_views=False, return_voxels=True)
|
||||
r2n2_loader = DataLoader(r2n2_single_view, batch_size=batch_size, collate_fn=collate_batched_meshes)
|
||||
|
||||
|
||||
# Let's visualize all the views (one for each model) in the batch:
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
it = iter(r2n2_loader)
|
||||
r2n2_batch = next(it)
|
||||
batch_renderings = r2n2_batch["images"] # (N, V, H, W, 3), and in this case V is 1.
|
||||
image_grid(batch_renderings.squeeze().numpy(), rows=3, cols=4, rgb=True)
|
||||
|
||||
|
||||
# ## 3. Render ShapeNetCore models with PyTorch3D's differentiable renderer
|
||||
|
||||
# Both `ShapeNetCore` and `R2N2` dataloaders have customized `render` functions that support rendering models by specifying their model ids, categories or indices using PyTorch3D's differentiable renderer implementation.
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Rendering settings.
|
||||
R, T = look_at_view_transform(1.0, 1.0, 90)
|
||||
cameras = OpenGLPerspectiveCameras(R=R, T=T, device=device)
|
||||
raster_settings = RasterizationSettings(image_size=512)
|
||||
lights = PointLights(location=torch.tensor([0.0, 1.0, -2.0], device=device)[None],device=device)
|
||||
|
||||
|
||||
# First we will try to render three models by their model ids:
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
images_by_model_ids = shapenet_dataset.render(
|
||||
model_ids=[
|
||||
"13394ca47c89f91525a3aaf903a41c90",
|
||||
"14755c2ee8e693aba508f621166382b0",
|
||||
"156c4207af6d2c8f1fdc97905708b8ea",
|
||||
],
|
||||
device=device,
|
||||
cameras=cameras,
|
||||
raster_settings=raster_settings,
|
||||
lights=lights,
|
||||
)
|
||||
image_grid(images_by_model_ids.cpu().numpy(), rows=1, cols=3, rgb=True)
|
||||
|
||||
|
||||
# Suppose we would like to render the first three models in the dataset, we can render models by their indices:
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
images_by_idxs = shapenet_dataset.render(
|
||||
idxs=list(range(3)),
|
||||
device=device,
|
||||
cameras=cameras,
|
||||
raster_settings=raster_settings,
|
||||
lights=lights,
|
||||
)
|
||||
image_grid(images_by_idxs.cpu().numpy(), rows=1, cols=3, rgb=True)
|
||||
|
||||
|
||||
# Alternatively, if we are not interested in any particular models but would like to see random models from some specific categories, we can do that by specifying `categories` and `sample_nums`. For example, if we would like to render 2 models from the category "faucet" and 3 models from the category "chair", we can do the following:
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
images_by_categories = shapenet_dataset.render(
|
||||
categories=["faucet", "chair"],
|
||||
sample_nums=[2, 3],
|
||||
device=device,
|
||||
cameras=cameras,
|
||||
raster_settings=raster_settings,
|
||||
lights=lights,
|
||||
)
|
||||
image_grid(images_by_categories.cpu().numpy(), rows=1, cols=5, rgb=True)
|
||||
|
||||
|
||||
# If we are not interested in any particular categories and just would like to render some random models from the whole dataset, we can set the number of models to be rendered in `sample_nums` and not specify any `categories`:
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
random_model_images = shapenet_dataset.render(
|
||||
sample_nums=[3],
|
||||
device=device,
|
||||
cameras=cameras,
|
||||
raster_settings=raster_settings,
|
||||
lights=lights,
|
||||
)
|
||||
image_grid(random_model_images.cpu().numpy(), rows=1, cols=5, rgb=True)
|
||||
|
||||
|
||||
# ## 4. Render R2N2 models with the same orientations as the original renderings in the dataset
|
||||
|
||||
# We can render R2N2 models the same way as we rendered ShapeNetCore models above. In addition, we can also render R2N2 models with the same orientations as the original renderings in the dataset. For this we will use R2N2's customized `render` function and a different type of PyTorch3D camera called `BlenderCamera`.
|
||||
|
||||
# In this example, we will render the seventh model with the same orientations as its second and third views. First we will retrieve R2N2's original renderings to compare with the result.
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
original_rendering = r2n2_dataset[6,[1,2]]["images"]
|
||||
image_grid(original_rendering.numpy(), rows=1, cols=2, rgb=True)
|
||||
|
||||
|
||||
# Next, we will visualize PyTorch3D's renderings:
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
r2n2_oriented_images = r2n2_dataset.render(
|
||||
idxs=[6],
|
||||
view_idxs=[1,2],
|
||||
device=device,
|
||||
raster_settings=raster_settings,
|
||||
lights=lights,
|
||||
)
|
||||
image_grid(r2n2_oriented_images.cpu().numpy(), rows=1, cols=2, rgb=True)
|
||||
|
||||
|
||||
# ## 5. Visualize R2N2 models' voxels
|
||||
|
||||
# The R2N2 dataloader also returns models' voxels. We can visualize them by utilizing the `render_cubified_voxels` function. This will cubify the voxels to a Meshes object, which will then be rendered.
|
||||
|
||||
# In this example we will visualize the tenth model in the dataset with the same orientation of its second and third views. First we will retrieve R2N2's original renderings to compare with the result.
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
r2n2_model = r2n2_dataset[9,[1,2]]
|
||||
original_rendering = r2n2_model["images"]
|
||||
image_grid(original_rendering.numpy(), rows=1, cols=2, rgb=True)
|
||||
|
||||
|
||||
# Next, we will pass the voxels to `render_cubified_voxels`:
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
vox_render = render_cubified_voxels(r2n2_model["voxels"], device=device)
|
||||
image_grid(vox_render.cpu().numpy(), rows=1, cols=2, rgb=True)
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -1,4 +1,4 @@
|
||||
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
@@ -40,7 +40,12 @@
|
||||
|
||||
|
||||
get_ipython().system('pip install torch torchvision')
|
||||
get_ipython().system("pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'")
|
||||
import sys
|
||||
import torch
|
||||
if torch.__version__=='1.6.0+cu101' and sys.platform.startswith('linux'):
|
||||
get_ipython().system('pip install pytorch3d')
|
||||
else:
|
||||
get_ipython().system("pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'")
|
||||
|
||||
|
||||
# In[ ]:
|
||||
@@ -59,7 +64,7 @@ from pytorch3d.loss import (
|
||||
mesh_normal_consistency,
|
||||
)
|
||||
import numpy as np
|
||||
from tqdm import tqdm_notebook
|
||||
from tqdm.notebook import tqdm
|
||||
get_ipython().run_line_magic('matplotlib', 'notebook')
|
||||
from mpl_toolkits.mplot3d import Axes3D
|
||||
import matplotlib.pyplot as plt
|
||||
@@ -68,14 +73,18 @@ mpl.rcParams['savefig.dpi'] = 80
|
||||
mpl.rcParams['figure.dpi'] = 80
|
||||
|
||||
# Set the device
|
||||
device = torch.device("cuda:0")
|
||||
if torch.cuda.is_available():
|
||||
device = torch.device("cuda:0")
|
||||
else:
|
||||
device = torch.device("cpu")
|
||||
print("WARNING: CPU only, this will be slow!")
|
||||
|
||||
|
||||
# ## 1. Load an obj file and create a Meshes object
|
||||
|
||||
# Download the target 3D model of a dolphin. It will be saved locally as a file called `dolphin.obj`.
|
||||
|
||||
# In[1]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
get_ipython().system('wget https://dl.fbaipublicfiles.com/pytorch3d/data/dolphin/dolphin.obj')
|
||||
@@ -139,7 +148,7 @@ def plot_pointcloud(mesh, title=""):
|
||||
plt.show()
|
||||
|
||||
|
||||
# In[75]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# %matplotlib notebook
|
||||
@@ -164,7 +173,7 @@ deform_verts = torch.full(src_mesh.verts_packed().shape, 0.0, device=device, req
|
||||
optimizer = torch.optim.SGD([deform_verts], lr=1.0, momentum=0.9)
|
||||
|
||||
|
||||
# In[78]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Number of optimization steps
|
||||
@@ -179,7 +188,7 @@ w_normal = 0.01
|
||||
w_laplacian = 0.1
|
||||
# Plot period for the losses
|
||||
plot_period = 250
|
||||
loop = tqdm_notebook(range(Niter))
|
||||
loop = tqdm(range(Niter))
|
||||
|
||||
chamfer_losses = []
|
||||
laplacian_losses = []
|
||||
@@ -234,7 +243,7 @@ for i in loop:
|
||||
|
||||
# ## 4. Visualize the loss
|
||||
|
||||
# In[79]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
fig = plt.figure(figsize=(13, 5))
|
||||
@@ -246,7 +255,7 @@ ax.plot(laplacian_losses, label="laplacian loss")
|
||||
ax.legend(fontsize="16")
|
||||
ax.set_xlabel("Iteration", fontsize="16")
|
||||
ax.set_ylabel("Loss", fontsize="16")
|
||||
ax.set_title("Loss vs iterations", fontsize="16")
|
||||
ax.set_title("Loss vs iterations", fontsize="16");
|
||||
|
||||
|
||||
# ## 5. Save the predicted mesh
|
||||
|
||||
940
files/fit_textured_mesh.ipynb
Normal file
940
files/fit_textured_mesh.ipynb
Normal file
@@ -0,0 +1,940 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "_Ip8kp4TfBLZ"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "kuXHJv44fBLe"
|
||||
},
|
||||
"source": [
|
||||
"# Fit a mesh via rendering\n",
|
||||
"\n",
|
||||
"This tutorial shows how to:\n",
|
||||
"- Load a mesh and textures from an `.obj` file. \n",
|
||||
"- Create a synthetic dataset by rendering a textured mesh from multiple viewpoints\n",
|
||||
"- Fit a mesh to the observed synthetic images using differentiable silhouette rendering\n",
|
||||
"- Fit a mesh and its textures using differentiable textured rendering"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "Bnj3THhzfBLf"
|
||||
},
|
||||
"source": [
|
||||
"## 0. Install and Import modules"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "okLalbR_g7NS"
|
||||
},
|
||||
"source": [
|
||||
"If `torch`, `torchvision` and `pytorch3d` are not installed, run the following cell:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "musUWTglgxSB"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install torch torchvision\n",
|
||||
"import sys\n",
|
||||
"import torch\n",
|
||||
"if torch.__version__=='1.6.0+cu101' and sys.platform.startswith('linux'):\n",
|
||||
" !pip install pytorch3d\n",
|
||||
"else:\n",
|
||||
" !pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "nX99zdoffBLg"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import torch\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"from skimage.io import imread\n",
|
||||
"\n",
|
||||
"from pytorch3d.utils import ico_sphere\n",
|
||||
"import numpy as np\n",
|
||||
"from tqdm.notebook import tqdm\n",
|
||||
"\n",
|
||||
"# Util function for loading meshes\n",
|
||||
"from pytorch3d.io import load_objs_as_meshes, save_obj\n",
|
||||
"\n",
|
||||
"from pytorch3d.loss import (\n",
|
||||
" chamfer_distance, \n",
|
||||
" mesh_edge_loss, \n",
|
||||
" mesh_laplacian_smoothing, \n",
|
||||
" mesh_normal_consistency,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Data structures and functions for rendering\n",
|
||||
"from pytorch3d.structures import Meshes\n",
|
||||
"from pytorch3d.renderer import (\n",
|
||||
" look_at_view_transform,\n",
|
||||
" OpenGLPerspectiveCameras, \n",
|
||||
" PointLights, \n",
|
||||
" DirectionalLights, \n",
|
||||
" Materials, \n",
|
||||
" RasterizationSettings, \n",
|
||||
" MeshRenderer, \n",
|
||||
" MeshRasterizer, \n",
|
||||
" SoftPhongShader,\n",
|
||||
" SoftSilhouetteShader,\n",
|
||||
" SoftPhongShader,\n",
|
||||
" TexturesVertex\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# add path for demo utils functions \n",
|
||||
"import sys\n",
|
||||
"import os\n",
|
||||
"sys.path.append(os.path.abspath(''))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "Lxmehq6Zhrzv"
|
||||
},
|
||||
"source": [
|
||||
"If using **Google Colab**, fetch the utils file for plotting image grids:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "HZozr3Pmho-5"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!wget https://raw.githubusercontent.com/facebookresearch/pytorch3d/master/docs/tutorials/utils/plot_image_grid.py\n",
|
||||
"from plot_image_grid import image_grid"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "g4B62MzYiJUM"
|
||||
},
|
||||
"source": [
|
||||
"OR if running **locally** uncomment and run the following cell:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "paJ4Im8ahl7O"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# from utils.plot_image_grid import image_grid"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"collapsed": true,
|
||||
"id": "5jGq772XfBLk"
|
||||
},
|
||||
"source": [
|
||||
"### 1. Load a mesh and texture file\n",
|
||||
"\n",
|
||||
"Load an `.obj` file and its associated `.mtl` file and create a **Textures** and **Meshes** object. \n",
|
||||
"\n",
|
||||
"**Meshes** is a unique datastructure provided in PyTorch3D for working with batches of meshes of different sizes. \n",
|
||||
"\n",
|
||||
"**TexturesVertex** is an auxiliary datastructure for storing vertex rgb texture information about meshes. \n",
|
||||
"\n",
|
||||
"**Meshes** has several class methods which are used throughout the rendering pipeline."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "a8eU4zo5jd_H"
|
||||
},
|
||||
"source": [
|
||||
"If running this notebook using **Google Colab**, run the following cell to fetch the mesh obj and texture files and save it at the path `data/cow_mesh`:\n",
|
||||
"If running locally, the data is already available at the correct path. "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "tTm0cVuOjb1W"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!mkdir -p data/cow_mesh\n",
|
||||
"!wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.obj\n",
|
||||
"!wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.mtl\n",
|
||||
"!wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow_texture.png"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "gi5Kd0GafBLl"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Setup\n",
|
||||
"if torch.cuda.is_available():\n",
|
||||
" device = torch.device(\"cuda:0\")\n",
|
||||
" torch.cuda.set_device(device)\n",
|
||||
"else:\n",
|
||||
" device = torch.device(\"cpu\")\n",
|
||||
"\n",
|
||||
"# Set paths\n",
|
||||
"DATA_DIR = \"./data\"\n",
|
||||
"obj_filename = os.path.join(DATA_DIR, \"cow_mesh/cow.obj\")\n",
|
||||
"\n",
|
||||
"# Load obj file\n",
|
||||
"mesh = load_objs_as_meshes([obj_filename], device=device)\n",
|
||||
"\n",
|
||||
"# We scale normalize and center the target mesh to fit in a sphere of radius 1 \n",
|
||||
"# centered at (0,0,0). (scale, center) will be used to bring the predicted mesh \n",
|
||||
"# to its original center and scale. Note that normalizing the target mesh, \n",
|
||||
"# speeds up the optimization but is not necessary!\n",
|
||||
"verts = mesh.verts_packed()\n",
|
||||
"N = verts.shape[0]\n",
|
||||
"center = verts.mean(0)\n",
|
||||
"scale = max((verts - center).abs().max(0)[0])\n",
|
||||
"mesh.offset_verts_(-center.expand(N, 3))\n",
|
||||
"mesh.scale_verts_((1.0 / float(scale)));"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "17c4xmtyfBMH"
|
||||
},
|
||||
"source": [
|
||||
"## 2. Dataset Creation\n",
|
||||
"\n",
|
||||
"We sample different camera positions that encode multiple viewpoints of the cow. We create a renderer with a shader that performs texture map interpolation. We render a synthetic dataset of images of the textured cow mesh from multiple viewpoints.\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "CDQKebNNfBMI"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# the number of different viewpoints from which we want to render the mesh.\n",
|
||||
"num_views = 20\n",
|
||||
"\n",
|
||||
"# Get a batch of viewing angles. \n",
|
||||
"elev = torch.linspace(0, 360, num_views)\n",
|
||||
"azim = torch.linspace(-180, 180, num_views)\n",
|
||||
"\n",
|
||||
"# Place a point light in front of the object. As mentioned above, the front of \n",
|
||||
"# the cow is facing the -z direction. \n",
|
||||
"lights = PointLights(device=device, location=[[0.0, 0.0, -3.0]])\n",
|
||||
"\n",
|
||||
"# Initialize an OpenGL perspective camera that represents a batch of different \n",
|
||||
"# viewing angles. All the cameras helper methods support mixed type inputs and \n",
|
||||
"# broadcasting. So we can view the camera from a distance of dist=2.7, and \n",
|
||||
"# then specify elevation and azimuth angles for each viewpoint as tensors. \n",
|
||||
"R, T = look_at_view_transform(dist=2.7, elev=elev, azim=azim)\n",
|
||||
"cameras = OpenGLPerspectiveCameras(device=device, R=R, T=T)\n",
|
||||
"\n",
|
||||
"# We arbitrarily choose one particular view that will be used to visualize \n",
|
||||
"# results\n",
|
||||
"camera = OpenGLPerspectiveCameras(device=device, R=R[None, 1, ...], \n",
|
||||
" T=T[None, 1, ...]) \n",
|
||||
"\n",
|
||||
"# Define the settings for rasterization and shading. Here we set the output \n",
|
||||
"# image to be of size 128X128. As we are rendering images for visualization \n",
|
||||
"# purposes only we will set faces_per_pixel=1 and blur_radius=0.0. Refer to \n",
|
||||
"# rasterize_meshes.py for explanations of these parameters. We also leave \n",
|
||||
"# bin_size and max_faces_per_bin to their default values of None, which sets \n",
|
||||
"# their values using heuristics and ensures that the faster coarse-to-fine \n",
|
||||
"# rasterization method is used. Refer to docs/notes/renderer.md for an \n",
|
||||
"# explanation of the difference between naive and coarse-to-fine rasterization. \n",
|
||||
"raster_settings = RasterizationSettings(\n",
|
||||
" image_size=128, \n",
|
||||
" blur_radius=0.0, \n",
|
||||
" faces_per_pixel=1, \n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Create a phong renderer by composing a rasterizer and a shader. The textured \n",
|
||||
"# phong shader will interpolate the texture uv coordinates for each vertex, \n",
|
||||
"# sample from a texture image and apply the Phong lighting model\n",
|
||||
"renderer = MeshRenderer(\n",
|
||||
" rasterizer=MeshRasterizer(\n",
|
||||
" cameras=camera, \n",
|
||||
" raster_settings=raster_settings\n",
|
||||
" ),\n",
|
||||
" shader=SoftPhongShader(\n",
|
||||
" device=device, \n",
|
||||
" cameras=camera,\n",
|
||||
" lights=lights\n",
|
||||
" )\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Create a batch of meshes by repeating the cow mesh and associated textures. \n",
|
||||
"# Meshes has a useful `extend` method which allows us do this very easily. \n",
|
||||
"# This also extends the textures. \n",
|
||||
"meshes = mesh.extend(num_views)\n",
|
||||
"\n",
|
||||
"# Render the cow mesh from each viewing angle\n",
|
||||
"target_images = renderer(meshes, cameras=cameras, lights=lights)\n",
|
||||
"\n",
|
||||
"# Our multi-view cow dataset will be represented by these 2 lists of tensors,\n",
|
||||
"# each of length num_views.\n",
|
||||
"target_rgb = [target_images[i, ..., :3] for i in range(num_views)]\n",
|
||||
"target_cameras = [OpenGLPerspectiveCameras(device=device, R=R[None, i, ...], \n",
|
||||
" T=T[None, i, ...]) for i in range(num_views)]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "TppB4PVmR1Rc"
|
||||
},
|
||||
"source": [
|
||||
"Visualize the dataset:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "HHE0CnbVR1Rd"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# RGB images\n",
|
||||
"image_grid(target_images.cpu().numpy(), rows=4, cols=5, rgb=True)\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "gOb4rYx65E8z"
|
||||
},
|
||||
"source": [
|
||||
"Later in this tutorial, we will fit a mesh to the rendered RGB images, as well as to images of just the cow silhouette. For the latter case, we will render a dataset of silhouette images. Most shaders in PyTorch3D will output an alpha channel along with the RGB image as a 4th channel in an RGBA image. The alpha channel encodes the probability that each pixel belongs to the foreground of the object. We construct a soft silhouette shader to render this alpha channel."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "iP_g-nwX4exM"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Rasterization settings for silhouette rendering \n",
|
||||
"sigma = 1e-4\n",
|
||||
"raster_settings_silhouette = RasterizationSettings(\n",
|
||||
" image_size=128, \n",
|
||||
" blur_radius=np.log(1. / 1e-4 - 1.)*sigma, \n",
|
||||
" faces_per_pixel=50, \n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Silhouette renderer \n",
|
||||
"renderer_silhouette = MeshRenderer(\n",
|
||||
" rasterizer=MeshRasterizer(\n",
|
||||
" cameras=camera, \n",
|
||||
" raster_settings=raster_settings_silhouette\n",
|
||||
" ),\n",
|
||||
" shader=SoftSilhouetteShader()\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Render silhouette images. The 4th channel (index 3) of the rendering output is \n",
|
||||
"# the alpha/silhouette channel\n",
|
||||
"silhouette_images = renderer_silhouette(meshes, cameras=cameras, lights=lights)\n",
|
||||
"target_silhouette = [silhouette_images[i, ..., 3] for i in range(num_views)]\n",
|
||||
"\n",
|
||||
"# Visualize silhouette images\n",
|
||||
"image_grid(silhouette_images.cpu().numpy(), rows=4, cols=5, rgb=False)\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "t3qphI1ElUb5"
|
||||
},
|
||||
"source": [
|
||||
"## 3. Mesh prediction via silhouette rendering\n",
|
||||
"In the previous section, we created a dataset of images of multiple viewpoints of a cow. In this section, we predict a mesh by observing those target images without any knowledge of the ground truth cow mesh. We assume we know the position of the cameras and lighting.\n",
|
||||
"\n",
|
||||
"We first define some helper functions to visualize the results of our mesh prediction:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "eeWYHROrR1Rh"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Show a visualization comparing the rendered predicted mesh to the ground truth \n",
|
||||
"# mesh\n",
|
||||
"def visualize_prediction(predicted_mesh, renderer=renderer_silhouette, \n",
|
||||
" target_image=target_rgb[1], title='', \n",
|
||||
" silhouette=False):\n",
|
||||
" inds = 3 if silhouette else range(3)\n",
|
||||
" predicted_images = renderer(predicted_mesh)\n",
|
||||
" plt.figure(figsize=(20, 10))\n",
|
||||
" plt.subplot(1, 2, 1)\n",
|
||||
" plt.imshow(predicted_images[0, ..., inds].cpu().detach().numpy())\n",
|
||||
"\n",
|
||||
" plt.subplot(1, 2, 2)\n",
|
||||
" plt.imshow(target_image.cpu().detach().numpy())\n",
|
||||
" plt.title(title)\n",
|
||||
" plt.grid(\"off\")\n",
|
||||
" plt.axis(\"off\")\n",
|
||||
"\n",
|
||||
"# Plot losses as a function of optimization iteration\n",
|
||||
"def plot_losses(losses):\n",
|
||||
" fig = plt.figure(figsize=(13, 5))\n",
|
||||
" ax = fig.gca()\n",
|
||||
" for k, l in losses.items():\n",
|
||||
" ax.plot(l['values'], label=k + \" loss\")\n",
|
||||
" ax.legend(fontsize=\"16\")\n",
|
||||
" ax.set_xlabel(\"Iteration\", fontsize=\"16\")\n",
|
||||
" ax.set_ylabel(\"Loss\", fontsize=\"16\")\n",
|
||||
" ax.set_title(\"Loss vs iterations\", fontsize=\"16\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "PpsvBpuMR1Ri"
|
||||
},
|
||||
"source": [
|
||||
"Starting from a sphere mesh, we will learn offsets of each vertex such that the predicted mesh silhouette is more similar to the target silhouette image at each optimization step. We begin by loading our initial sphere mesh:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "i989ARH1R1Rj"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# We initialize the source shape to be a sphere of radius 1. \n",
|
||||
"src_mesh = ico_sphere(4, device)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "f5xVtgLNDvC5"
|
||||
},
|
||||
"source": [
|
||||
"We create a new differentiable renderer for rendering the silhouette of our predicted mesh:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "sXfjzgG4DsDJ"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Rasterization settings for differentiable rendering, where the blur_radius\n",
|
||||
"# initialization is based on Liu et al, 'Soft Rasterizer: A Differentiable \n",
|
||||
"# Renderer for Image-based 3D Reasoning', ICCV 2019\n",
|
||||
"sigma = 1e-4\n",
|
||||
"raster_settings_soft = RasterizationSettings(\n",
|
||||
" image_size=128, \n",
|
||||
" blur_radius=np.log(1. / 1e-4 - 1.)*sigma, \n",
|
||||
" faces_per_pixel=50, \n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Silhouette renderer \n",
|
||||
"renderer_silhouette = MeshRenderer(\n",
|
||||
" rasterizer=MeshRasterizer(\n",
|
||||
" cameras=camera, \n",
|
||||
" raster_settings=raster_settings_soft\n",
|
||||
" ),\n",
|
||||
" shader=SoftSilhouetteShader()\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "SGJKbCB6R1Rk"
|
||||
},
|
||||
"source": [
|
||||
"We initialize settings, losses, and the optimizer that will be used to iteratively fit our mesh to the target silhouettes:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "0sLrKv_MEULh"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Number of views to optimize over in each SGD iteration\n",
|
||||
"num_views_per_iteration = 2\n",
|
||||
"# Number of optimization steps\n",
|
||||
"Niter = 2000\n",
|
||||
"# Plot period for the losses\n",
|
||||
"plot_period = 250\n",
|
||||
"\n",
|
||||
"%matplotlib inline\n",
|
||||
"\n",
|
||||
"# Optimize using rendered silhouette image loss, mesh edge loss, mesh normal \n",
|
||||
"# consistency, and mesh laplacian smoothing\n",
|
||||
"losses = {\"silhouette\": {\"weight\": 1.0, \"values\": []},\n",
|
||||
" \"edge\": {\"weight\": 1.0, \"values\": []},\n",
|
||||
" \"normal\": {\"weight\": 0.01, \"values\": []},\n",
|
||||
" \"laplacian\": {\"weight\": 1.0, \"values\": []},\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
"# Losses to smooth / regularize the mesh shape\n",
|
||||
"def update_mesh_shape_prior_losses(mesh, loss):\n",
|
||||
" # and (b) the edge length of the predicted mesh\n",
|
||||
" loss[\"edge\"] = mesh_edge_loss(mesh)\n",
|
||||
" \n",
|
||||
" # mesh normal consistency\n",
|
||||
" loss[\"normal\"] = mesh_normal_consistency(mesh)\n",
|
||||
" \n",
|
||||
" # mesh laplacian smoothing\n",
|
||||
" loss[\"laplacian\"] = mesh_laplacian_smoothing(mesh, method=\"uniform\")\n",
|
||||
"\n",
|
||||
"# We will learn to deform the source mesh by offsetting its vertices\n",
|
||||
"# The shape of the deform parameters is equal to the total number of vertices in\n",
|
||||
"# src_mesh\n",
|
||||
"verts_shape = src_mesh.verts_packed().shape\n",
|
||||
"deform_verts = torch.full(verts_shape, 0.0, device=device, requires_grad=True)\n",
|
||||
"\n",
|
||||
"# The optimizer\n",
|
||||
"optimizer = torch.optim.SGD([deform_verts], lr=1.0, momentum=0.9)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "QLc9zK8lEqFS"
|
||||
},
|
||||
"source": [
|
||||
"We write an optimization loop to iteratively refine our predicted mesh from the sphere mesh into a mesh that matches the silhouettes of the target images:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "gCfepfOoR1Rl"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loop = tqdm(range(Niter))\n",
|
||||
"\n",
|
||||
"for i in loop:\n",
|
||||
" # Initialize optimizer\n",
|
||||
" optimizer.zero_grad()\n",
|
||||
" \n",
|
||||
" # Deform the mesh\n",
|
||||
" new_src_mesh = src_mesh.offset_verts(deform_verts)\n",
|
||||
" \n",
|
||||
" # Losses to smooth /regularize the mesh shape\n",
|
||||
" loss = {k: torch.tensor(0.0, device=device) for k in losses}\n",
|
||||
" update_mesh_shape_prior_losses(new_src_mesh, loss)\n",
|
||||
" \n",
|
||||
" # Compute the average silhouette loss over two random views, as the average \n",
|
||||
" # squared L2 distance between the predicted silhouette and the target \n",
|
||||
" # silhouette from our dataset\n",
|
||||
" for j in np.random.permutation(num_views).tolist()[:num_views_per_iteration]:\n",
|
||||
" images_predicted = renderer_silhouette(new_src_mesh, cameras=target_cameras[j], lights=lights)\n",
|
||||
" predicted_silhouette = images_predicted[..., 3]\n",
|
||||
" loss_silhouette = ((predicted_silhouette - target_silhouette[j]) ** 2).mean()\n",
|
||||
" loss[\"silhouette\"] += loss_silhouette / num_views_per_iteration\n",
|
||||
" \n",
|
||||
" # Weighted sum of the losses\n",
|
||||
" sum_loss = torch.tensor(0.0, device=device)\n",
|
||||
" for k, l in loss.items():\n",
|
||||
" sum_loss += l * losses[k][\"weight\"]\n",
|
||||
" losses[k][\"values\"].append(l)\n",
|
||||
" \n",
|
||||
" # Print the losses\n",
|
||||
" loop.set_description(\"total_loss = %.6f\" % sum_loss)\n",
|
||||
" \n",
|
||||
" # Plot mesh\n",
|
||||
" if i % plot_period == 0:\n",
|
||||
" visualize_prediction(new_src_mesh, title=\"iter: %d\" % i, silhouette=True,\n",
|
||||
" target_image=target_silhouette[1])\n",
|
||||
" \n",
|
||||
" # Optimization step\n",
|
||||
" sum_loss.backward()\n",
|
||||
" optimizer.step()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "CX4huayKR1Rm",
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"visualize_prediction(new_src_mesh, silhouette=True, \n",
|
||||
" target_image=target_silhouette[1])\n",
|
||||
"plot_losses(losses)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "XJDsJQmrR1Ro"
|
||||
},
|
||||
"source": [
|
||||
"## 3. Mesh and texture prediction via textured rendering\n",
|
||||
"We can predict both the mesh and its texture if we add an additional loss based on comparing a predicted rendered RGB image to the target image. As before, we start with a sphere mesh. We learn both translational offsets and RGB texture colors for each vertex in the sphere mesh. Since our loss is based on rendered RGB pixel values instead of just the silhouette, we use a **SoftPhongShader** instead of a **SoftSilhouetteShader**."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "aZObyIt9R1Ro"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Rasterization settings for differentiable rendering, where the blur_radius\n",
|
||||
"# initialization is based on Liu et al, 'Soft Rasterizer: A Differentiable \n",
|
||||
"# Renderer for Image-based 3D Reasoning', ICCV 2019\n",
|
||||
"sigma = 1e-4\n",
|
||||
"raster_settings_soft = RasterizationSettings(\n",
|
||||
" image_size=128, \n",
|
||||
" blur_radius=np.log(1. / 1e-4 - 1.)*sigma, \n",
|
||||
" faces_per_pixel=50, \n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Differentiable soft renderer using per vertex RGB colors for texture\n",
|
||||
"renderer_textured = MeshRenderer(\n",
|
||||
" rasterizer=MeshRasterizer(\n",
|
||||
" cameras=camera, \n",
|
||||
" raster_settings=raster_settings_soft\n",
|
||||
" ),\n",
|
||||
" shader=SoftPhongShader(device=device, \n",
|
||||
" cameras=camera,\n",
|
||||
" lights=lights)\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "NM7gJux8GMQX"
|
||||
},
|
||||
"source": [
|
||||
"We initialize settings, losses, and the optimizer that will be used to iteratively fit our mesh to the target RGB images:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "BS6LAQquF3wq"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Number of views to optimize over in each SGD iteration\n",
|
||||
"num_views_per_iteration = 2\n",
|
||||
"# Number of optimization steps\n",
|
||||
"Niter = 2000\n",
|
||||
"# Plot period for the losses\n",
|
||||
"plot_period = 250\n",
|
||||
"\n",
|
||||
"%matplotlib inline\n",
|
||||
"\n",
|
||||
"# Optimize using rendered RGB image loss, rendered silhouette image loss, mesh \n",
|
||||
"# edge loss, mesh normal consistency, and mesh laplacian smoothing\n",
|
||||
"losses = {\"rgb\": {\"weight\": 1.0, \"values\": []},\n",
|
||||
" \"silhouette\": {\"weight\": 1.0, \"values\": []},\n",
|
||||
" \"edge\": {\"weight\": 1.0, \"values\": []},\n",
|
||||
" \"normal\": {\"weight\": 0.01, \"values\": []},\n",
|
||||
" \"laplacian\": {\"weight\": 1.0, \"values\": []},\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
"# We will learn to deform the source mesh by offsetting its vertices\n",
|
||||
"# The shape of the deform parameters is equal to the total number of vertices in \n",
|
||||
"# src_mesh\n",
|
||||
"verts_shape = src_mesh.verts_packed().shape\n",
|
||||
"deform_verts = torch.full(verts_shape, 0.0, device=device, requires_grad=True)\n",
|
||||
"\n",
|
||||
"# We will also learn per vertex colors for our sphere mesh that define texture \n",
|
||||
"# of the mesh\n",
|
||||
"sphere_verts_rgb = torch.full([1, verts_shape[0], 3], 0.5, device=device, requires_grad=True)\n",
|
||||
"\n",
|
||||
"# The optimizer\n",
|
||||
"optimizer = torch.optim.SGD([deform_verts, sphere_verts_rgb], lr=1.0, momentum=0.9)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "tzIAycuUR1Rq"
|
||||
},
|
||||
"source": [
|
||||
"We write an optimization loop to iteratively refine our predicted mesh and its vertex colors from the sphere mesh into a mesh that matches the target images:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "EKEH2p8-R1Rr"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loop = tqdm(range(Niter))\n",
|
||||
"\n",
|
||||
"for i in loop:\n",
|
||||
" # Initialize optimizer\n",
|
||||
" optimizer.zero_grad()\n",
|
||||
" \n",
|
||||
" # Deform the mesh\n",
|
||||
" new_src_mesh = src_mesh.offset_verts(deform_verts)\n",
|
||||
" \n",
|
||||
" # Add per vertex colors to texture the mesh\n",
|
||||
" new_src_mesh.textures = TexturesVertex(verts_features=sphere_verts_rgb) \n",
|
||||
" \n",
|
||||
" # Losses to smooth /regularize the mesh shape\n",
|
||||
" loss = {k: torch.tensor(0.0, device=device) for k in losses}\n",
|
||||
" update_mesh_shape_prior_losses(new_src_mesh, loss)\n",
|
||||
" \n",
|
||||
" # Randomly select two views to optimize over in this iteration. Compared\n",
|
||||
" # to using just one view, this helps resolve ambiguities between updating\n",
|
||||
" # mesh shape vs. updating mesh texture\n",
|
||||
" for j in np.random.permutation(num_views).tolist()[:num_views_per_iteration]:\n",
|
||||
" images_predicted = renderer_textured(new_src_mesh, cameras=target_cameras[j], lights=lights)\n",
|
||||
"\n",
|
||||
" # Squared L2 distance between the predicted silhouette and the target \n",
|
||||
" # silhouette from our dataset\n",
|
||||
" predicted_silhouette = images_predicted[..., 3]\n",
|
||||
" loss_silhouette = ((predicted_silhouette - target_silhouette[j]) ** 2).mean()\n",
|
||||
" loss[\"silhouette\"] += loss_silhouette / num_views_per_iteration\n",
|
||||
" \n",
|
||||
" # Squared L2 distance between the predicted RGB image and the target \n",
|
||||
" # image from our dataset\n",
|
||||
" predicted_rgb = images_predicted[..., :3]\n",
|
||||
" loss_rgb = ((predicted_rgb - target_rgb[j]) ** 2).mean()\n",
|
||||
" loss[\"rgb\"] += loss_rgb / num_views_per_iteration\n",
|
||||
" \n",
|
||||
" # Weighted sum of the losses\n",
|
||||
" sum_loss = torch.tensor(0.0, device=device)\n",
|
||||
" for k, l in loss.items():\n",
|
||||
" sum_loss += l * losses[k][\"weight\"]\n",
|
||||
" losses[k][\"values\"].append(l)\n",
|
||||
" \n",
|
||||
" # Print the losses\n",
|
||||
" loop.set_description(\"total_loss = %.6f\" % sum_loss)\n",
|
||||
" \n",
|
||||
" # Plot mesh\n",
|
||||
" if i % plot_period == 0:\n",
|
||||
" visualize_prediction(new_src_mesh, renderer=renderer_textured, title=\"iter: %d\" % i, silhouette=False)\n",
|
||||
" \n",
|
||||
" # Optimization step\n",
|
||||
" sum_loss.backward()\n",
|
||||
" optimizer.step()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "2qTcHO4rR1Rs",
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"visualize_prediction(new_src_mesh, renderer=renderer_textured, silhouette=False)\n",
|
||||
"plot_losses(losses)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "akBOm_xcNUms"
|
||||
},
|
||||
"source": [
|
||||
"Save the final predicted mesh:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "dXoIsGyhxRyK"
|
||||
},
|
||||
"source": [
|
||||
"## 4. Save the final predicted mesh"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"colab": {},
|
||||
"colab_type": "code",
|
||||
"id": "OQGhV-psKna8"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Fetch the verts and faces of the final predicted mesh\n",
|
||||
"final_verts, final_faces = new_src_mesh.get_mesh_verts_faces(0)\n",
|
||||
"\n",
|
||||
"# Scale normalize back to the original target size\n",
|
||||
"final_verts = final_verts * scale + center\n",
|
||||
"\n",
|
||||
"# Store the predicted mesh using save_obj\n",
|
||||
"final_obj = os.path.join('./', 'final_model.obj')\n",
|
||||
"save_obj(final_obj, final_verts, final_faces)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"colab_type": "text",
|
||||
"id": "MtKYp0B6R1Ru"
|
||||
},
|
||||
"source": [
|
||||
"## 5. Conclusion\n",
|
||||
"In this tutorial, we learned how to load a textured mesh from an obj file and create a synthetic dataset by rendering the mesh from multiple viewpoints. We showed how to set up an optimization loop to fit a mesh to the observed dataset images based on a rendered silhouette loss. We then augmented this optimization loop with an additional loss based on rendered RGB images, which allowed us to predict both a mesh and its texture."
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"accelerator": "GPU",
|
||||
"anp_metadata": {
|
||||
"path": "fbsource/fbcode/vision/fair/pytorch3d/docs/tutorials/fit_textured_mesh.ipynb"
|
||||
},
|
||||
"bento_stylesheets": {
|
||||
"bento/extensions/flow/main.css": true,
|
||||
"bento/extensions/kernel_selector/main.css": true,
|
||||
"bento/extensions/kernel_ui/main.css": true,
|
||||
"bento/extensions/new_kernel/main.css": true,
|
||||
"bento/extensions/system_usage/main.css": true,
|
||||
"bento/extensions/theme/main.css": true
|
||||
},
|
||||
"colab": {
|
||||
"name": "fit_textured_mesh.ipynb",
|
||||
"provenance": [],
|
||||
"toc_visible": true
|
||||
},
|
||||
"disseminate_notebook_info": {
|
||||
"backup_notebook_id": "781874812352022"
|
||||
},
|
||||
"kernelspec": {
|
||||
"display_name": "intro_to_cv",
|
||||
"language": "python",
|
||||
"name": "bento_kernel_intro_to_cv"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.7.5+"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 1
|
||||
}
|
||||
580
files/fit_textured_mesh.py
Normal file
580
files/fit_textured_mesh.py
Normal file
@@ -0,0 +1,580 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
|
||||
|
||||
|
||||
# # Fit a mesh via rendering
|
||||
#
|
||||
# This tutorial shows how to:
|
||||
# - Load a mesh and textures from an `.obj` file.
|
||||
# - Create a synthetic dataset by rendering a textured mesh from multiple viewpoints
|
||||
# - Fit a mesh to the observed synthetic images using differentiable silhouette rendering
|
||||
# - Fit a mesh and its textures using differentiable textured rendering
|
||||
|
||||
# ## 0. Install and Import modules
|
||||
|
||||
# If `torch`, `torchvision` and `pytorch3d` are not installed, run the following cell:
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
get_ipython().system('pip install torch torchvision')
|
||||
import sys
|
||||
import torch
|
||||
if torch.__version__=='1.6.0+cu101' and sys.platform.startswith('linux'):
|
||||
get_ipython().system('pip install pytorch3d')
|
||||
else:
|
||||
get_ipython().system("pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'")
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
import os
|
||||
import torch
|
||||
import matplotlib.pyplot as plt
|
||||
from skimage.io import imread
|
||||
|
||||
from pytorch3d.utils import ico_sphere
|
||||
import numpy as np
|
||||
from tqdm.notebook import tqdm
|
||||
|
||||
# Util function for loading meshes
|
||||
from pytorch3d.io import load_objs_as_meshes, save_obj
|
||||
|
||||
from pytorch3d.loss import (
|
||||
chamfer_distance,
|
||||
mesh_edge_loss,
|
||||
mesh_laplacian_smoothing,
|
||||
mesh_normal_consistency,
|
||||
)
|
||||
|
||||
# Data structures and functions for rendering
|
||||
from pytorch3d.structures import Meshes
|
||||
from pytorch3d.renderer import (
|
||||
look_at_view_transform,
|
||||
OpenGLPerspectiveCameras,
|
||||
PointLights,
|
||||
DirectionalLights,
|
||||
Materials,
|
||||
RasterizationSettings,
|
||||
MeshRenderer,
|
||||
MeshRasterizer,
|
||||
SoftPhongShader,
|
||||
SoftSilhouetteShader,
|
||||
SoftPhongShader,
|
||||
TexturesVertex
|
||||
)
|
||||
|
||||
# add path for demo utils functions
|
||||
import sys
|
||||
import os
|
||||
sys.path.append(os.path.abspath(''))
|
||||
|
||||
|
||||
# If using **Google Colab**, fetch the utils file for plotting image grids:
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
get_ipython().system('wget https://raw.githubusercontent.com/facebookresearch/pytorch3d/master/docs/tutorials/utils/plot_image_grid.py')
|
||||
from plot_image_grid import image_grid
|
||||
|
||||
|
||||
# OR if running **locally** uncomment and run the following cell:
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# from utils.plot_image_grid import image_grid
|
||||
|
||||
|
||||
# ### 1. Load a mesh and texture file
|
||||
#
|
||||
# Load an `.obj` file and its associated `.mtl` file and create a **Textures** and **Meshes** object.
|
||||
#
|
||||
# **Meshes** is a unique datastructure provided in PyTorch3D for working with batches of meshes of different sizes.
|
||||
#
|
||||
# **TexturesVertex** is an auxiliary data structure for storing vertex rgb texture information about meshes.
|
||||
#
|
||||
# **Meshes** has several class methods which are used throughout the rendering pipeline.
|
||||
|
||||
# If running this notebook using **Google Colab**, run the following cell to fetch the mesh obj and texture files and save them at the path `data/cow_mesh`:
|
||||
# If running locally, the data is already available at the correct path.
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
get_ipython().system('mkdir -p data/cow_mesh')
|
||||
get_ipython().system('wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.obj')
|
||||
get_ipython().system('wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.mtl')
|
||||
get_ipython().system('wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow_texture.png')
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Setup
|
||||
if torch.cuda.is_available():
|
||||
device = torch.device("cuda:0")
|
||||
torch.cuda.set_device(device)
|
||||
else:
|
||||
device = torch.device("cpu")
|
||||
|
||||
# Set paths
|
||||
DATA_DIR = "./data"
|
||||
obj_filename = os.path.join(DATA_DIR, "cow_mesh/cow.obj")
|
||||
|
||||
# Load obj file
|
||||
mesh = load_objs_as_meshes([obj_filename], device=device)
|
||||
|
||||
# We scale normalize and center the target mesh to fit in a sphere of radius 1
|
||||
# centered at (0,0,0). (scale, center) will be used to bring the predicted mesh
|
||||
# to its original center and scale. Note that normalizing the target mesh,
|
||||
# speeds up the optimization but is not necessary!
|
||||
verts = mesh.verts_packed()
|
||||
N = verts.shape[0]
|
||||
center = verts.mean(0)
|
||||
scale = max((verts - center).abs().max(0)[0])
|
||||
mesh.offset_verts_(-center.expand(N, 3))
|
||||
mesh.scale_verts_((1.0 / float(scale)));
|
||||
|
||||
|
||||
# ## 2. Dataset Creation
|
||||
#
|
||||
# We sample different camera positions that encode multiple viewpoints of the cow. We create a renderer with a shader that performs texture map interpolation. We render a synthetic dataset of images of the textured cow mesh from multiple viewpoints.
|
||||
#
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# the number of different viewpoints from which we want to render the mesh.
|
||||
num_views = 20
|
||||
|
||||
# Get a batch of viewing angles.
|
||||
elev = torch.linspace(0, 360, num_views)
|
||||
azim = torch.linspace(-180, 180, num_views)
|
||||
|
||||
# Place a point light in front of the object. As mentioned above, the front of
|
||||
# the cow is facing the -z direction.
|
||||
lights = PointLights(device=device, location=[[0.0, 0.0, -3.0]])
|
||||
|
||||
# Initialize an OpenGL perspective camera that represents a batch of different
|
||||
# viewing angles. All the cameras helper methods support mixed type inputs and
|
||||
# broadcasting. So we can view the camera from a distance of dist=2.7, and
|
||||
# then specify elevation and azimuth angles for each viewpoint as tensors.
|
||||
R, T = look_at_view_transform(dist=2.7, elev=elev, azim=azim)
|
||||
cameras = OpenGLPerspectiveCameras(device=device, R=R, T=T)
|
||||
|
||||
# We arbitrarily choose one particular view that will be used to visualize
|
||||
# results
|
||||
camera = OpenGLPerspectiveCameras(device=device, R=R[None, 1, ...],
|
||||
T=T[None, 1, ...])
|
||||
|
||||
# Define the settings for rasterization and shading. Here we set the output
|
||||
# image to be of size 128X128. As we are rendering images for visualization
|
||||
# purposes only we will set faces_per_pixel=1 and blur_radius=0.0. Refer to
|
||||
# rasterize_meshes.py for explanations of these parameters. We also leave
|
||||
# bin_size and max_faces_per_bin to their default values of None, which sets
|
||||
# their values using heuristics and ensures that the faster coarse-to-fine
|
||||
# rasterization method is used. Refer to docs/notes/renderer.md for an
|
||||
# explanation of the difference between naive and coarse-to-fine rasterization.
|
||||
raster_settings = RasterizationSettings(
|
||||
image_size=128,
|
||||
blur_radius=0.0,
|
||||
faces_per_pixel=1,
|
||||
)
|
||||
|
||||
# Create a phong renderer by composing a rasterizer and a shader. The textured
|
||||
# phong shader will interpolate the texture uv coordinates for each vertex,
|
||||
# sample from a texture image and apply the Phong lighting model
|
||||
renderer = MeshRenderer(
|
||||
rasterizer=MeshRasterizer(
|
||||
cameras=camera,
|
||||
raster_settings=raster_settings
|
||||
),
|
||||
shader=SoftPhongShader(
|
||||
device=device,
|
||||
cameras=camera,
|
||||
lights=lights
|
||||
)
|
||||
)
|
||||
|
||||
# Create a batch of meshes by repeating the cow mesh and associated textures.
|
||||
# Meshes has a useful `extend` method which allows us to do this very easily.
|
||||
# This also extends the textures.
|
||||
meshes = mesh.extend(num_views)
|
||||
|
||||
# Render the cow mesh from each viewing angle
|
||||
target_images = renderer(meshes, cameras=cameras, lights=lights)
|
||||
|
||||
# Our multi-view cow dataset will be represented by these 2 lists of tensors,
|
||||
# each of length num_views.
|
||||
target_rgb = [target_images[i, ..., :3] for i in range(num_views)]
|
||||
target_cameras = [OpenGLPerspectiveCameras(device=device, R=R[None, i, ...],
|
||||
T=T[None, i, ...]) for i in range(num_views)]
|
||||
|
||||
|
||||
# Visualize the dataset:
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# RGB images
|
||||
image_grid(target_images.cpu().numpy(), rows=4, cols=5, rgb=True)
|
||||
plt.show()
|
||||
|
||||
|
||||
# Later in this tutorial, we will fit a mesh to the rendered RGB images, as well as to images of just the cow silhouette. For the latter case, we will render a dataset of silhouette images. Most shaders in PyTorch3D will output an alpha channel along with the RGB image as a 4th channel in an RGBA image. The alpha channel encodes the probability that each pixel belongs to the foreground of the object. We construct a soft silhouette shader to render this alpha channel.
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Rasterization settings for silhouette rendering
|
||||
sigma = 1e-4
|
||||
raster_settings_silhouette = RasterizationSettings(
|
||||
image_size=128,
|
||||
blur_radius=np.log(1. / 1e-4 - 1.)*sigma,
|
||||
faces_per_pixel=50,
|
||||
)
|
||||
|
||||
# Silhouette renderer
|
||||
renderer_silhouette = MeshRenderer(
|
||||
rasterizer=MeshRasterizer(
|
||||
cameras=camera,
|
||||
raster_settings=raster_settings_silhouette
|
||||
),
|
||||
shader=SoftSilhouetteShader()
|
||||
)
|
||||
|
||||
# Render silhouette images. The 4th channel (index 3) of the rendering output is
|
||||
# the alpha/silhouette channel
|
||||
silhouette_images = renderer_silhouette(meshes, cameras=cameras, lights=lights)
|
||||
target_silhouette = [silhouette_images[i, ..., 3] for i in range(num_views)]
|
||||
|
||||
# Visualize silhouette images
|
||||
image_grid(silhouette_images.cpu().numpy(), rows=4, cols=5, rgb=False)
|
||||
plt.show()
|
||||
|
||||
|
||||
# ## 3. Mesh prediction via silhouette rendering
|
||||
# In the previous section, we created a dataset of images of multiple viewpoints of a cow. In this section, we predict a mesh by observing those target images without any knowledge of the ground truth cow mesh. We assume we know the position of the cameras and lighting.
|
||||
#
|
||||
# We first define some helper functions to visualize the results of our mesh prediction:
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Show a visualization comparing the rendered predicted mesh to the ground truth
|
||||
# mesh
|
||||
def visualize_prediction(predicted_mesh, renderer=renderer_silhouette,
                         target_image=target_rgb[1], title='',
                         silhouette=False):
    """Plot a rendering of `predicted_mesh` next to a target image.

    Args:
        predicted_mesh: mesh handed to `renderer`.
        renderer: callable used to render the mesh. Defaults to the
            `renderer_silhouette` object bound when this function is defined.
        target_image: reference image tensor shown in the right-hand panel.
            Defaults to `target_rgb[1]` as bound at definition time.
        title: title placed on the right-hand (target) panel.
        silhouette: if True only channel 3 of the rendering is shown,
            otherwise channels 0-2 are shown.
    """
    inds = 3 if silhouette else range(3)
    # This render is for display only, so do not record it in the autograd
    # graph (the function is called from inside the optimization loops).
    with torch.no_grad():
        predicted_images = renderer(predicted_mesh)
    plt.figure(figsize=(20, 10))
    plt.subplot(1, 2, 1)
    plt.imshow(predicted_images[0, ..., inds].cpu().detach().numpy())

    plt.subplot(1, 2, 2)
    plt.imshow(target_image.cpu().detach().numpy())
    plt.title(title)
    # Pass a real bool: the string "off" is truthy and is not reliably
    # interpreted as "disable the grid" across matplotlib versions.
    plt.grid(False)
    plt.axis("off")
|
||||
|
||||
# Plot losses as a function of optimization iteration
|
||||
def plot_losses(losses):
    """Plot every tracked loss against the optimization iteration.

    Args:
        losses: mapping from loss name to a dict whose "values" entry holds
            the recorded values of that loss, in the order they were appended.
    """
    fig = plt.figure(figsize=(13, 5))
    ax = fig.gca()
    for name, entry in losses.items():
        # The optimization loops append torch scalars (on the compute device,
        # still attached to the autograd graph). float() turns each of them
        # into a plain Python number that matplotlib can always plot, and is
        # a no-op for values that are already numbers.
        values = [float(v) for v in entry['values']]
        ax.plot(values, label=name + " loss")
    ax.legend(fontsize="16")
    ax.set_xlabel("Iteration", fontsize="16")
    ax.set_ylabel("Loss", fontsize="16")
    ax.set_title("Loss vs iterations", fontsize="16")
|
||||
|
||||
|
||||
# Starting from a sphere mesh, we will learn offsets of each vertex such that the predicted mesh silhouette is more similar to the target silhouette image at each optimization step. We begin by loading our initial sphere mesh:
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# We initialize the source shape to be a sphere of radius 1.
|
||||
src_mesh = ico_sphere(4, device)
|
||||
|
||||
|
||||
# We create a new differentiable renderer for rendering the silhouette of our predicted mesh:
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Rasterization settings for differentiable rendering, where the blur_radius
|
||||
# initialization is based on Liu et al, 'Soft Rasterizer: A Differentiable
|
||||
# Renderer for Image-based 3D Reasoning', ICCV 2019
|
||||
sigma = 1e-4
|
||||
raster_settings_soft = RasterizationSettings(
|
||||
image_size=128,
|
||||
blur_radius=np.log(1. / 1e-4 - 1.)*sigma,
|
||||
faces_per_pixel=50,
|
||||
)
|
||||
|
||||
# Silhouette renderer
|
||||
renderer_silhouette = MeshRenderer(
|
||||
rasterizer=MeshRasterizer(
|
||||
cameras=camera,
|
||||
raster_settings=raster_settings_soft
|
||||
),
|
||||
shader=SoftSilhouetteShader()
|
||||
)
|
||||
|
||||
|
||||
# We initialize settings, losses, and the optimizer that will be used to iteratively fit our mesh to the target silhouettes:
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Number of views to optimize over in each SGD iteration
|
||||
num_views_per_iteration = 2
|
||||
# Number of optimization steps
|
||||
Niter = 2000
|
||||
# Plot period for the losses
|
||||
plot_period = 250
|
||||
|
||||
get_ipython().run_line_magic('matplotlib', 'inline')
|
||||
|
||||
# Optimize using rendered silhouette image loss, mesh edge loss, mesh normal
|
||||
# consistency, and mesh laplacian smoothing
|
||||
losses = {"silhouette": {"weight": 1.0, "values": []},
|
||||
"edge": {"weight": 1.0, "values": []},
|
||||
"normal": {"weight": 0.01, "values": []},
|
||||
"laplacian": {"weight": 1.0, "values": []},
|
||||
}
|
||||
|
||||
# Losses to smooth / regularize the mesh shape
def update_mesh_shape_prior_losses(mesh, loss):
    """Store the shape regularization terms for `mesh` in `loss`, in place.

    Overwrites the "edge", "normal" and "laplacian" entries of the `loss`
    dictionary; nothing is returned.
    """
    loss.update(
        # edge length of the predicted mesh
        edge=mesh_edge_loss(mesh),
        # mesh normal consistency
        normal=mesh_normal_consistency(mesh),
        # mesh laplacian smoothing
        laplacian=mesh_laplacian_smoothing(mesh, method="uniform"),
    )
|
||||
|
||||
# The source mesh is deformed by learning an offset for each of its vertices,
# so the deform parameters have the same shape as the packed vertices of
# src_mesh and start at zero (no deformation).
verts_shape = src_mesh.verts_packed().shape
deform_verts = torch.zeros(verts_shape, device=device, requires_grad=True)

# The optimizer
optimizer = torch.optim.SGD([deform_verts], lr=1.0, momentum=0.9)
|
||||
|
||||
|
||||
# We write an optimization loop to iteratively refine our predicted mesh from the sphere mesh into a mesh that matches the silhouettes of the target images:
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
loop = tqdm(range(Niter))

for i in loop:
    # Initialize optimizer
    optimizer.zero_grad()

    # Deform the mesh
    new_src_mesh = src_mesh.offset_verts(deform_verts)

    # Losses to smooth / regularize the mesh shape
    loss = {k: torch.tensor(0.0, device=device) for k in losses}
    update_mesh_shape_prior_losses(new_src_mesh, loss)

    # Compute the average silhouette loss over num_views_per_iteration randomly
    # selected views, as the mean squared difference between the predicted
    # silhouette and the target silhouette from our dataset
    for j in np.random.permutation(num_views).tolist()[:num_views_per_iteration]:
        images_predicted = renderer_silhouette(new_src_mesh, cameras=target_cameras[j], lights=lights)
        # Channel index 3 of the rendered image is used as the predicted silhouette
        predicted_silhouette = images_predicted[..., 3]
        loss_silhouette = ((predicted_silhouette - target_silhouette[j]) ** 2).mean()
        loss["silhouette"] += loss_silhouette / num_views_per_iteration

    # Weighted sum of the losses
    sum_loss = torch.tensor(0.0, device=device)
    for k, value in loss.items():
        sum_loss += value * losses[k]["weight"]
        # Record a detached Python float rather than the tensor itself, so the
        # loss history does not keep every iteration's autograd graph alive
        losses[k]["values"].append(float(value.detach().cpu()))

    # Print the losses
    loop.set_description("total_loss = %.6f" % sum_loss)

    # Plot mesh
    if i % plot_period == 0:
        visualize_prediction(new_src_mesh, title="iter: %d" % i, silhouette=True,
                             target_image=target_silhouette[1])

    # Optimization step
    sum_loss.backward()
    optimizer.step()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Show the final silhouette prediction next to a target silhouette, then plot the recorded losses
visualize_prediction(new_src_mesh, silhouette=True, target_image=target_silhouette[1])
plot_losses(losses)
|
||||
|
||||
|
||||
# ## 3. Mesh and texture prediction via textured rendering
|
||||
# We can predict both the mesh and its texture if we add an additional loss based on comparing a predicted rendered RGB image to the target image. As before, we start with a sphere mesh. We learn both translational offsets and RGB texture colors for each vertex in the sphere mesh. Since our loss is based on rendered RGB pixel values instead of just the silhouette, we use a **SoftPhongShader** instead of a **SoftSilhouetteShader**.
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Soft rasterization settings for differentiable rendering. The blur_radius
# initialization follows Liu et al, 'Soft Rasterizer: A Differentiable
# Renderer for Image-based 3D Reasoning', ICCV 2019.
sigma = 1e-4
raster_settings_soft = RasterizationSettings(
    image_size=128,
    blur_radius=np.log(1.0 / 1e-4 - 1.0) * sigma,
    faces_per_pixel=50,
)

# Differentiable soft renderer using per vertex RGB colors for texture
renderer_textured = MeshRenderer(
    rasterizer=MeshRasterizer(cameras=camera, raster_settings=raster_settings_soft),
    shader=SoftPhongShader(device=device, cameras=camera, lights=lights),
)
|
||||
|
||||
|
||||
# We initialize settings, losses, and the optimizer that will be used to iteratively fit our mesh to the target RGB images:
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Number of views to optimize over in each SGD iteration
num_views_per_iteration = 2
# Number of optimization steps
Niter = 2000
# Plot period for the losses
plot_period = 250

get_ipython().run_line_magic('matplotlib', 'inline')

# Optimize using rendered RGB image loss, rendered silhouette image loss, mesh
# edge loss, mesh normal consistency, and mesh laplacian smoothing. Each term
# carries its weight in the total loss and a list that collects its value at
# every iteration.
losses = {
    name: {"weight": weight, "values": []}
    for name, weight in (
        ("rgb", 1.0),
        ("silhouette", 1.0),
        ("edge", 1.0),
        ("normal", 0.01),
        ("laplacian", 1.0),
    )
}
|
||||
|
||||
# The source mesh is deformed by learning an offset for each of its vertices,
# so the deform parameters have the same shape as the packed vertices of
# src_mesh and start at zero (no deformation).
verts_shape = src_mesh.verts_packed().shape
deform_verts = torch.zeros(verts_shape, device=device, requires_grad=True)

# We will also learn per vertex colors for our sphere mesh that define texture
# of the mesh; every RGB value starts at 0.5.
sphere_verts_rgb = torch.full([1, verts_shape[0], 3], 0.5, device=device, requires_grad=True)

# The optimizer updates both the vertex offsets and the vertex colors
optimizer = torch.optim.SGD([deform_verts, sphere_verts_rgb], lr=1.0, momentum=0.9)
|
||||
|
||||
|
||||
# We write an optimization loop to iteratively refine our predicted mesh and its vertex colors from the sphere mesh into a mesh that matches the target images:
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
loop = tqdm(range(Niter))

for i in loop:
    # Initialize optimizer
    optimizer.zero_grad()

    # Deform the mesh
    new_src_mesh = src_mesh.offset_verts(deform_verts)

    # Add per vertex colors to texture the mesh
    new_src_mesh.textures = TexturesVertex(verts_features=sphere_verts_rgb)

    # Losses to smooth / regularize the mesh shape
    loss = {k: torch.tensor(0.0, device=device) for k in losses}
    update_mesh_shape_prior_losses(new_src_mesh, loss)

    # Randomly select num_views_per_iteration views to optimize over in this
    # iteration. Compared to using just one view, this helps resolve
    # ambiguities between updating mesh shape vs. updating mesh texture
    for j in np.random.permutation(num_views).tolist()[:num_views_per_iteration]:
        images_predicted = renderer_textured(new_src_mesh, cameras=target_cameras[j], lights=lights)

        # Mean squared difference between the predicted silhouette and the
        # target silhouette from our dataset (channel index 3 of the render)
        predicted_silhouette = images_predicted[..., 3]
        loss_silhouette = ((predicted_silhouette - target_silhouette[j]) ** 2).mean()
        loss["silhouette"] += loss_silhouette / num_views_per_iteration

        # Mean squared difference between the predicted RGB image and the
        # target image from our dataset (first three channels of the render)
        predicted_rgb = images_predicted[..., :3]
        loss_rgb = ((predicted_rgb - target_rgb[j]) ** 2).mean()
        loss["rgb"] += loss_rgb / num_views_per_iteration

    # Weighted sum of the losses
    sum_loss = torch.tensor(0.0, device=device)
    for k, value in loss.items():
        sum_loss += value * losses[k]["weight"]
        # Record a detached Python float rather than the tensor itself, so the
        # loss history does not keep every iteration's autograd graph alive
        losses[k]["values"].append(float(value.detach().cpu()))

    # Print the losses
    loop.set_description("total_loss = %.6f" % sum_loss)

    # Plot mesh
    if i % plot_period == 0:
        visualize_prediction(new_src_mesh, renderer=renderer_textured, title="iter: %d" % i, silhouette=False)

    # Optimization step
    sum_loss.backward()
    optimizer.step()
|
||||
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Show the final textured prediction, then plot the recorded losses
visualize_prediction(
    new_src_mesh,
    renderer=renderer_textured,
    silhouette=False,
)
plot_losses(losses)
|
||||
|
||||
|
||||
# Save the final predicted mesh:
|
||||
|
||||
# ## 4. Save the final predicted mesh
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Take the verts and faces of the first (index 0) mesh in the final prediction
final_verts, final_faces = new_src_mesh.get_mesh_verts_faces(0)

# Undo the normalization so the mesh is back at the original target size
final_verts = final_verts * scale + center

# Write the predicted mesh to final_model.obj in the current directory
final_obj = os.path.join('./', 'final_model.obj')
save_obj(final_obj, final_verts, final_faces)
|
||||
|
||||
|
||||
# ## 5. Conclusion
|
||||
# In this tutorial, we learned how to load a textured mesh from an obj file and create a synthetic dataset by rendering the mesh from multiple viewpoints. We showed how to set up an optimization loop to fit a mesh to the observed dataset images based on a rendered silhouette loss. We then augmented this optimization loop with an additional loss based on rendered RGB images, which allowed us to predict both a mesh and its texture.
|
||||
File diff suppressed because one or more lines are too long
@@ -1,4 +1,4 @@
|
||||
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[ ]:
|
||||
@@ -20,14 +20,19 @@
|
||||
|
||||
# If `torch`, `torchvision` and `pytorch3d` are not installed, run the following cell:
|
||||
|
||||
# In[1]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
get_ipython().system('pip install torch torchvision')
|
||||
get_ipython().system("pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'")
|
||||
import sys
|
||||
import torch
|
||||
if torch.__version__=='1.6.0+cu101' and sys.platform.startswith('linux'):
|
||||
get_ipython().system('pip install pytorch3d')
|
||||
else:
|
||||
get_ipython().system("pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'")
|
||||
|
||||
|
||||
# In[1]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
import os
|
||||
@@ -36,20 +41,21 @@ import matplotlib.pyplot as plt
|
||||
from skimage.io import imread
|
||||
|
||||
# Util function for loading meshes
|
||||
from pytorch3d.io import load_objs_as_meshes
|
||||
from pytorch3d.io import load_objs_as_meshes, load_obj
|
||||
|
||||
# Data structures and functions for rendering
|
||||
from pytorch3d.structures import Meshes, Textures
|
||||
from pytorch3d.structures import Meshes
|
||||
from pytorch3d.renderer import (
|
||||
look_at_view_transform,
|
||||
OpenGLPerspectiveCameras,
|
||||
FoVPerspectiveCameras,
|
||||
PointLights,
|
||||
DirectionalLights,
|
||||
Materials,
|
||||
RasterizationSettings,
|
||||
MeshRenderer,
|
||||
MeshRasterizer,
|
||||
TexturedSoftPhongShader
|
||||
SoftPhongShader,
|
||||
TexturesUV
|
||||
)
|
||||
|
||||
# add path for demo utils functions
|
||||
@@ -60,7 +66,7 @@ sys.path.append(os.path.abspath(''))
|
||||
|
||||
# If using **Google Colab**, fetch the utils file for plotting image grids:
|
||||
|
||||
# In[2]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
get_ipython().system('wget https://raw.githubusercontent.com/facebookresearch/pytorch3d/master/docs/tutorials/utils/plot_image_grid.py')
|
||||
@@ -69,7 +75,7 @@ from plot_image_grid import image_grid
|
||||
|
||||
# OR if running **locally** uncomment and run the following cell:
|
||||
|
||||
# In[13]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# from utils import image_grid
|
||||
@@ -81,14 +87,14 @@ from plot_image_grid import image_grid
|
||||
#
|
||||
# **Meshes** is a unique datastructure provided in PyTorch3D for working with batches of meshes of different sizes.
|
||||
#
|
||||
# **Textures** is an auxillary datastructure for storing texture information about meshes.
|
||||
# **TexturesUV** is an auxiliary datastructure for storing vertex uv and texture maps for meshes.
|
||||
#
|
||||
# **Meshes** has several class methods which are used throughout the rendering pipeline.
|
||||
|
||||
# If running this notebook using **Google Colab**, run the following cell to fetch the mesh obj and texture files and save it at the path `data/cow_mesh`:
|
||||
# If running locally, the data is already available at the correct path.
|
||||
|
||||
# In[3]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
get_ipython().system('mkdir -p data/cow_mesh')
|
||||
@@ -97,12 +103,15 @@ get_ipython().system('wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytor
|
||||
get_ipython().system('wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow_texture.png')
|
||||
|
||||
|
||||
# In[2]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Setup
|
||||
device = torch.device("cuda:0")
|
||||
torch.cuda.set_device(device)
|
||||
if torch.cuda.is_available():
|
||||
device = torch.device("cuda:0")
|
||||
torch.cuda.set_device(device)
|
||||
else:
|
||||
device = torch.device("cpu")
|
||||
|
||||
# Set paths
|
||||
DATA_DIR = "./data"
|
||||
@@ -115,7 +124,7 @@ texture_image=mesh.textures.maps_padded()
|
||||
|
||||
# #### Let's visualize the texture map
|
||||
|
||||
# In[3]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
plt.figure(figsize=(7,7))
|
||||
@@ -130,14 +139,14 @@ plt.axis('off');
|
||||
#
|
||||
# In this example we will first create a **renderer** which uses a **perspective camera**, a **point light** and applies **phong shading**. Then we learn how to vary different components using the modular API.
|
||||
|
||||
# In[4]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Initialize an OpenGL perspective camera.
|
||||
# Initialize a camera.
|
||||
# With world coordinates +Y up, +X left and +Z in, the front of the cow is facing the -Z direction.
|
||||
# So we move the camera by 180 in the azimuth direction so it is facing the front of the cow.
|
||||
R, T = look_at_view_transform(2.7, 0, 180)
|
||||
cameras = OpenGLPerspectiveCameras(device=device, R=R, T=T)
|
||||
cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
|
||||
|
||||
# Define the settings for rasterization and shading. Here we set the output image to be of size
|
||||
# 512x512. As we are rendering images for visualization purposes only we will set faces_per_pixel=1
|
||||
@@ -149,8 +158,6 @@ raster_settings = RasterizationSettings(
|
||||
image_size=512,
|
||||
blur_radius=0.0,
|
||||
faces_per_pixel=1,
|
||||
bin_size = None, # this setting controls whether naive or coarse-to-fine rasterization is used
|
||||
max_faces_per_bin = None # this setting is for coarse rasterization
|
||||
)
|
||||
|
||||
# Place a point light in front of the object. As mentioned above, the front of the cow is facing the
|
||||
@@ -165,7 +172,7 @@ renderer = MeshRenderer(
|
||||
cameras=cameras,
|
||||
raster_settings=raster_settings
|
||||
),
|
||||
shader=TexturedSoftPhongShader(
|
||||
shader=SoftPhongShader(
|
||||
device=device,
|
||||
cameras=cameras,
|
||||
lights=lights
|
||||
@@ -177,7 +184,7 @@ renderer = MeshRenderer(
|
||||
|
||||
# The light is in front of the object so it is bright and the image has specular highlights.
|
||||
|
||||
# In[5]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
images = renderer(mesh)
|
||||
@@ -195,7 +202,7 @@ plt.axis("off");
|
||||
#
|
||||
# The image is now dark as there is only ambient lighting, and there are no specular highlights.
|
||||
|
||||
# In[6]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Now move the light so it is on the +Z axis which will be behind the cow.
|
||||
@@ -203,7 +210,7 @@ lights.location = torch.tensor([0.0, 0.0, +1.0], device=device)[None]
|
||||
images = renderer(mesh, lights=lights)
|
||||
|
||||
|
||||
# In[7]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
plt.figure(figsize=(10, 10))
|
||||
@@ -220,12 +227,12 @@ plt.axis("off");
|
||||
# - change the **position** of the point light
|
||||
# - change the **material reflectance** properties of the mesh
|
||||
|
||||
# In[8]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Rotate the object by increasing the elevation and azimuth angles
|
||||
R, T = look_at_view_transform(dist=2.7, elev=10, azim=-150)
|
||||
cameras = OpenGLPerspectiveCameras(device=device, R=R, T=T)
|
||||
cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
|
||||
|
||||
# Move the light location so the light is shining on the cow's face.
|
||||
lights.location = torch.tensor([[2.0, 2.0, -2.0]], device=device)
|
||||
@@ -241,7 +248,7 @@ materials = Materials(
|
||||
images = renderer(mesh, lights=lights, materials=materials, cameras=cameras)
|
||||
|
||||
|
||||
# In[9]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
plt.figure(figsize=(10, 10))
|
||||
@@ -256,7 +263,7 @@ plt.axis("off");
|
||||
# The renderer and associated components can take batched inputs and **render a batch of output images in one forward pass**. We will now use this feature to render the mesh from many different viewpoints.
|
||||
#
|
||||
|
||||
# In[10]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# Set batch size - this is the number of different viewpoints from which we want to render the mesh.
|
||||
@@ -275,13 +282,13 @@ azim = torch.linspace(-180, 180, batch_size)
|
||||
# view the camera from the same distance and specify dist=2.7 as a float,
|
||||
# and then specify elevation and azimuth angles for each viewpoint as tensors.
|
||||
R, T = look_at_view_transform(dist=2.7, elev=elev, azim=azim)
|
||||
cameras = OpenGLPerspectiveCameras(device=device, R=R, T=T)
|
||||
cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
|
||||
|
||||
# Move the light back in front of the cow which is facing the -z direction.
|
||||
lights.location = torch.tensor([[0.0, 0.0, -3.0]], device=device)
|
||||
|
||||
|
||||
# In[11]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
# We can pass arbitrary keyword arguments to the rasterizer/shader via the renderer
|
||||
@@ -289,7 +296,7 @@ lights.location = torch.tensor([[0.0, 0.0, -3.0]], device=device)
|
||||
images = renderer(meshes, cameras=cameras, lights=lights)
|
||||
|
||||
|
||||
# In[14]:
|
||||
# In[ ]:
|
||||
|
||||
|
||||
image_grid(images.cpu().numpy(), rows=4, cols=5, rgb=True)
|
||||
|
||||
Reference in New Issue
Block a user