1 Commits

Author SHA1 Message Date
Christoph Lassner
e7c1f026ea [pulsar] Removing LOGGER.debug statements for performance gain.
We identified that these logging statements can deteriorate performance in certain cases. I propose removing them from the regular renderer implementation and letting individuals re-insert debug logging wherever needed on a case-by-case basis.
2022-07-25 09:08:58 -07:00
106 changed files with 1798 additions and 5266 deletions

View File

@@ -159,7 +159,7 @@ jobs:
binary_macos_wheel:
<<: *binary_common
macos:
xcode: "13.4.1"
xcode: "12.0"
steps:
- checkout
- run:

View File

@@ -159,7 +159,7 @@ jobs:
binary_macos_wheel:
<<: *binary_common
macos:
xcode: "13.4.1"
xcode: "12.0"
steps:
- checkout
- run:

View File

@@ -12,7 +12,6 @@ Key features include:
- Data structure for storing and manipulating triangle meshes
- Efficient operations on triangle meshes (projective transformations, graph convolution, sampling, loss functions)
- A differentiable mesh renderer
- Implicitron, see [its README](projects/implicitron_trainer), a framework for new-view synthesis via implicit representations.
PyTorch3D is designed to integrate smoothly with deep learning methods for predicting and manipulating 3D data.
For this reason, all operators in PyTorch3D:
@@ -94,7 +93,6 @@ In alphabetical order:
* Amitav Baruah
* Steve Branson
* Krzysztof Chalupka
* Luya Gao
* Georgia Gkioxari
* Taylor Gordon

View File

@@ -89,7 +89,7 @@
"except ModuleNotFoundError:\n",
" need_pytorch3d=True\n",
"if need_pytorch3d:\n",
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
" # We try to install PyTorch3D via a released wheel.\n",
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
" version_str=\"\".join([\n",

View File

@@ -76,7 +76,7 @@
"except ModuleNotFoundError:\n",
" need_pytorch3d=True\n",
"if need_pytorch3d:\n",
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
" # We try to install PyTorch3D via a released wheel.\n",
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
" version_str=\"\".join([\n",

View File

@@ -1,5 +1,5 @@
# Acknowledgements
Thank you to Keenan Crane for allowing the cow mesh model to be used freely in the public domain.
Thank you to Keenen Crane for allowing the cow mesh model to be used freely in the public domain.
###### Source: http://www.cs.cmu.edu/~kmcrane/Projects/ModelRepository/

View File

@@ -51,7 +51,7 @@
"except ModuleNotFoundError:\n",
" need_pytorch3d=True\n",
"if need_pytorch3d:\n",
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
" # We try to install PyTorch3D via a released wheel.\n",
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
" version_str=\"\".join([\n",

View File

@@ -90,7 +90,7 @@
"except ModuleNotFoundError:\n",
" need_pytorch3d=True\n",
"if need_pytorch3d:\n",
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
" # We try to install PyTorch3D via a released wheel.\n",
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
" version_str=\"\".join([\n",

View File

@@ -56,7 +56,7 @@
"except ModuleNotFoundError:\n",
" need_pytorch3d=True\n",
"if need_pytorch3d:\n",
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
" # We try to install PyTorch3D via a released wheel.\n",
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
" version_str=\"\".join([\n",

View File

@@ -68,7 +68,7 @@
"except ModuleNotFoundError:\n",
" need_pytorch3d=True\n",
"if need_pytorch3d:\n",
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
" # We try to install PyTorch3D via a released wheel.\n",
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
" version_str=\"\".join([\n",

View File

@@ -47,7 +47,7 @@
"except ModuleNotFoundError:\n",
" need_pytorch3d=True\n",
"if need_pytorch3d:\n",
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
" # We try to install PyTorch3D via a released wheel.\n",
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
" version_str=\"\".join([\n",

File diff suppressed because it is too large Load Diff

View File

@@ -1,913 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659619824914,
"executionStopTime": 1659619825485,
"originalKey": "d38652e8-200a-413c-a36a-f4d349b78a9d",
"requestMsgId": "641de8aa-0e42-4446-9304-c160a2d226bf",
"showInput": true
},
"outputs": [],
"source": [
"# Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"customInput": null,
"originalKey": "a48a9dcf-e80f-474b-a0c4-2c9a765b15c5",
"showInput": false
},
"source": [
"# A simple model using Implicitron\n",
"\n",
"In this demo, we use the VolumeRenderer from PyTorch3D as a custom implicit function in Implicitron. We will see\n",
"* some of the main objects in Implicitron\n",
"* how to plug in a custom part of a model"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"customInput": null,
"originalKey": "51337c0e-ad27-4b75-ad6a-737dca5d7b95",
"showInput": false
},
"source": [
"## 0. Install and import modules\n",
"\n",
"Ensure `torch` and `torchvision` are installed. If `pytorch3d` is not installed, install it using the following cell:\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659619898147,
"executionStopTime": 1659619898274,
"originalKey": "76f1ecd4-6b73-4214-81b0-118ef8d86872",
"requestMsgId": "deb6a860-6923-4227-abef-d31388b5142d",
"showInput": true
},
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"import torch\n",
"need_pytorch3d=False\n",
"try:\n",
" import pytorch3d\n",
"except ModuleNotFoundError:\n",
" need_pytorch3d=True\n",
"if need_pytorch3d:\n",
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
" # We try to install PyTorch3D via a released wheel.\n",
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
" version_str=\"\".join([\n",
" f\"py3{sys.version_info.minor}_cu\",\n",
" torch.version.cuda.replace(\".\",\"\"),\n",
" f\"_pyt{pyt_version_str}\"\n",
" ])\n",
" !pip install fvcore iopath\n",
" !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",
" else:\n",
" # We try to install PyTorch3D from source.\n",
" !curl -LO https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz\n",
" !tar xzf 1.10.0.tar.gz\n",
" os.environ[\"CUB_HOME\"] = os.getcwd() + \"/cub-1.10.0\"\n",
" !pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"customInput": null,
"originalKey": "2c1020e6-eb4a-4644-9719-9147500d8e4f",
"showInput": false
},
"source": [
"Ensure omegaconf and visdom are installed. If not, run this cell. (It should not be necessary to restart the runtime.)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"customInput": null,
"customOutput": null,
"originalKey": "9e751931-a38d-44c9-9ff1-ac2f7d3a3f99",
"showInput": true
},
"outputs": [],
"source": [
"!pip install omegaconf visdom"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customOutput": null,
"executionStartTime": 1659612480556,
"executionStopTime": 1659612480644,
"hidden_ranges": [],
"originalKey": "86807e4a-1675-4520-a033-c7af85b233ec",
"requestMsgId": "880a7e20-4a90-4b37-a5eb-bccc0b23cac6"
},
"outputs": [],
"source": [
"import logging\n",
"from typing import Tuple\n",
"\n",
"import matplotlib.animation as animation\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import torch\n",
"import tqdm\n",
"from IPython.display import HTML\n",
"from omegaconf import OmegaConf\n",
"from PIL import Image\n",
"from pytorch3d.implicitron.dataset.dataset_base import FrameData\n",
"from pytorch3d.implicitron.dataset.rendered_mesh_dataset_map_provider import RenderedMeshDatasetMapProvider\n",
"from pytorch3d.implicitron.models.generic_model import GenericModel\n",
"from pytorch3d.implicitron.models.implicit_function.base import ImplicitFunctionBase\n",
"from pytorch3d.implicitron.models.renderer.base import EvaluationMode\n",
"from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args, registry, remove_unused_components\n",
"from pytorch3d.renderer import RayBundle\n",
"from pytorch3d.renderer.implicit.renderer import VolumeSampler\n",
"from pytorch3d.structures import Volumes\n",
"from pytorch3d.vis.plotly_vis import plot_batch_individually, plot_scene"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659610929375,
"executionStopTime": 1659610929383,
"hidden_ranges": [],
"originalKey": "b2d9f5bd-a9d4-4f78-b21e-92f2658e0fe9",
"requestMsgId": "7e43e623-4030-438b-af4e-b96170c9a052",
"showInput": true
},
"outputs": [],
"source": [
"output_resolution = 80"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659610930042,
"executionStopTime": 1659610930050,
"hidden_ranges": [],
"originalKey": "0b0c2087-4c86-4c57-b0ee-6f48a70a9c78",
"requestMsgId": "46883aad-f00b-4fd4-ac17-eec0b2ac272a",
"showInput": true
},
"outputs": [],
"source": [
"torch.set_printoptions(sci_mode=False)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"customInput": null,
"originalKey": "37809d0d-b02e-42df-85b6-cdd038373653",
"showInput": false
},
"source": [
"## 1. Load renders of a mesh (the cow mesh) as a dataset\n",
"\n",
"A dataset's train, val and test parts in Implicitron are represented as a `dataset_map`, and provided by an implementation of `DatasetMapProvider`. \n",
"`RenderedMeshDatasetMapProvider` is one which generates a single-scene dataset with only a train component by taking a mesh and rendering it.\n",
"We use it with the cow mesh."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659620739780,
"executionStopTime": 1659620739914,
"originalKey": "cc68cb9c-b8bf-4e9e-bef1-2cfafdf6caa2",
"requestMsgId": "398cfcae-5d43-4b6f-9c75-db3d297364d4",
"showInput": false
},
"source": [
"If running this notebook using **Google Colab**, run the following cell to fetch the mesh obj and texture files and save it at the path data/cow_mesh.\n",
"If running locally, the data is already available at the correct path."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"customInput": null,
"customOutput": null,
"originalKey": "2c55e002-a885-4169-8fdc-af9078b05968",
"showInput": true
},
"outputs": [],
"source": [
"!mkdir -p data/cow_mesh\n",
"!wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.obj\n",
"!wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.mtl\n",
"!wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow_texture.png"
]
},
{
"cell_type": "markdown",
"metadata": {
"customInput": null,
"originalKey": "2a976be8-01bf-4a1c-a6e7-61d5d08c3dbd",
"showInput": false
},
"source": [
"If we want to instantiate one of Implicitron's configurable objects, such as `RenderedMeshDatasetMapProvider`, without using the OmegaConf initialisation (get_default_args), we need to call `expand_args_fields` on the class first."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customOutput": null,
"executionStartTime": 1659621652237,
"executionStopTime": 1659621652903,
"hidden_ranges": [],
"originalKey": "eb77aaec-048c-40bd-bd69-0e66b6ab60b1",
"requestMsgId": "09b9975c-ff86-41c9-b4a9-975d23afc562",
"showInput": true
},
"outputs": [],
"source": [
"expand_args_fields(RenderedMeshDatasetMapProvider)\n",
"cow_provider = RenderedMeshDatasetMapProvider(\n",
" data_file=\"data/cow_mesh/cow.obj\",\n",
" use_point_light=False,\n",
" resolution=output_resolution,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659610966145,
"executionStopTime": 1659610966255,
"hidden_ranges": [],
"originalKey": "8210e15b-da48-4306-a49a-41c4e7e7d42f",
"requestMsgId": "c243edd2-a106-4fba-8471-dfa4f99a2088",
"showInput": true
},
"outputs": [],
"source": [
"dataset_map = cow_provider.get_dataset_map()\n",
"tr_cameras = [training_frame.camera for training_frame in dataset_map.train]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659610967703,
"executionStopTime": 1659610967848,
"hidden_ranges": [],
"originalKey": "458d72ad-d9a7-4f13-b5b7-90d2aec61c16",
"requestMsgId": "7f9431f3-8717-4d89-a7fe-1420dd0e00c4",
"showInput": true
},
"outputs": [],
"source": [
"# The cameras are all in the XZ plane, in a circle about 2.7 from the origin\n",
"centers = torch.cat([i.get_camera_center() for i in tr_cameras])\n",
"print(centers.min(0).values)\n",
"print(centers.max(0).values)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659552920194,
"executionStopTime": 1659552923122,
"hidden_ranges": [],
"originalKey": "931e712b-b141-437a-97fb-dc2a07ce3458",
"requestMsgId": "931e712b-b141-437a-97fb-dc2a07ce3458",
"showInput": true
},
"outputs": [],
"source": [
"# visualization of the cameras\n",
"plot = plot_scene({\"k\": {i: camera for i, camera in enumerate(tr_cameras)}}, camera_scale=0.25)\n",
"plot.layout.scene.aspectmode = \"data\"\n",
"plot"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"customInput": null,
"originalKey": "afa9c02d-f76b-4f68-83e9-9733c615406b",
"showInput": false
},
"source": [
"## 2. Custom implicit function 🧊\n",
"\n",
"At the core of neural rendering methods are functions of spatial coordinates called implicit functions, which are used in some kind of rendering process.\n",
"(Often those functions can additionally take other data as well, such as view direction.)\n",
"A common rendering process is ray marching over densities and colors provided by an implicit function.\n",
"In our case, taking samples from a 3D volume grid is a very simple function of spatial coordinates. \n",
"\n",
"Here we define our own implicit function, which uses PyTorch3D's existing functionality for sampling from a volume grid.\n",
"We do this by subclassing `ImplicitFunctionBase`.\n",
"We need to register our subclass with a special decorator.\n",
"We use Python's dataclass annotations for configuring the module."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659613575850,
"executionStopTime": 1659613575940,
"hidden_ranges": [],
"originalKey": "61b55043-dc52-4de7-992e-e2195edd2123",
"requestMsgId": "dfaace3c-098c-4ffe-9240-6a7ae0ff271e",
"showInput": true
},
"outputs": [],
"source": [
"@registry.register\n",
"class MyVolumes(ImplicitFunctionBase, torch.nn.Module):\n",
" grid_resolution: int = 50 # common HWD of volumes, the number of voxels in each direction\n",
" extent: float = 1.0 # In world coordinates, the volume occupies is [-extent, extent] along each axis\n",
"\n",
" def __post_init__(self):\n",
" # We have to call this explicitly if there are other base classes like Module\n",
" super().__init__()\n",
"\n",
" # We define parameters like other torch.nn.Module objects.\n",
" # In this case, both our parameter tensors are trainable; they govern the contents of the volume grid.\n",
" density = torch.full((self.grid_resolution, self.grid_resolution, self.grid_resolution), -2.0)\n",
" self.density = torch.nn.Parameter(density)\n",
" color = torch.full((3, self.grid_resolution, self.grid_resolution, self.grid_resolution), 0.0)\n",
" self.color = torch.nn.Parameter(color)\n",
" self.density_activation = torch.nn.Softplus()\n",
"\n",
" def forward(\n",
" self,\n",
" ray_bundle: RayBundle,\n",
" fun_viewpool=None,\n",
" global_code=None,\n",
" ):\n",
" densities = self.density_activation(self.density[None, None])\n",
" voxel_size = 2.0 * float(self.extent) / self.grid_resolution\n",
" features = self.color.sigmoid()[None]\n",
"\n",
" # Like other PyTorch3D structures, the actual Volumes object should only exist as long\n",
" # as one iteration of training. It is local to this function.\n",
"\n",
" volume = Volumes(densities=densities, features=features, voxel_size=voxel_size)\n",
" sampler = VolumeSampler(volumes=volume)\n",
" densities, features = sampler(ray_bundle)\n",
"\n",
" # When an implicit function is used for raymarching, i.e. for MultiPassEmissionAbsorptionRenderer,\n",
" # it must return (densities, features, an auxiliary tuple)\n",
" return densities, features, {}\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"customInput": null,
"originalKey": "abaf2cd6-1b68-400e-a142-8fb9f49953f3",
"showInput": false
},
"source": [
"## 3. Construct the model object.\n",
"\n",
"The main model object in PyTorch3D is `GenericModel`, which has pluggable components for the major steps, including the renderer and the implicit function(s).\n",
"There are two ways to construct it which are equivalent here."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659621267561,
"executionStopTime": 1659621267938,
"originalKey": "f26c3dce-fbae-4592-bd0e-e4a8abc57c2c",
"requestMsgId": "9213687e-1caf-46a8-a4e5-a9c531530092",
"showInput": true
},
"outputs": [],
"source": [
"CONSTRUCT_MODEL_FROM_CONFIG = True\n",
"if CONSTRUCT_MODEL_FROM_CONFIG:\n",
" # Via a DictConfig - this is how our training loop with hydra works\n",
" cfg = get_default_args(GenericModel)\n",
" cfg.implicit_function_class_type = \"MyVolumes\"\n",
" cfg.render_image_height=output_resolution\n",
" cfg.render_image_width=output_resolution\n",
" cfg.loss_weights={\"loss_rgb_huber\": 1.0}\n",
" cfg.tqdm_trigger_threshold=19000\n",
" cfg.raysampler_AdaptiveRaySampler_args.scene_extent= 4.0\n",
" gm = GenericModel(**cfg)\n",
"else:\n",
" # constructing GenericModel directly\n",
" expand_args_fields(GenericModel)\n",
" gm = GenericModel(\n",
" implicit_function_class_type=\"MyVolumes\",\n",
" render_image_height=output_resolution,\n",
" render_image_width=output_resolution,\n",
" loss_weights={\"loss_rgb_huber\": 1.0},\n",
" tqdm_trigger_threshold=19000,\n",
" raysampler_AdaptiveRaySampler_args = {\"scene_extent\": 4.0}\n",
" )\n",
"\n",
" # In this case we can get the equivalent DictConfig cfg object to the way gm is configured as follows\n",
" cfg = OmegaConf.structured(gm)\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659611214689,
"executionStopTime": 1659611214748,
"hidden_ranges": [],
"originalKey": "4e659f7d-ce66-4999-83de-005eb09d7705",
"requestMsgId": "7b815b2b-cf19-44d0-ae89-76fde6df35ec",
"showInput": false
},
"source": [
" The default renderer is an emission-absorbtion raymarcher. We keep that default."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659621268007,
"executionStopTime": 1659621268190,
"hidden_ranges": [],
"originalKey": "d37ae488-c57c-44d3-9def-825dc1a6495b",
"requestMsgId": "71143ec1-730f-4876-8a14-e46eea9d6dd1",
"showInput": true
},
"outputs": [],
"source": [
"# We can display the configuration in use as follows.\n",
"remove_unused_components(cfg)\n",
"yaml = OmegaConf.to_yaml(cfg, sort_keys=False)\n",
"%page -r yaml"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659621268727,
"executionStopTime": 1659621268776,
"hidden_ranges": [],
"originalKey": "52e53179-3c6e-4c1f-a38a-3a6d803687bb",
"requestMsgId": "05de9bc3-3f74-4a6f-851c-9ec919b59506",
"showInput": true
},
"outputs": [],
"source": [
"device = torch.device(\"cuda:0\")\n",
"gm.to(device)\n",
"assert next(gm.parameters()).is_cuda"
]
},
{
"cell_type": "markdown",
"metadata": {
"customInput": null,
"originalKey": "528a7d53-c645-49c2-9021-09adbb18cd23",
"showInput": false
},
"source": [
"## 4. train the model "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659621270236,
"executionStopTime": 1659621270446,
"hidden_ranges": [],
"originalKey": "953280bd-3161-42ba-8dcb-0c8ef2d5cc25",
"requestMsgId": "9bba424b-7bfd-4e5a-9d79-ae316e20bab0",
"showInput": true
},
"outputs": [],
"source": [
"train_data_collated = [FrameData.collate([frame.to(device)]) for frame in dataset_map.train]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659621270815,
"executionStopTime": 1659621270948,
"hidden_ranges": [],
"originalKey": "2fcf07f0-0c28-49c7-8c76-1c9a9d810167",
"requestMsgId": "821deb43-6084-4ece-83c3-dee214562c47",
"showInput": true
},
"outputs": [],
"source": [
"gm.train()\n",
"optimizer = torch.optim.Adam(gm.parameters(), lr=0.1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customOutput": null,
"executionStartTime": 1659621271875,
"executionStopTime": 1659621298146,
"hidden_ranges": [],
"originalKey": "105099f7-ed0c-4e7f-a976-61a93fd0a8fe",
"requestMsgId": "0c87c108-83e3-4129-ad02-85e0140f1368",
"showInput": true
},
"outputs": [],
"source": [
"iterator = tqdm.tqdm(range(2000))\n",
"for n_batch in iterator:\n",
" optimizer.zero_grad()\n",
"\n",
" frame = train_data_collated[n_batch % len(dataset_map.train)]\n",
" out = gm(**frame, evaluation_mode=EvaluationMode.TRAINING)\n",
" out[\"objective\"].backward()\n",
" if n_batch % 100 == 0:\n",
" iterator.set_postfix_str(f\"loss: {float(out['objective']):.5f}\")\n",
" optimizer.step()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659535024768,
"executionStopTime": 1659535024906,
"originalKey": "e3cd494a-536b-48bc-8290-c048118c82eb",
"requestMsgId": "e3cd494a-536b-48bc-8290-c048118c82eb",
"showInput": false
},
"source": [
"## 5. Evaluate the module\n",
"\n",
"We generate complete images from all the viewpoints to see how they look."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659621299859,
"executionStopTime": 1659621311133,
"hidden_ranges": [],
"originalKey": "fbe1b2ea-cc24-4b20-a2d7-0249185e34a5",
"requestMsgId": "771ef1f8-5eee-4932-9e81-33604bf0512a",
"showInput": true
},
"outputs": [],
"source": [
"def to_numpy_image(image):\n",
" # Takes an image of shape (C, H, W) in [0,1], where C=3 or 1\n",
" # to a numpy uint image of shape (H, W, 3)\n",
" return (image * 255).to(torch.uint8).permute(1, 2, 0).detach().cpu().expand(-1, -1, 3).numpy()\n",
"def resize_image(image):\n",
" # Takes images of shape (B, C, H, W) to (B, C, output_resolution, output_resolution)\n",
" return torch.nn.functional.interpolate(image, size=(output_resolution, output_resolution))\n",
"\n",
"gm.eval()\n",
"images = []\n",
"expected = []\n",
"masks = []\n",
"masks_expected = []\n",
"for frame in tqdm.tqdm(train_data_collated):\n",
" with torch.no_grad():\n",
" out = gm(**frame, evaluation_mode=EvaluationMode.EVALUATION)\n",
"\n",
" image_rgb = to_numpy_image(out[\"images_render\"][0])\n",
" mask = to_numpy_image(out[\"masks_render\"][0])\n",
" expd = to_numpy_image(resize_image(frame.image_rgb)[0])\n",
" mask_expected = to_numpy_image(resize_image(frame.fg_probability)[0])\n",
"\n",
" images.append(image_rgb)\n",
" masks.append(mask)\n",
" expected.append(expd)\n",
" masks_expected.append(mask_expected)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659614622542,
"executionStopTime": 1659614622757,
"originalKey": "24953039-9780-40fd-bd81-5d63e9f40069",
"requestMsgId": "7af895a3-dfe4-4c28-ac3b-4ff0fbb40c7f",
"showInput": false
},
"source": [
"We draw a grid showing predicted image and expected image, followed by predicted mask and expected mask, from each viewpoint. \n",
"This is a grid of four rows of images, wrapped in to several large rows, i.e..\n",
"<small><center>\n",
"```\n",
"┌────────┬────────┐ ┌────────┐\n",
"│pred │pred │ │pred │\n",
"│image │image │ │image │\n",
"│1 │2 │ │n │\n",
"├────────┼────────┤ ├────────┤\n",
"│expected│expected│ │expected│\n",
"│image │image │ ... │image │\n",
"│1 │2 │ │n │\n",
"├────────┼────────┤ ├────────┤\n",
"│pred │pred │ │pred │\n",
"│mask │mask │ │mask │\n",
"│1 │2 │ │n │\n",
"├────────┼────────┤ ├────────┤\n",
"│expected│expected│ │expected│\n",
"│mask │mask │ │mask │\n",
"│1 │2 │ │n │\n",
"├────────┼────────┤ ├────────┤\n",
"│pred │pred │ │pred │\n",
"│image │image │ │image │\n",
"│n+1 │n+1 │ │2n │\n",
"├────────┼────────┤ ├────────┤\n",
"│expected│expected│ │expected│\n",
"│image │image │ ... │image │\n",
"│n+1 │n+2 │ │2n │\n",
"├────────┼────────┤ ├────────┤\n",
"│pred │pred │ │pred │\n",
"│mask │mask │ │mask │\n",
"│n+1 │n+2 │ │2n │\n",
"├────────┼────────┤ ├────────┤\n",
"│expected│expected│ │expected│\n",
"│mask │mask │ │mask │\n",
"│n+1 │n+2 │ │2n │\n",
"└────────┴────────┘ └────────┘\n",
" ...\n",
"```\n",
"</center></small>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659621313894,
"executionStopTime": 1659621314042,
"hidden_ranges": [],
"originalKey": "c488a34a-e46d-4649-93fb-4b1bb5a0e439",
"requestMsgId": "4221e632-fca1-4fe5-b2e3-f92c37aa40e4",
"showInput": true
},
"outputs": [],
"source": [
"images_to_display = [images.copy(), expected.copy(), masks.copy(), masks_expected.copy()]\n",
"n_rows = 4\n",
"n_images = len(images)\n",
"blank_image = images[0] * 0\n",
"n_per_row = 1+(n_images-1)//n_rows\n",
"for _ in range(n_per_row*n_rows - n_images):\n",
" for group in images_to_display:\n",
" group.append(blank_image)\n",
"\n",
"images_to_display_listed = [[[i] for i in j] for j in images_to_display]\n",
"split = []\n",
"for row in range(n_rows):\n",
" for group in images_to_display_listed:\n",
" split.append(group[row*n_per_row:(row+1)*n_per_row]) \n",
"\n",
"Image.fromarray(np.block(split))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659621323795,
"executionStopTime": 1659621323820,
"hidden_ranges": [],
"originalKey": "49eab9e1-4fe2-4fbe-b4f3-7b6953340170",
"requestMsgId": "85b402ad-f903-431f-a13e-c2d697e869bb",
"showInput": true
},
"outputs": [],
"source": [
"# Print the maximum channel intensity in the first image.\n",
"print(images[1].max()/255)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659621408642,
"executionStopTime": 1659621409559,
"hidden_ranges": [],
"originalKey": "137d2c43-d39d-4266-ac5e-2b714da5e0ee",
"requestMsgId": "8e27ec57-c2d6-4ae0-be69-b63b6af929ff",
"showInput": true
},
"outputs": [],
"source": [
"plt.ioff()\n",
"fig, ax = plt.subplots(figsize=(3,3))\n",
"\n",
"ax.grid(None)\n",
"ims = [[ax.imshow(im, animated=True)] for im in images]\n",
"ani = animation.ArtistAnimation(fig, ims, interval=80, blit=True)\n",
"ani_html = ani.to_jshtml()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659621409620,
"executionStopTime": 1659621409725,
"originalKey": "783e70d6-7cf1-4d76-a126-ba11ffc2f5be",
"requestMsgId": "b6843506-c5fa-4508-80fc-8ecae51a934a",
"showInput": true
},
"outputs": [],
"source": [
"HTML(ani_html)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659614670081,
"executionStopTime": 1659614670168,
"originalKey": "0286c350-2362-4f47-8181-2fc2ba51cfcf",
"requestMsgId": "976f4db9-d4c7-466c-bcfd-218234400226",
"showInput": true
},
"outputs": [],
"source": [
"# If you want to see the output of the model with the volume forced to opaque white, run this and re-evaluate\n",
"# with torch.no_grad():\n",
"# gm._implicit_functions[0]._fn.density.fill_(9.0)\n",
"# gm._implicit_functions[0]._fn.color.fill_(9.0)\n"
]
}
],
"metadata": {
"bento_stylesheets": {
"bento/extensions/flow/main.css": true,
"bento/extensions/kernel_selector/main.css": true,
"bento/extensions/kernel_ui/main.css": true,
"bento/extensions/new_kernel/main.css": true,
"bento/extensions/system_usage/main.css": true,
"bento/extensions/theme/main.css": true
},
"captumWidgetMessage": {},
"dataExplorerConfig": {},
"kernelspec": {
"display_name": "pytorch3d",
"language": "python",
"metadata": {
"cinder_runtime": false,
"fbpkg_supported": true,
"is_prebuilt": true,
"kernel_name": "bento_kernel_pytorch3d",
"nightly_builds": true
},
"name": "bento_kernel_pytorch3d"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3"
},
"last_base_url": "https://9177.od.fbinfra.net:443/",
"last_kernel_id": "bb33cd83-7924-489a-8bd8-2d9d62eb0126",
"last_msg_id": "99f7088e-d22b355b859660479ef0574e_5743",
"last_server_session_id": "2944b203-9ea8-4c0e-9634-645dfea5f26b",
"outputWidgetContext": {}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -50,7 +50,7 @@
"except ModuleNotFoundError:\n",
" need_pytorch3d=True\n",
"if need_pytorch3d:\n",
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
" # We try to install PyTorch3D via a released wheel.\n",
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
" version_str=\"\".join([\n",

View File

@@ -57,7 +57,7 @@
"except ModuleNotFoundError:\n",
" need_pytorch3d=True\n",
"if need_pytorch3d:\n",
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
" # We try to install PyTorch3D via a released wheel.\n",
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
" version_str=\"\".join([\n",

View File

@@ -73,7 +73,7 @@
"except ModuleNotFoundError:\n",
" need_pytorch3d=True\n",
"if need_pytorch3d:\n",
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
" # We try to install PyTorch3D via a released wheel.\n",
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
" version_str=\"\".join([\n",

View File

@@ -44,8 +44,6 @@ def generate_cow_renders(
data_dir: The folder that contains the cow mesh files. If the cow mesh
files do not exist in the folder, this function will automatically
download them.
azimuth_range: number of degrees on each side of the start position to
take samples
Returns:
cameras: A batch of `num_views` `FoVPerspectiveCameras` from which the

View File

@@ -47,6 +47,7 @@ test:
- imageio
- hydra-core
- accelerate
- lpips
commands:
#pytest .
python -m unittest discover -v -s tests -t .

View File

@@ -37,7 +37,7 @@ See [Running](#running) section below for examples of training and evaluation co
To plug in custom implementations, for example, of renderer or implicit-function protocols, you need to create your own runner script and import the plug-in implementations there.
First, install PyTorch3D and Implicitron dependencies as described in the previous section.
Then, implement the custom script; copying `pytorch3d/projects/implicitron_trainer` is a good place to start.
Then, implement the custom script; copying `pytorch3d/projects/implicitron_trainer/experiment.py` is a good place to start.
See [Custom plugins](#custom-plugins) for more information on how to import implementations and enable them in the configs.
@@ -203,29 +203,14 @@ to replace the implementation and potentially override the parameters.
# Code and config structure
The main object for this trainer loop is `Experiment`. It has four top-level replaceable components.
* `data_source`: This is a `DataSourceBase` which defaults to `ImplicitronDataSource`.
It constructs the data sets and dataloaders.
* `model_factory`: This is a `ModelFactoryBase` which defaults to `ImplicitronModelFactory`.
It constructs the model, which is usually an instance of implicitron's main `GenericModel` class, and can load its weights from a checkpoint.
* `optimizer_factory`: This is an `OptimizerFactoryBase` which defaults to `ImplicitronOptimizerFactory`.
It constructs the optimizer and can load its weights from a checkpoint.
* `training_loop`: This is a `TrainingLoopBase` which defaults to `ImplicitronTrainingLoop` and defines the main training loop.
As per above, the config structure is parsed automatically from the module hierarchy.
In particular, for ImplicitronModelFactory with generic model, model parameters are contained in the `model_factory_ImplicitronModelFactory_args.model_GenericModel_args` node, and dataset parameters in `data_source_ImplicitronDataSource_args` node.
In particular, model parameters are contained in `generic_model_args` node, and dataset parameters in `data_source_args` node.
Here is the class structure of GenericModel (single-line edges show aggregation, while double lines show available implementations):
Here is the class structure (single-line edges show aggregation, while double lines show available implementations):
```
model_GenericModel_args: GenericModel
└-- global_encoder_*_args: GlobalEncoderBase
╘== SequenceAutodecoder
└-- autodecoder_args: Autodecoder
╘== HarmonicTimeEncoder
└-- raysampler_*_args: RaySampler
╘== AdaptiveRaysampler
╘== NearFarRaysampler
generic_model_args: GenericModel
└-- sequence_autodecoder_args: Autodecoder
└-- raysampler_args: RaySampler
└-- renderer_*_args: BaseRenderer
╘== MultiPassEmissionAbsorptionRenderer
╘== LSTMRenderer
@@ -243,16 +228,19 @@ model_GenericModel_args: GenericModel
╘== IdrFeatureField
└-- image_feature_extractor_*_args: FeatureExtractorBase
╘== ResNetFeatureExtractor
└-- view_pooler_args: ViewPooler
└-- view_sampler_args: ViewSampler
└-- feature_aggregator_*_args: FeatureAggregatorBase
╘== IdentityFeatureAggregator
╘== AngleWeightedIdentityFeatureAggregator
╘== AngleWeightedReductionFeatureAggregator
╘== ReductionFeatureAggregator
└-- view_sampler_args: ViewSampler
└-- feature_aggregator_*_args: FeatureAggregatorBase
╘== IdentityFeatureAggregator
╘== AngleWeightedIdentityFeatureAggregator
╘== AngleWeightedReductionFeatureAggregator
╘== ReductionFeatureAggregator
solver_args: init_optimizer
data_source_args: ImplicitronDataSource
└-- dataset_map_provider_*_args
└-- data_loader_map_provider_*_args
```
Please look at the annotations of the respective classes or functions for the lists of hyperparameters. `tests/experiment.yaml` shows every possible option if you have no user-defined classes.
Please look at the annotations of the respective classes or functions for the lists of hyperparameters.
# Reproducing CO3D experiments

View File

@@ -2,11 +2,10 @@ defaults:
- default_config
- _self_
exp_dir: ./data/exps/base/
training_loop_ImplicitronTrainingLoop_args:
visdom_port: 8097
visualize_interval: 0
max_epochs: 1000
data_source_ImplicitronDataSource_args:
architecture: generic
visualize_interval: 0
visdom_port: 8097
data_source_args:
data_loader_map_provider_class_type: SequenceDataLoaderMapProvider
dataset_map_provider_class_type: JsonIndexDatasetMapProvider
data_loader_map_provider_SequenceDataLoaderMapProvider_args:
@@ -22,59 +21,55 @@ data_source_ImplicitronDataSource_args:
load_point_clouds: false
mask_depths: false
mask_images: false
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
loss_weights:
loss_mask_bce: 1.0
loss_prev_stage_mask_bce: 1.0
loss_autodecoder_norm: 0.01
loss_rgb_mse: 1.0
loss_prev_stage_rgb_mse: 1.0
output_rasterized_mc: false
chunk_size_grid: 102400
render_image_height: 400
render_image_width: 400
num_passes: 2
implicit_function_NeuralRadianceFieldImplicitFunction_args:
n_harmonic_functions_xyz: 10
n_harmonic_functions_dir: 4
n_hidden_neurons_xyz: 256
n_hidden_neurons_dir: 128
n_layers_xyz: 8
append_xyz:
- 5
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 1024
scene_extent: 8.0
n_pts_per_ray_training: 64
n_pts_per_ray_evaluation: 64
stratified_point_sampling_training: true
stratified_point_sampling_evaluation: false
renderer_MultiPassEmissionAbsorptionRenderer_args:
n_pts_per_ray_fine_training: 64
n_pts_per_ray_fine_evaluation: 64
append_coarse_samples_to_fine: true
density_noise_std_train: 1.0
view_pooler_args:
view_sampler_args:
masked_sampling: false
image_feature_extractor_ResNetFeatureExtractor_args:
stages:
- 1
- 2
- 3
- 4
proj_dim: 16
image_rescale: 0.32
first_max_pool: false
optimizer_factory_ImplicitronOptimizerFactory_args:
breed: Adam
weight_decay: 0.0
lr_policy: MultiStepLR
multistep_lr_milestones: []
generic_model_args:
loss_weights:
loss_mask_bce: 1.0
loss_prev_stage_mask_bce: 1.0
loss_autodecoder_norm: 0.01
loss_rgb_mse: 1.0
loss_prev_stage_rgb_mse: 1.0
output_rasterized_mc: false
chunk_size_grid: 102400
render_image_height: 400
render_image_width: 400
num_passes: 2
implicit_function_NeuralRadianceFieldImplicitFunction_args:
n_harmonic_functions_xyz: 10
n_harmonic_functions_dir: 4
n_hidden_neurons_xyz: 256
n_hidden_neurons_dir: 128
n_layers_xyz: 8
append_xyz:
- 5
latent_dim: 0
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 1024
scene_extent: 8.0
n_pts_per_ray_training: 64
n_pts_per_ray_evaluation: 64
stratified_point_sampling_training: true
stratified_point_sampling_evaluation: false
renderer_MultiPassEmissionAbsorptionRenderer_args:
n_pts_per_ray_fine_training: 64
n_pts_per_ray_fine_evaluation: 64
append_coarse_samples_to_fine: true
density_noise_std_train: 1.0
view_pooler_args:
view_sampler_args:
masked_sampling: false
image_feature_extractor_ResNetFeatureExtractor_args:
stages:
- 1
- 2
- 3
- 4
proj_dim: 16
image_rescale: 0.32
first_max_pool: false
solver_args:
breed: adam
lr: 0.0005
gamma: 0.1
lr_policy: multistep
max_epochs: 2000
momentum: 0.9
betas:
- 0.9
- 0.999
weight_decay: 0.0

View File

@@ -1,18 +1,17 @@
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
image_feature_extractor_class_type: ResNetFeatureExtractor
image_feature_extractor_ResNetFeatureExtractor_args:
add_images: true
add_masks: true
first_max_pool: true
image_rescale: 0.375
l2_norm: true
name: resnet34
normalize_image: true
pretrained: true
stages:
- 1
- 2
- 3
- 4
proj_dim: 32
generic_model_args:
image_feature_extractor_class_type: ResNetFeatureExtractor
image_feature_extractor_ResNetFeatureExtractor_args:
add_images: true
add_masks: true
first_max_pool: true
image_rescale: 0.375
l2_norm: true
name: resnet34
normalize_image: true
pretrained: true
stages:
- 1
- 2
- 3
- 4
proj_dim: 32

View File

@@ -1,18 +1,17 @@
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
image_feature_extractor_class_type: ResNetFeatureExtractor
image_feature_extractor_ResNetFeatureExtractor_args:
add_images: true
add_masks: true
first_max_pool: false
image_rescale: 0.375
l2_norm: true
name: resnet34
normalize_image: true
pretrained: true
stages:
- 1
- 2
- 3
- 4
proj_dim: 16
generic_model_args:
image_feature_extractor_class_type: ResNetFeatureExtractor
image_feature_extractor_ResNetFeatureExtractor_args:
add_images: true
add_masks: true
first_max_pool: false
image_rescale: 0.375
l2_norm: true
name: resnet34
normalize_image: true
pretrained: true
stages:
- 1
- 2
- 3
- 4
proj_dim: 16

View File

@@ -1,19 +1,18 @@
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
image_feature_extractor_class_type: ResNetFeatureExtractor
image_feature_extractor_ResNetFeatureExtractor_args:
stages:
- 1
- 2
- 3
first_max_pool: false
proj_dim: -1
l2_norm: false
image_rescale: 0.375
name: resnet34
normalize_image: true
pretrained: true
view_pooler_args:
feature_aggregator_AngleWeightedReductionFeatureAggregator_args:
reduction_functions:
- AVG
generic_model_args:
image_feature_extractor_class_type: ResNetFeatureExtractor
image_feature_extractor_ResNetFeatureExtractor_args:
stages:
- 1
- 2
- 3
first_max_pool: false
proj_dim: -1
l2_norm: false
image_rescale: 0.375
name: resnet34
normalize_image: true
pretrained: true
view_pooler_args:
feature_aggregator_AngleWeightedReductionFeatureAggregator_args:
reduction_functions:
- AVG

View File

@@ -1,7 +1,7 @@
defaults:
- repro_base.yaml
- _self_
data_source_ImplicitronDataSource_args:
data_source_args:
data_loader_map_provider_SequenceDataLoaderMapProvider_args:
batch_size: 10
dataset_length_train: 1000
@@ -26,13 +26,10 @@ data_source_ImplicitronDataSource_args:
n_frames_per_sequence: -1
test_on_train: true
test_restrict_sequence_id: 0
optimizer_factory_ImplicitronOptimizerFactory_args:
multistep_lr_milestones:
- 1000
training_loop_ImplicitronTrainingLoop_args:
solver_args:
max_epochs: 3000
evaluator_ImplicitronEvaluator_args:
camera_difficulty_bin_breaks:
- 0.666667
- 0.833334
is_multisequence: true
milestones:
- 1000
camera_difficulty_bin_breaks:
- 0.666667
- 0.833334

View File

@@ -1,65 +1,65 @@
defaults:
- repro_multiseq_base.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
loss_weights:
loss_mask_bce: 100.0
loss_kl: 0.0
loss_rgb_mse: 1.0
loss_eikonal: 0.1
chunk_size_grid: 65536
num_passes: 1
output_rasterized_mc: true
sampling_mode_training: mask_sample
global_encoder_class_type: SequenceAutodecoder
global_encoder_SequenceAutodecoder_args:
autodecoder_args:
n_instances: 20000
init_scale: 1.0
encoding_dim: 256
implicit_function_IdrFeatureField_args:
n_harmonic_functions_xyz: 6
bias: 0.6
d_in: 3
d_out: 1
generic_model_args:
loss_weights:
loss_mask_bce: 100.0
loss_kl: 0.0
loss_rgb_mse: 1.0
loss_eikonal: 0.1
chunk_size_grid: 65536
num_passes: 1
output_rasterized_mc: true
sampling_mode_training: mask_sample
global_encoder_class_type: SequenceAutodecoder
global_encoder_SequenceAutodecoder_args:
autodecoder_args:
n_instances: 20000
init_scale: 1.0
encoding_dim: 256
implicit_function_IdrFeatureField_args:
n_harmonic_functions_xyz: 6
bias: 0.6
d_in: 3
d_out: 1
dims:
- 512
- 512
- 512
- 512
- 512
- 512
- 512
- 512
geometric_init: true
pooled_feature_dim: 0
skip_in:
- 6
weight_norm: true
renderer_SignedDistanceFunctionRenderer_args:
ray_tracer_args:
line_search_step: 0.5
line_step_iters: 3
n_secant_steps: 8
n_steps: 100
object_bounding_sphere: 8.0
sdf_threshold: 5.0e-05
ray_normal_coloring_network_args:
d_in: 9
d_out: 3
dims:
- 512
- 512
- 512
- 512
- 512
- 512
- 512
- 512
geometric_init: true
mode: idr
n_harmonic_functions_dir: 4
pooled_feature_dim: 0
skip_in:
- 6
weight_norm: true
renderer_SignedDistanceFunctionRenderer_args:
ray_tracer_args:
line_search_step: 0.5
line_step_iters: 3
n_secant_steps: 8
n_steps: 100
sdf_threshold: 5.0e-05
ray_normal_coloring_network_args:
d_in: 9
d_out: 3
dims:
- 512
- 512
- 512
- 512
mode: idr
n_harmonic_functions_dir: 4
pooled_feature_dim: 0
weight_norm: true
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 1024
n_pts_per_ray_training: 0
n_pts_per_ray_evaluation: 0
scene_extent: 8.0
renderer_class_type: SignedDistanceFunctionRenderer
implicit_function_class_type: IdrFeatureField
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 1024
n_pts_per_ray_training: 0
n_pts_per_ray_evaluation: 0
scene_extent: 8.0
renderer_class_type: SignedDistanceFunctionRenderer
implicit_function_class_type: IdrFeatureField

View File

@@ -1,12 +1,11 @@
defaults:
- repro_multiseq_base.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
chunk_size_grid: 16000
view_pooler_enabled: false
global_encoder_class_type: SequenceAutodecoder
global_encoder_SequenceAutodecoder_args:
autodecoder_args:
n_instances: 20000
encoding_dim: 256
generic_model_args:
chunk_size_grid: 16000
view_pooler_enabled: false
global_encoder_class_type: SequenceAutodecoder
global_encoder_SequenceAutodecoder_args:
autodecoder_args:
n_instances: 20000
encoding_dim: 256

View File

@@ -2,11 +2,9 @@ defaults:
- repro_multiseq_base.yaml
- repro_feat_extractor_unnormed.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
chunk_size_grid: 16000
view_pooler_enabled: true
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 850
training_loop_ImplicitronTrainingLoop_args:
clip_grad: 1.0
clip_grad: 1.0
generic_model_args:
chunk_size_grid: 16000
view_pooler_enabled: true
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 850

View File

@@ -2,17 +2,16 @@ defaults:
- repro_multiseq_base.yaml
- repro_feat_extractor_transformer.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
chunk_size_grid: 16000
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 800
n_pts_per_ray_training: 32
n_pts_per_ray_evaluation: 32
renderer_MultiPassEmissionAbsorptionRenderer_args:
n_pts_per_ray_fine_training: 16
n_pts_per_ray_fine_evaluation: 16
implicit_function_class_type: NeRFormerImplicitFunction
view_pooler_enabled: true
view_pooler_args:
feature_aggregator_class_type: IdentityFeatureAggregator
generic_model_args:
chunk_size_grid: 16000
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 800
n_pts_per_ray_training: 32
n_pts_per_ray_evaluation: 32
renderer_MultiPassEmissionAbsorptionRenderer_args:
n_pts_per_ray_fine_training: 16
n_pts_per_ray_fine_evaluation: 16
implicit_function_class_type: NeRFormerImplicitFunction
view_pooler_enabled: true
view_pooler_args:
feature_aggregator_class_type: IdentityFeatureAggregator

View File

@@ -1,7 +1,6 @@
defaults:
- repro_multiseq_nerformer.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
view_pooler_args:
feature_aggregator_class_type: AngleWeightedIdentityFeatureAggregator
generic_model_args:
view_pooler_args:
feature_aggregator_class_type: AngleWeightedIdentityFeatureAggregator

View File

@@ -1,35 +1,34 @@
defaults:
- repro_multiseq_base.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
chunk_size_grid: 16000
view_pooler_enabled: false
n_train_target_views: -1
num_passes: 1
loss_weights:
loss_rgb_mse: 200.0
loss_prev_stage_rgb_mse: 0.0
loss_mask_bce: 1.0
loss_prev_stage_mask_bce: 0.0
loss_autodecoder_norm: 0.001
depth_neg_penalty: 10000.0
global_encoder_class_type: SequenceAutodecoder
global_encoder_SequenceAutodecoder_args:
autodecoder_args:
encoding_dim: 256
n_instances: 20000
raysampler_class_type: NearFarRaySampler
raysampler_NearFarRaySampler_args:
n_rays_per_image_sampled_from_mask: 2048
min_depth: 0.05
max_depth: 0.05
n_pts_per_ray_training: 1
n_pts_per_ray_evaluation: 1
stratified_point_sampling_training: false
stratified_point_sampling_evaluation: false
renderer_class_type: LSTMRenderer
implicit_function_class_type: SRNHyperNetImplicitFunction
optimizer_factory_ImplicitronOptimizerFactory_args:
breed: Adam
generic_model_args:
chunk_size_grid: 16000
view_pooler_enabled: false
n_train_target_views: -1
num_passes: 1
loss_weights:
loss_rgb_mse: 200.0
loss_prev_stage_rgb_mse: 0.0
loss_mask_bce: 1.0
loss_prev_stage_mask_bce: 0.0
loss_autodecoder_norm: 0.001
depth_neg_penalty: 10000.0
global_encoder_class_type: SequenceAutodecoder
global_encoder_SequenceAutodecoder_args:
autodecoder_args:
encoding_dim: 256
n_instances: 20000
raysampler_class_type: NearFarRaySampler
raysampler_NearFarRaySampler_args:
n_rays_per_image_sampled_from_mask: 2048
min_depth: 0.05
max_depth: 0.05
n_pts_per_ray_training: 1
n_pts_per_ray_evaluation: 1
stratified_point_sampling_training: false
stratified_point_sampling_evaluation: false
renderer_class_type: LSTMRenderer
implicit_function_class_type: SRNHyperNetImplicitFunction
solver_args:
breed: adam
lr: 5.0e-05

View File

@@ -1,11 +1,10 @@
defaults:
- repro_multiseq_srn_ad_hypernet.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
num_passes: 1
implicit_function_SRNHyperNetImplicitFunction_args:
pixel_generator_args:
n_harmonic_functions: 0
hypernet_args:
n_harmonic_functions: 0
generic_model_args:
num_passes: 1
implicit_function_SRNHyperNetImplicitFunction_args:
pixel_generator_args:
n_harmonic_functions: 0
hypernet_args:
n_harmonic_functions: 0

View File

@@ -2,30 +2,29 @@ defaults:
- repro_multiseq_base.yaml
- repro_feat_extractor_normed.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
chunk_size_grid: 32000
num_passes: 1
n_train_target_views: -1
loss_weights:
loss_rgb_mse: 200.0
loss_prev_stage_rgb_mse: 0.0
loss_mask_bce: 1.0
loss_prev_stage_mask_bce: 0.0
loss_autodecoder_norm: 0.0
depth_neg_penalty: 10000.0
raysampler_class_type: NearFarRaySampler
raysampler_NearFarRaySampler_args:
n_rays_per_image_sampled_from_mask: 2048
min_depth: 0.05
max_depth: 0.05
n_pts_per_ray_training: 1
n_pts_per_ray_evaluation: 1
stratified_point_sampling_training: false
stratified_point_sampling_evaluation: false
renderer_class_type: LSTMRenderer
implicit_function_class_type: SRNImplicitFunction
view_pooler_enabled: true
optimizer_factory_ImplicitronOptimizerFactory_args:
breed: Adam
generic_model_args:
chunk_size_grid: 32000
num_passes: 1
n_train_target_views: -1
loss_weights:
loss_rgb_mse: 200.0
loss_prev_stage_rgb_mse: 0.0
loss_mask_bce: 1.0
loss_prev_stage_mask_bce: 0.0
loss_autodecoder_norm: 0.0
depth_neg_penalty: 10000.0
raysampler_class_type: NearFarRaySampler
raysampler_NearFarRaySampler_args:
n_rays_per_image_sampled_from_mask: 2048
min_depth: 0.05
max_depth: 0.05
n_pts_per_ray_training: 1
n_pts_per_ray_evaluation: 1
stratified_point_sampling_training: false
stratified_point_sampling_evaluation: false
renderer_class_type: LSTMRenderer
implicit_function_class_type: SRNImplicitFunction
view_pooler_enabled: true
solver_args:
breed: adam
lr: 5.0e-05

View File

@@ -1,11 +1,10 @@
defaults:
- repro_multiseq_srn_wce.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
num_passes: 1
implicit_function_SRNImplicitFunction_args:
pixel_generator_args:
n_harmonic_functions: 0
raymarch_function_args:
n_harmonic_functions: 0
generic_model_args:
num_passes: 1
implicit_function_SRNImplicitFunction_args:
pixel_generator_args:
n_harmonic_functions: 0
raymarch_function_args:
n_harmonic_functions: 0

View File

@@ -1,7 +1,7 @@
defaults:
- repro_base
- _self_
data_source_ImplicitronDataSource_args:
data_source_args:
data_loader_map_provider_SequenceDataLoaderMapProvider_args:
batch_size: 1
dataset_length_train: 1000
@@ -12,30 +12,28 @@ data_source_ImplicitronDataSource_args:
n_frames_per_sequence: -1
test_restrict_sequence_id: 0
test_on_train: false
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
render_image_height: 800
render_image_width: 800
log_vars:
- loss_rgb_psnr_fg
- loss_rgb_psnr
- loss_eikonal
- loss_prev_stage_rgb_psnr
- loss_mask_bce
- loss_prev_stage_mask_bce
- loss_rgb_mse
- loss_prev_stage_rgb_mse
- loss_depth_abs
- loss_depth_abs_fg
- loss_kl
- loss_mask_neg_iou
- objective
- epoch
- sec/it
optimizer_factory_ImplicitronOptimizerFactory_args:
generic_model_args:
render_image_height: 800
render_image_width: 800
log_vars:
- loss_rgb_psnr_fg
- loss_rgb_psnr
- loss_eikonal
- loss_prev_stage_rgb_psnr
- loss_mask_bce
- loss_prev_stage_mask_bce
- loss_rgb_mse
- loss_prev_stage_rgb_mse
- loss_depth_abs
- loss_depth_abs_fg
- loss_kl
- loss_mask_neg_iou
- objective
- epoch
- sec/it
solver_args:
lr: 0.0005
multistep_lr_milestones:
max_epochs: 400
milestones:
- 200
- 300
training_loop_ImplicitronTrainingLoop_args:
max_epochs: 400

View File

@@ -1,57 +1,57 @@
defaults:
- repro_singleseq_base
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
loss_weights:
loss_mask_bce: 100.0
loss_kl: 0.0
loss_rgb_mse: 1.0
loss_eikonal: 0.1
chunk_size_grid: 65536
num_passes: 1
view_pooler_enabled: false
implicit_function_IdrFeatureField_args:
n_harmonic_functions_xyz: 6
bias: 0.6
d_in: 3
d_out: 1
generic_model_args:
loss_weights:
loss_mask_bce: 100.0
loss_kl: 0.0
loss_rgb_mse: 1.0
loss_eikonal: 0.1
chunk_size_grid: 65536
num_passes: 1
view_pooler_enabled: false
implicit_function_IdrFeatureField_args:
n_harmonic_functions_xyz: 6
bias: 0.6
d_in: 3
d_out: 1
dims:
- 512
- 512
- 512
- 512
- 512
- 512
- 512
- 512
geometric_init: true
pooled_feature_dim: 0
skip_in:
- 6
weight_norm: true
renderer_SignedDistanceFunctionRenderer_args:
ray_tracer_args:
line_search_step: 0.5
line_step_iters: 3
n_secant_steps: 8
n_steps: 100
object_bounding_sphere: 8.0
sdf_threshold: 5.0e-05
ray_normal_coloring_network_args:
d_in: 9
d_out: 3
dims:
- 512
- 512
- 512
- 512
- 512
- 512
- 512
- 512
geometric_init: true
mode: idr
n_harmonic_functions_dir: 4
pooled_feature_dim: 0
skip_in:
- 6
weight_norm: true
renderer_SignedDistanceFunctionRenderer_args:
ray_tracer_args:
line_search_step: 0.5
line_step_iters: 3
n_secant_steps: 8
n_steps: 100
sdf_threshold: 5.0e-05
ray_normal_coloring_network_args:
d_in: 9
d_out: 3
dims:
- 512
- 512
- 512
- 512
mode: idr
n_harmonic_functions_dir: 4
pooled_feature_dim: 0
weight_norm: true
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 1024
n_pts_per_ray_training: 0
n_pts_per_ray_evaluation: 0
renderer_class_type: SignedDistanceFunctionRenderer
implicit_function_class_type: IdrFeatureField
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 1024
n_pts_per_ray_training: 0
n_pts_per_ray_evaluation: 0
renderer_class_type: SignedDistanceFunctionRenderer
implicit_function_class_type: IdrFeatureField

View File

@@ -1,48 +0,0 @@
defaults:
- repro_singleseq_base
- _self_
exp_dir: "./data/nerf_blender_repro/${oc.env:BLENDER_SINGLESEQ_CLASS}"
data_source_ImplicitronDataSource_args:
data_loader_map_provider_SequenceDataLoaderMapProvider_args:
dataset_length_train: 100
dataset_map_provider_class_type: BlenderDatasetMapProvider
dataset_map_provider_BlenderDatasetMapProvider_args:
base_dir: ${oc.env:BLENDER_DATASET_ROOT}
n_known_frames_for_test: null
object_name: ${oc.env:BLENDER_SINGLESEQ_CLASS}
path_manager_factory_class_type: PathManagerFactory
path_manager_factory_PathManagerFactory_args:
silence_logs: true
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
mask_images: false
raysampler_class_type: NearFarRaySampler
raysampler_NearFarRaySampler_args:
n_rays_per_image_sampled_from_mask: 4096
min_depth: 2
max_depth: 6
renderer_MultiPassEmissionAbsorptionRenderer_args:
density_noise_std_train: 1.0
n_pts_per_ray_fine_training: 128
n_pts_per_ray_fine_evaluation: 128
raymarcher_EmissionAbsorptionRaymarcher_args:
blend_output: false
loss_weights:
loss_rgb_mse: 1.0
loss_prev_stage_rgb_mse: 1.0
loss_mask_bce: 0.0
loss_prev_stage_mask_bce: 0.0
loss_autodecoder_norm: 0.00
optimizer_factory_ImplicitronOptimizerFactory_args:
exponential_lr_step_size: 3001
lr_policy: LinearExponential
linear_exponential_lr_milestone: 200
training_loop_ImplicitronTrainingLoop_args:
max_epochs: 3201
metric_print_interval: 10
store_checkpoints_purge: 3
test_when_finished: true
validation_interval: 100

View File

@@ -2,9 +2,8 @@ defaults:
- repro_singleseq_wce_base.yaml
- repro_feat_extractor_unnormed.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
chunk_size_grid: 16000
view_pooler_enabled: true
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 850
generic_model_args:
chunk_size_grid: 16000
view_pooler_enabled: true
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 850

View File

@@ -2,17 +2,16 @@ defaults:
- repro_singleseq_wce_base.yaml
- repro_feat_extractor_transformer.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
chunk_size_grid: 16000
view_pooler_enabled: true
implicit_function_class_type: NeRFormerImplicitFunction
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 800
n_pts_per_ray_training: 32
n_pts_per_ray_evaluation: 32
renderer_MultiPassEmissionAbsorptionRenderer_args:
n_pts_per_ray_fine_training: 16
n_pts_per_ray_fine_evaluation: 16
view_pooler_args:
feature_aggregator_class_type: IdentityFeatureAggregator
generic_model_args:
chunk_size_grid: 16000
view_pooler_enabled: true
implicit_function_class_type: NeRFormerImplicitFunction
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 800
n_pts_per_ray_training: 32
n_pts_per_ray_evaluation: 32
renderer_MultiPassEmissionAbsorptionRenderer_args:
n_pts_per_ray_fine_training: 16
n_pts_per_ray_fine_evaluation: 16
view_pooler_args:
feature_aggregator_class_type: IdentityFeatureAggregator

View File

@@ -1,29 +1,28 @@
defaults:
- repro_singleseq_base.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
num_passes: 1
chunk_size_grid: 32000
view_pooler_enabled: false
loss_weights:
loss_rgb_mse: 200.0
loss_prev_stage_rgb_mse: 0.0
loss_mask_bce: 1.0
loss_prev_stage_mask_bce: 0.0
loss_autodecoder_norm: 0.0
depth_neg_penalty: 10000.0
raysampler_class_type: NearFarRaySampler
raysampler_NearFarRaySampler_args:
n_rays_per_image_sampled_from_mask: 2048
min_depth: 0.05
max_depth: 0.05
n_pts_per_ray_training: 1
n_pts_per_ray_evaluation: 1
stratified_point_sampling_training: false
stratified_point_sampling_evaluation: false
renderer_class_type: LSTMRenderer
implicit_function_class_type: SRNImplicitFunction
optimizer_factory_ImplicitronOptimizerFactory_args:
breed: Adam
generic_model_args:
num_passes: 1
chunk_size_grid: 32000
view_pooler_enabled: false
loss_weights:
loss_rgb_mse: 200.0
loss_prev_stage_rgb_mse: 0.0
loss_mask_bce: 1.0
loss_prev_stage_mask_bce: 0.0
loss_autodecoder_norm: 0.0
depth_neg_penalty: 10000.0
raysampler_class_type: NearFarRaySampler
raysampler_NearFarRaySampler_args:
n_rays_per_image_sampled_from_mask: 2048
min_depth: 0.05
max_depth: 0.05
n_pts_per_ray_training: 1
n_pts_per_ray_evaluation: 1
stratified_point_sampling_training: false
stratified_point_sampling_evaluation: false
renderer_class_type: LSTMRenderer
implicit_function_class_type: SRNImplicitFunction
solver_args:
breed: adam
lr: 5.0e-05

View File

@@ -1,11 +1,10 @@
defaults:
- repro_singleseq_srn.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
num_passes: 1
implicit_function_SRNImplicitFunction_args:
pixel_generator_args:
n_harmonic_functions: 0
raymarch_function_args:
n_harmonic_functions: 0
generic_model_args:
num_passes: 1
implicit_function_SRNImplicitFunction_args:
pixel_generator_args:
n_harmonic_functions: 0
raymarch_function_args:
n_harmonic_functions: 0

View File

@@ -2,29 +2,28 @@ defaults:
- repro_singleseq_wce_base
- repro_feat_extractor_normed.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
num_passes: 1
chunk_size_grid: 32000
view_pooler_enabled: true
loss_weights:
loss_rgb_mse: 200.0
loss_prev_stage_rgb_mse: 0.0
loss_mask_bce: 1.0
loss_prev_stage_mask_bce: 0.0
loss_autodecoder_norm: 0.0
depth_neg_penalty: 10000.0
raysampler_class_type: NearFarRaySampler
raysampler_NearFarRaySampler_args:
n_rays_per_image_sampled_from_mask: 2048
min_depth: 0.05
max_depth: 0.05
n_pts_per_ray_training: 1
n_pts_per_ray_evaluation: 1
stratified_point_sampling_training: false
stratified_point_sampling_evaluation: false
renderer_class_type: LSTMRenderer
implicit_function_class_type: SRNImplicitFunction
optimizer_factory_ImplicitronOptimizerFactory_args:
breed: Adam
generic_model_args:
num_passes: 1
chunk_size_grid: 32000
view_pooler_enabled: true
loss_weights:
loss_rgb_mse: 200.0
loss_prev_stage_rgb_mse: 0.0
loss_mask_bce: 1.0
loss_prev_stage_mask_bce: 0.0
loss_autodecoder_norm: 0.0
depth_neg_penalty: 10000.0
raysampler_class_type: NearFarRaySampler
raysampler_NearFarRaySampler_args:
n_rays_per_image_sampled_from_mask: 2048
min_depth: 0.05
max_depth: 0.05
n_pts_per_ray_training: 1
n_pts_per_ray_evaluation: 1
stratified_point_sampling_training: false
stratified_point_sampling_evaluation: false
renderer_class_type: LSTMRenderer
implicit_function_class_type: SRNImplicitFunction
solver_args:
breed: adam
lr: 5.0e-05

View File

@@ -1,11 +1,10 @@
defaults:
- repro_singleseq_srn_wce.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
num_passes: 1
implicit_function_SRNImplicitFunction_args:
pixel_generator_args:
n_harmonic_functions: 0
raymarch_function_args:
n_harmonic_functions: 0
generic_model_args:
num_passes: 1
implicit_function_SRNImplicitFunction_args:
pixel_generator_args:
n_harmonic_functions: 0
raymarch_function_args:
n_harmonic_functions: 0

View File

@@ -1,7 +1,7 @@
defaults:
- repro_singleseq_base
- _self_
data_source_ImplicitronDataSource_args:
data_source_args:
data_loader_map_provider_SequenceDataLoaderMapProvider_args:
batch_size: 10
dataset_length_train: 1000

View File

@@ -8,28 +8,27 @@
""""
This file is the entry point for launching experiments with Implicitron.
Main functions
---------------
- `run_training` is the wrapper for the train, val, test loops
and checkpointing
- `trainvalidate` is the inner loop which runs the model forward/backward
pass, visualizations and metric printing
Launch Training
---------------
Experiment config .yaml files are located in the
`projects/implicitron_trainer/configs` folder. To launch an experiment,
specify the name of the file. Specific config values can also be overridden
from the command line, for example:
`projects/implicitron_trainer/configs` folder. To launch
an experiment, specify the name of the file. Specific config values can
also be overridden from the command line, for example:
```
./experiment.py --config-name base_config.yaml override.param.one=42 override.param.two=84
```
To run an experiment on a specific GPU, specify the `gpu_idx` key in the
config file / CLI. To run on a different device, specify the device in
`run_training`.
Main functions
---------------
- The Experiment class defines `run` which creates the model, optimizer, and other
objects used in training, then starts TrainingLoop's `run` function.
- TrainingLoop takes care of the actual training logic: forward and backward passes,
evaluation and testing, as well as model checkpointing, visualization, and metric
printing.
To run an experiment on a specific GPU, specify the `gpu_idx` key
in the config file / CLI. To run on a different device, specify the
device in `run_training`.
Outputs
--------
@@ -46,40 +45,43 @@ The outputs of the experiment are saved and logged in multiple ways:
config file.
"""
import copy
import json
import logging
import os
import random
import time
import warnings
from dataclasses import field
from typing import Any, Dict, Optional, Tuple
import hydra
import lpips
import numpy as np
import torch
import tqdm
from accelerate import Accelerator
from omegaconf import DictConfig, OmegaConf
from packaging import version
from pytorch3d.implicitron.dataset.data_source import (
DataSourceBase,
ImplicitronDataSource,
)
from pytorch3d.implicitron.models.generic_model import ImplicitronModelBase
from pytorch3d.implicitron.dataset import utils as ds_utils
from pytorch3d.implicitron.dataset.data_loader_map_provider import DataLoaderMap
from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource, Task
from pytorch3d.implicitron.dataset.dataset_map_provider import DatasetMap
from pytorch3d.implicitron.evaluation import evaluate_new_view_synthesis as evaluate
from pytorch3d.implicitron.models.generic_model import EvaluationMode, GenericModel
from pytorch3d.implicitron.models.renderer.multipass_ea import (
MultiPassEmissionAbsorptionRenderer,
)
from pytorch3d.implicitron.models.renderer.ray_sampler import AdaptiveRaySampler
from pytorch3d.implicitron.tools import model_io, vis_utils
from pytorch3d.implicitron.tools.config import (
Configurable,
expand_args_fields,
remove_unused_components,
run_auto_creation,
)
from pytorch3d.implicitron.tools.stats import Stats
from pytorch3d.renderer.cameras import CamerasBase
from .impl.model_factory import ModelFactoryBase
from .impl.optimizer_factory import OptimizerFactoryBase
from .impl.training_loop import TrainingLoopBase
from .impl.utils import seed_all_random_engines
from .impl.experiment_config import ExperimentConfig
from .impl.optimization import init_optimizer
logger = logging.getLogger(__name__)
@@ -98,132 +100,551 @@ except ModuleNotFoundError:
no_accelerate = os.environ.get("PYTORCH3D_NO_ACCELERATE") is not None
class Experiment(Configurable): # pyre-ignore: 13
def init_model(
*,
cfg: DictConfig,
accelerator: Optional[Accelerator] = None,
force_load: bool = False,
clear_stats: bool = False,
load_model_only: bool = False,
) -> Tuple[GenericModel, Stats, Optional[Dict[str, Any]]]:
"""
This class is at the top level of Implicitron's config hierarchy. Its
members are high-level components necessary for training an implicit rende-
ring network.
Returns an instance of `GenericModel`.
Members:
data_source: An object that produces datasets and dataloaders.
model_factory: An object that produces an implicit rendering model as
well as its corresponding Stats object.
optimizer_factory: An object that produces the optimizer and lr
scheduler.
training_loop: An object that runs training given the outputs produced
by the data_source, model_factory and optimizer_factory.
seed: A random seed to ensure reproducibility.
detect_anomaly: Whether torch.autograd should detect anomalies. Useful
for debugging, but might slow down the training.
exp_dir: Root experimentation directory. Checkpoints and training stats
will be saved here.
If `cfg.resume` is set or `force_load` is true,
attempts to load the last checkpoint from `cfg.exp_dir`. Failure to do so
will return the model with initial weights, unless `force_load` is passed,
in which case a FileNotFoundError is raised.
Args:
force_load: If true, force load model from checkpoint even if
cfg.resume is false.
clear_stats: If true, clear the stats object loaded from checkpoint
load_model_only: If true, load only the model weights from checkpoint
and do not load the state of the optimizer and stats.
Returns:
model: The model with optionally loaded weights from checkpoint
stats: The stats structure (optionally loaded from checkpoint)
optimizer_state: The optimizer state dict containing
`state` and `param_groups` keys (optionally loaded from checkpoint)
Raise:
FileNotFoundError if `force_load` is passed but checkpoint is not found.
"""
data_source: DataSourceBase
data_source_class_type: str = "ImplicitronDataSource"
model_factory: ModelFactoryBase
model_factory_class_type: str = "ImplicitronModelFactory"
optimizer_factory: OptimizerFactoryBase
optimizer_factory_class_type: str = "ImplicitronOptimizerFactory"
training_loop: TrainingLoopBase
training_loop_class_type: str = "ImplicitronTrainingLoop"
# Initialize the model
if cfg.architecture == "generic":
model = GenericModel(**cfg.generic_model_args)
else:
raise ValueError(f"No such arch {cfg.architecture}.")
seed: int = 42
detect_anomaly: bool = False
exp_dir: str = "./data/default_experiment/"
# Determine the network outputs that should be logged
if hasattr(model, "log_vars"):
log_vars = copy.deepcopy(list(model.log_vars))
else:
log_vars = ["objective"]
hydra: dict = field(
default_factory=lambda: {
"run": {"dir": "."}, # Make hydra not change the working dir.
"output_subdir": None, # disable storing the .hydra logs
}
visdom_env_charts = vis_utils.get_visdom_env(cfg) + "_charts"
# Init the stats struct
stats = Stats(
log_vars,
visdom_env=visdom_env_charts,
verbose=False,
visdom_server=cfg.visdom_server,
visdom_port=cfg.visdom_port,
)
def __post_init__(self):
seed_all_random_engines(
self.seed
) # Set all random engine seeds for reproducibility
# Retrieve the last checkpoint
if cfg.resume_epoch > 0:
model_path = model_io.get_checkpoint(cfg.exp_dir, cfg.resume_epoch)
else:
model_path = model_io.find_last_checkpoint(cfg.exp_dir)
run_auto_creation(self)
optimizer_state = None
if model_path is not None:
logger.info("found previous model %s" % model_path)
if force_load or cfg.resume:
logger.info(" -> resuming")
def run(self) -> None:
# Initialize the accelerator if desired.
if no_accelerate:
accelerator = None
device = torch.device("cuda:0")
map_location = None
if accelerator is not None and not accelerator.is_local_main_process:
map_location = {
"cuda:%d" % 0: "cuda:%d" % accelerator.local_process_index
}
if load_model_only:
model_state_dict = torch.load(
model_io.get_model_path(model_path), map_location=map_location
)
stats_load, optimizer_state = None, None
else:
model_state_dict, stats_load, optimizer_state = model_io.load_model(
model_path, map_location=map_location
)
# Determine if stats should be reset
if not clear_stats:
if stats_load is None:
logger.info("\n\n\n\nCORRUPT STATS -> clearing stats\n\n\n\n")
last_epoch = model_io.parse_epoch_from_model_path(model_path)
logger.info(f"Estimated resume epoch = {last_epoch}")
# Reset the stats struct
for _ in range(last_epoch + 1):
stats.new_epoch()
assert last_epoch == stats.epoch
else:
stats = stats_load
# Update stats properties incase it was reset on load
stats.visdom_env = visdom_env_charts
stats.visdom_server = cfg.visdom_server
stats.visdom_port = cfg.visdom_port
stats.plot_file = os.path.join(cfg.exp_dir, "train_stats.pdf")
stats.synchronize_logged_vars(log_vars)
else:
logger.info(" -> clearing stats")
try:
# TODO: fix on creation of the buffers
# after the hack above, this will not pass in most cases
# ... but this is fine for now
model.load_state_dict(model_state_dict, strict=True)
except RuntimeError as e:
logger.error(e)
logger.info("Cant load state dict in strict mode! -> trying non-strict")
model.load_state_dict(model_state_dict, strict=False)
model.log_vars = log_vars
else:
accelerator = Accelerator(device_placement=False)
logger.info(accelerator.state)
device = accelerator.device
logger.info(" -> but not resuming -> starting from scratch")
elif force_load:
raise FileNotFoundError(f"Cannot find a checkpoint in {cfg.exp_dir}!")
logger.info(f"Running experiment on device: {device}")
os.makedirs(self.exp_dir, exist_ok=True)
return model, stats, optimizer_state
# set the debug mode
if self.detect_anomaly:
logger.info("Anomaly detection!")
torch.autograd.set_detect_anomaly(self.detect_anomaly)
# Initialize the datasets and dataloaders.
datasets, dataloaders = self.data_source.get_datasets_and_dataloaders()
def trainvalidate(
model,
stats,
epoch,
loader,
optimizer,
validation: bool,
*,
accelerator: Optional[Accelerator],
device: torch.device,
bp_var: str = "objective",
metric_print_interval: int = 5,
visualize_interval: int = 100,
visdom_env_root: str = "trainvalidate",
clip_grad: float = 0.0,
**kwargs,
) -> None:
"""
This is the main loop for training and evaluation including:
model forward pass, loss computation, backward pass and visualization.
# Init the model and the corresponding Stats object.
model = self.model_factory(
accelerator=accelerator,
exp_dir=self.exp_dir,
)
Args:
model: The model module optionally loaded from checkpoint
stats: The stats struct, also optionally loaded from checkpoint
epoch: The index of the current epoch
loader: The dataloader to use for the loop
optimizer: The optimizer module optionally loaded from checkpoint
validation: If true, run the loop with the model in eval mode
and skip the backward pass
bp_var: The name of the key in the model output `preds` dict which
should be used as the loss for the backward pass.
metric_print_interval: The batch interval at which the stats should be
logged.
visualize_interval: The batch interval at which the visualizations
should be plotted
visdom_env_root: The name of the visdom environment to use for plotting
clip_grad: Optionally clip the gradient norms.
If set to a value <=0.0, no clipping
device: The device on which to run the model.
stats = self.training_loop.load_stats(
log_vars=model.log_vars,
exp_dir=self.exp_dir,
resume=self.model_factory.resume,
resume_epoch=self.model_factory.resume_epoch, # pyre-ignore [16]
)
start_epoch = stats.epoch + 1
Returns:
None
"""
model.to(device)
if validation:
model.eval()
trainmode = "val"
else:
model.train()
trainmode = "train"
# Init the optimizer and LR scheduler.
optimizer, scheduler = self.optimizer_factory(
accelerator=accelerator,
exp_dir=self.exp_dir,
last_epoch=start_epoch,
model=model,
resume=self.model_factory.resume,
resume_epoch=self.model_factory.resume_epoch,
)
t_start = time.time()
# Wrap all modules in the distributed library
# Note: we don't pass the scheduler to prepare as it
# doesn't need to be stepped at each optimizer step
train_loader = dataloaders.train
val_loader = dataloaders.val
test_loader = dataloaders.test
if accelerator is not None:
(
model,
optimizer,
train_loader,
val_loader,
) = accelerator.prepare(model, optimizer, train_loader, val_loader)
# get the visdom env name
visdom_env_imgs = visdom_env_root + "_images_" + trainmode
viz = vis_utils.get_visdom_connection(
server=stats.visdom_server,
port=stats.visdom_port,
)
all_train_cameras = self.data_source.all_train_cameras
# Iterate through the batches
n_batches = len(loader)
for it, net_input in enumerate(loader):
last_iter = it == n_batches - 1
# Enter the main training loop.
self.training_loop.run(
train_loader=train_loader,
val_loader=val_loader,
test_loader=test_loader,
model=model,
optimizer=optimizer,
scheduler=scheduler,
all_train_cameras=all_train_cameras,
accelerator=accelerator,
# move to gpu where possible (in place)
net_input = net_input.to(device)
# run the forward pass
if not validation:
optimizer.zero_grad()
preds = model(**{**net_input, "evaluation_mode": EvaluationMode.TRAINING})
else:
with torch.no_grad():
preds = model(
**{**net_input, "evaluation_mode": EvaluationMode.EVALUATION}
)
# make sure we dont overwrite something
assert all(k not in preds for k in net_input.keys())
# merge everything into one big dict
preds.update(net_input)
# update the stats logger
stats.update(preds, time_start=t_start, stat_set=trainmode)
assert stats.it[trainmode] == it, "inconsistent stat iteration number!"
# print textual status update
if it % metric_print_interval == 0 or last_iter:
stats.print(stat_set=trainmode, max_it=n_batches)
# visualize results
if (
(accelerator is None or accelerator.is_local_main_process)
and visualize_interval > 0
and it % visualize_interval == 0
):
prefix = f"e{stats.epoch}_it{stats.it[trainmode]}"
model.visualize(
viz,
visdom_env_imgs,
preds,
prefix,
)
# optimizer step
if not validation:
loss = preds[bp_var]
assert torch.isfinite(loss).all(), "Non-finite loss!"
# backprop
if accelerator is None:
loss.backward()
else:
accelerator.backward(loss)
if clip_grad > 0.0:
# Optionally clip the gradient norms.
total_norm = torch.nn.utils.clip_grad_norm(
model.parameters(), clip_grad
)
if total_norm > clip_grad:
logger.info(
f"Clipping gradient: {total_norm}"
+ f" with coef {clip_grad / float(total_norm)}."
)
optimizer.step()
def run_training(cfg: DictConfig) -> None:
"""
Entry point to run the training and validation loops
based on the specified config file.
"""
# Initialize the accelerator
if no_accelerate:
accelerator = None
device = torch.device("cuda:0")
else:
accelerator = Accelerator(device_placement=False)
logger.info(accelerator.state)
device = accelerator.device
logger.info(f"Running experiment on device: {device}")
# set the debug mode
if cfg.detect_anomaly:
logger.info("Anomaly detection!")
torch.autograd.set_detect_anomaly(cfg.detect_anomaly)
# create the output folder
os.makedirs(cfg.exp_dir, exist_ok=True)
_seed_all_random_engines(cfg.seed)
remove_unused_components(cfg)
# dump the exp config to the exp dir
try:
cfg_filename = os.path.join(cfg.exp_dir, "expconfig.yaml")
OmegaConf.save(config=cfg, f=cfg_filename)
except PermissionError:
warnings.warn("Cant dump config due to insufficient permissions!")
# setup datasets
datasource = ImplicitronDataSource(**cfg.data_source_args)
datasets, dataloaders = datasource.get_datasets_and_dataloaders()
task = datasource.get_task()
# init the model
model, stats, optimizer_state = init_model(cfg=cfg, accelerator=accelerator)
start_epoch = stats.epoch + 1
# move model to gpu
model.to(device)
# only run evaluation on the test dataloader
if cfg.eval_only:
_eval_and_dump(
cfg,
task,
datasource.all_train_cameras,
datasets,
dataloaders,
model,
stats,
device=device,
exp_dir=self.exp_dir,
stats=stats,
seed=self.seed,
)
return
# init the optimizer
optimizer, scheduler = init_optimizer(
model,
optimizer_state=optimizer_state,
last_epoch=start_epoch,
**cfg.solver_args,
)
# check the scheduler and stats have been initialized correctly
assert scheduler.last_epoch == stats.epoch + 1
assert scheduler.last_epoch == start_epoch
# Wrap all modules in the distributed library
# Note: we don't pass the scheduler to prepare as it
# doesn't need to be stepped at each optimizer step
train_loader = dataloaders.train
val_loader = dataloaders.val
if accelerator is not None:
(
model,
optimizer,
train_loader,
val_loader,
) = accelerator.prepare(model, optimizer, train_loader, val_loader)
past_scheduler_lrs = []
# loop through epochs
for epoch in range(start_epoch, cfg.solver_args.max_epochs):
# automatic new_epoch and plotting of stats at every epoch start
with stats:
# Make sure to re-seed random generators to ensure reproducibility
# even after restart.
_seed_all_random_engines(cfg.seed + epoch)
cur_lr = float(scheduler.get_last_lr()[-1])
logger.info(f"scheduler lr = {cur_lr:1.2e}")
past_scheduler_lrs.append(cur_lr)
# train loop
trainvalidate(
model,
stats,
epoch,
train_loader,
optimizer,
False,
visdom_env_root=vis_utils.get_visdom_env(cfg),
device=device,
accelerator=accelerator,
**cfg,
)
# val loop (optional)
if val_loader is not None and epoch % cfg.validation_interval == 0:
trainvalidate(
model,
stats,
epoch,
val_loader,
optimizer,
True,
visdom_env_root=vis_utils.get_visdom_env(cfg),
device=device,
accelerator=accelerator,
**cfg,
)
# eval loop (optional)
if (
dataloaders.test is not None
and cfg.test_interval > 0
and epoch % cfg.test_interval == 0
):
_run_eval(
model,
datasource.all_train_cameras,
dataloaders.test,
task,
camera_difficulty_bin_breaks=cfg.camera_difficulty_bin_breaks,
device=device,
)
assert stats.epoch == epoch, "inconsistent stats!"
# delete previous models if required
# save model only on the main process
if cfg.store_checkpoints and (
accelerator is None or accelerator.is_local_main_process
):
if cfg.store_checkpoints_purge > 0:
for prev_epoch in range(epoch - cfg.store_checkpoints_purge):
model_io.purge_epoch(cfg.exp_dir, prev_epoch)
outfile = model_io.get_checkpoint(cfg.exp_dir, epoch)
unwrapped_model = (
model if accelerator is None else accelerator.unwrap_model(model)
)
model_io.safe_save_model(
unwrapped_model, stats, outfile, optimizer=optimizer
)
scheduler.step()
new_lr = float(scheduler.get_last_lr()[-1])
if new_lr != cur_lr:
logger.info(f"LR change! {cur_lr} -> {new_lr}")
if cfg.test_when_finished:
_eval_and_dump(
cfg,
task,
datasource.all_train_cameras,
datasets,
dataloaders,
model,
stats,
device=device,
)
def _eval_and_dump(
cfg,
task: Task,
all_train_cameras: Optional[CamerasBase],
datasets: DatasetMap,
dataloaders: DataLoaderMap,
model,
stats,
device,
) -> None:
"""
Run the evaluation loop with the test data loader and
save the predictions to the `exp_dir`.
"""
dataloader = dataloaders.test
if dataloader is None:
raise ValueError('DataLoaderMap have to contain the "test" entry for eval!')
results = _run_eval(
model,
all_train_cameras,
dataloader,
task,
camera_difficulty_bin_breaks=cfg.camera_difficulty_bin_breaks,
device=device,
)
# add the evaluation epoch to the results
for r in results:
r["eval_epoch"] = int(stats.epoch)
logger.info("Evaluation results")
evaluate.pretty_print_nvs_metrics(results)
with open(os.path.join(cfg.exp_dir, "results_test.json"), "w") as f:
json.dump(results, f)
def _get_eval_frame_data(frame_data):
"""
Masks the unknown image data to make sure we cannot use it at model evaluation time.
"""
frame_data_for_eval = copy.deepcopy(frame_data)
is_known = ds_utils.is_known_frame(frame_data.frame_type).type_as(
frame_data.image_rgb
)[:, None, None, None]
for k in ("image_rgb", "depth_map", "fg_probability", "mask_crop"):
value_masked = getattr(frame_data_for_eval, k).clone() * is_known
setattr(frame_data_for_eval, k, value_masked)
return frame_data_for_eval
def _run_eval(
model,
all_train_cameras,
loader,
task: Task,
camera_difficulty_bin_breaks: Tuple[float, float],
device,
):
"""
Run the evaluation loop on the test dataloader
"""
lpips_model = lpips.LPIPS(net="vgg")
lpips_model = lpips_model.to(device)
model.eval()
per_batch_eval_results = []
logger.info("Evaluating model ...")
for frame_data in tqdm.tqdm(loader):
frame_data = frame_data.to(device)
# mask out the unknown images so that the model does not see them
frame_data_for_eval = _get_eval_frame_data(frame_data)
with torch.no_grad():
preds = model(
**{**frame_data_for_eval, "evaluation_mode": EvaluationMode.EVALUATION}
)
# TODO: Cannot use accelerate gather for two reasons:.
# (1) TypeError: Can't apply _gpu_gather_one on object of type
# <class 'pytorch3d.implicitron.models.base_model.ImplicitronRender'>,
# only of nested list/tuple/dicts of objects that satisfy is_torch_tensor.
# (2) Same error above but for frame_data which contains Cameras.
implicitron_render = copy.deepcopy(preds["implicitron_render"])
per_batch_eval_results.append(
evaluate.eval_batch(
frame_data,
implicitron_render,
bg_color="black",
lpips_model=lpips_model,
source_cameras=all_train_cameras,
)
)
_, category_result = evaluate.summarize_nvs_eval_results(
per_batch_eval_results, task, camera_difficulty_bin_breaks
)
return category_result["results"]
def _seed_all_random_engines(seed: int) -> None:
np.random.seed(seed)
torch.manual_seed(seed)
random.seed(seed)
def _setup_envvars_for_cluster() -> bool:
@@ -257,20 +678,9 @@ def _setup_envvars_for_cluster() -> bool:
return True
def dump_cfg(cfg: DictConfig) -> None:
remove_unused_components(cfg)
# dump the exp config to the exp dir
os.makedirs(cfg.exp_dir, exist_ok=True)
try:
cfg_filename = os.path.join(cfg.exp_dir, "expconfig.yaml")
OmegaConf.save(config=cfg, f=cfg_filename)
except PermissionError:
warnings.warn("Can't dump config due to insufficient permissions!")
expand_args_fields(Experiment)
expand_args_fields(ExperimentConfig)
cs = hydra.core.config_store.ConfigStore.instance()
cs.store(name="default_config", node=Experiment)
cs.store(name="default_config", node=ExperimentConfig)
@hydra.main(config_path="./configs/", config_name="default_config")
@@ -284,14 +694,12 @@ def experiment(cfg: DictConfig) -> None:
logger.info("Running locally")
# TODO: The following may be needed for hydra/submitit it to work
expand_args_fields(ImplicitronModelBase)
expand_args_fields(GenericModel)
expand_args_fields(AdaptiveRaySampler)
expand_args_fields(MultiPassEmissionAbsorptionRenderer)
expand_args_fields(ImplicitronDataSource)
experiment = Experiment(**cfg)
dump_cfg(cfg)
experiment.run()
run_training(cfg)
if __name__ == "__main__":

View File

@@ -0,0 +1,49 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from dataclasses import field
from typing import Any, Dict, Tuple
from omegaconf import DictConfig
from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource
from pytorch3d.implicitron.models.generic_model import GenericModel
from pytorch3d.implicitron.tools.config import Configurable, get_default_args_field
from .optimization import init_optimizer
class ExperimentConfig(Configurable):
generic_model_args: DictConfig = get_default_args_field(GenericModel)
solver_args: DictConfig = get_default_args_field(init_optimizer)
data_source_args: DictConfig = get_default_args_field(ImplicitronDataSource)
architecture: str = "generic"
detect_anomaly: bool = False
eval_only: bool = False
exp_dir: str = "./data/default_experiment/"
exp_idx: int = 0
gpu_idx: int = 0
metric_print_interval: int = 5
resume: bool = True
resume_epoch: int = -1
seed: int = 0
store_checkpoints: bool = True
store_checkpoints_purge: int = 1
test_interval: int = -1
test_when_finished: bool = False
validation_interval: int = 1
visdom_env: str = ""
visdom_port: int = 8097
visdom_server: str = "http://127.0.0.1"
visualize_interval: int = 1000
clip_grad: float = 0.0
camera_difficulty_bin_breaks: Tuple[float, ...] = 0.97, 0.98
hydra: Dict[str, Any] = field(
default_factory=lambda: {
"run": {"dir": "."}, # Make hydra not change the working dir.
"output_subdir": None, # disable storing the .hydra logs
}
)

View File

@@ -1,133 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import logging
import os
from typing import Optional
import torch.optim
from accelerate import Accelerator
from pytorch3d.implicitron.models.base_model import ImplicitronModelBase
from pytorch3d.implicitron.tools import model_io
from pytorch3d.implicitron.tools.config import (
registry,
ReplaceableBase,
run_auto_creation,
)
from pytorch3d.implicitron.tools.stats import Stats
logger = logging.getLogger(__name__)
class ModelFactoryBase(ReplaceableBase):
resume: bool = True # resume from the last checkpoint
def __call__(self, **kwargs) -> ImplicitronModelBase:
"""
Initialize the model (possibly from a previously saved state).
Returns: An instance of ImplicitronModelBase.
"""
raise NotImplementedError()
def load_stats(self, **kwargs) -> Stats:
"""
Initialize or load a Stats object.
"""
raise NotImplementedError()
@registry.register
class ImplicitronModelFactory(ModelFactoryBase): # pyre-ignore [13]
"""
A factory class that initializes an implicit rendering model.
Members:
model: An ImplicitronModelBase object.
resume: If True, attempt to load the last checkpoint from `exp_dir`
passed to __call__. Failure to do so will return a model with ini-
tial weights unless `force_resume` is True.
resume_epoch: If `resume` is True: Resume a model at this epoch, or if
`resume_epoch` <= 0, then resume from the latest checkpoint.
force_resume: If True, throw a FileNotFoundError if `resume` is True but
a model checkpoint cannot be found.
"""
model: ImplicitronModelBase
model_class_type: str = "GenericModel"
resume: bool = True
resume_epoch: int = -1
force_resume: bool = False
def __post_init__(self):
run_auto_creation(self)
def __call__(
self,
exp_dir: str,
accelerator: Optional[Accelerator] = None,
) -> ImplicitronModelBase:
"""
Returns an instance of `ImplicitronModelBase`, possibly loaded from a
checkpoint (if self.resume, self.resume_epoch specify so).
Args:
exp_dir: Root experiment directory.
accelerator: An Accelerator object.
Returns:
model: The model with optionally loaded weights from checkpoint
Raise:
FileNotFoundError if `force_resume` is True but checkpoint not found.
"""
# Determine the network outputs that should be logged
if hasattr(self.model, "log_vars"):
log_vars = list(self.model.log_vars)
else:
log_vars = ["objective"]
if self.resume_epoch > 0:
# Resume from a certain epoch
model_path = model_io.get_checkpoint(exp_dir, self.resume_epoch)
if not os.path.isfile(model_path):
raise ValueError(f"Cannot find model from epoch {self.resume_epoch}.")
else:
# Retrieve the last checkpoint
model_path = model_io.find_last_checkpoint(exp_dir)
if model_path is not None:
logger.info(f"Found previous model {model_path}")
if self.force_resume or self.resume:
logger.info("Resuming.")
map_location = None
if accelerator is not None and not accelerator.is_local_main_process:
map_location = {
"cuda:%d" % 0: "cuda:%d" % accelerator.local_process_index
}
model_state_dict = torch.load(
model_io.get_model_path(model_path), map_location=map_location
)
try:
self.model.load_state_dict(model_state_dict, strict=True)
except RuntimeError as e:
logger.error(e)
logger.info(
"Cannot load state dict in strict mode! -> trying non-strict"
)
self.model.load_state_dict(model_state_dict, strict=False)
self.model.log_vars = log_vars
else:
logger.info("Not resuming -> starting from scratch.")
elif self.force_resume:
raise FileNotFoundError(f"Cannot find a checkpoint in {exp_dir}!")
return self.model

View File

@@ -0,0 +1,109 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import logging
from typing import Any, Dict, Optional, Tuple
import torch
from pytorch3d.implicitron.models.generic_model import GenericModel
from pytorch3d.implicitron.tools.config import enable_get_default_args
logger = logging.getLogger(__name__)
def init_optimizer(
model: GenericModel,
optimizer_state: Optional[Dict[str, Any]],
last_epoch: int,
breed: str = "adam",
weight_decay: float = 0.0,
lr_policy: str = "multistep",
lr: float = 0.0005,
gamma: float = 0.1,
momentum: float = 0.9,
betas: Tuple[float, ...] = (0.9, 0.999),
milestones: Tuple[int, ...] = (),
max_epochs: int = 1000,
):
"""
Initialize the optimizer (optionally from checkpoint state)
and the learning rate scheduler.
Args:
model: The model with optionally loaded weights
optimizer_state: The state dict for the optimizer. If None
it has not been loaded from checkpoint
last_epoch: If the model was loaded from checkpoint this will be the
number of the last epoch that was saved
breed: The type of optimizer to use e.g. adam
weight_decay: The optimizer weight_decay (L2 penalty on model weights)
lr_policy: The policy to use for learning rate. Currently, only "multistep:
is supported.
lr: The value for the initial learning rate
gamma: Multiplicative factor of learning rate decay
momentum: Momentum factor for SGD optimizer
betas: Coefficients used for computing running averages of gradient and its square
in the Adam optimizer
milestones: List of increasing epoch indices at which the learning rate is
modified
max_epochs: The maximum number of epochs to run the optimizer for
Returns:
optimizer: Optimizer module, optionally loaded from checkpoint
scheduler: Learning rate scheduler module
Raise:
ValueError if `breed` or `lr_policy` are not supported.
"""
# Get the parameters to optimize
if hasattr(model, "_get_param_groups"): # use the model function
# pyre-ignore[29]
p_groups = model._get_param_groups(lr, wd=weight_decay)
else:
allprm = [prm for prm in model.parameters() if prm.requires_grad]
p_groups = [{"params": allprm, "lr": lr}]
# Intialize the optimizer
if breed == "sgd":
optimizer = torch.optim.SGD(
p_groups, lr=lr, momentum=momentum, weight_decay=weight_decay
)
elif breed == "adagrad":
optimizer = torch.optim.Adagrad(p_groups, lr=lr, weight_decay=weight_decay)
elif breed == "adam":
optimizer = torch.optim.Adam(
p_groups, lr=lr, betas=betas, weight_decay=weight_decay
)
else:
raise ValueError("no such solver type %s" % breed)
logger.info(" -> solver type = %s" % breed)
# Load state from checkpoint
if optimizer_state is not None:
logger.info(" -> setting loaded optimizer state")
optimizer.load_state_dict(optimizer_state)
# Initialize the learning rate scheduler
if lr_policy == "multistep":
scheduler = torch.optim.lr_scheduler.MultiStepLR(
optimizer,
milestones=milestones,
gamma=gamma,
)
else:
raise ValueError("no such lr policy %s" % lr_policy)
# When loading from checkpoint, this will make sure that the
# lr is correctly set even after returning
for _ in range(last_epoch):
scheduler.step()
optimizer.zero_grad()
return optimizer, scheduler
enable_get_default_args(init_optimizer)

View File

@@ -1,230 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import logging
import os
from typing import Any, Dict, Optional, Tuple
import torch.optim
from accelerate import Accelerator
from pytorch3d.implicitron.models.base_model import ImplicitronModelBase
from pytorch3d.implicitron.tools import model_io
from pytorch3d.implicitron.tools.config import (
registry,
ReplaceableBase,
run_auto_creation,
)
logger = logging.getLogger(__name__)
class OptimizerFactoryBase(ReplaceableBase):
def __call__(
self, model: ImplicitronModelBase, **kwargs
) -> Tuple[torch.optim.Optimizer, Any]:
"""
Initialize the optimizer and lr scheduler.
Args:
model: The model with optionally loaded weights.
Returns:
An optimizer module (optionally loaded from a checkpoint) and
a learning rate scheduler module (should be a subclass of torch.optim's
lr_scheduler._LRScheduler).
"""
raise NotImplementedError()
@registry.register
class ImplicitronOptimizerFactory(OptimizerFactoryBase):
"""
A factory that initializes the optimizer and lr scheduler.
Members:
betas: Beta parameters for the Adam optimizer.
breed: The type of optimizer to use. We currently support SGD, Adagrad
and Adam.
exponential_lr_step_size: With Exponential policy only,
lr = lr * gamma ** (epoch/step_size)
gamma: Multiplicative factor of learning rate decay.
lr: The value for the initial learning rate.
lr_policy: The policy to use for learning rate. We currently support
MultiStepLR and Exponential policies.
momentum: A momentum value (for SGD only).
multistep_lr_milestones: With MultiStepLR policy only: list of
increasing epoch indices at which the learning rate is modified.
momentum: Momentum factor for SGD optimizer.
weight_decay: The optimizer weight_decay (L2 penalty on model weights).
"""
betas: Tuple[float, ...] = (0.9, 0.999)
breed: str = "Adam"
exponential_lr_step_size: int = 250
gamma: float = 0.1
lr: float = 0.0005
lr_policy: str = "MultiStepLR"
momentum: float = 0.9
multistep_lr_milestones: tuple = ()
weight_decay: float = 0.0
linear_exponential_lr_milestone: int = 200
linear_exponential_start_gamma: float = 0.1
def __post_init__(self):
run_auto_creation(self)
def __call__(
self,
last_epoch: int,
model: ImplicitronModelBase,
accelerator: Optional[Accelerator] = None,
exp_dir: Optional[str] = None,
resume: bool = True,
resume_epoch: int = -1,
**kwargs,
) -> Tuple[torch.optim.Optimizer, Any]:
"""
Initialize the optimizer (optionally from a checkpoint) and the lr scheduluer.
Args:
last_epoch: If the model was loaded from checkpoint this will be the
number of the last epoch that was saved.
model: The model with optionally loaded weights.
accelerator: An optional Accelerator instance.
exp_dir: Root experiment directory.
resume: If True, attempt to load optimizer checkpoint from exp_dir.
Failure to do so will return a newly initialized optimizer.
resume_epoch: If `resume` is True: Resume optimizer at this epoch. If
`resume_epoch` <= 0, then resume from the latest checkpoint.
Returns:
An optimizer module (optionally loaded from a checkpoint) and
a learning rate scheduler module (should be a subclass of torch.optim's
lr_scheduler._LRScheduler).
"""
# Get the parameters to optimize
if hasattr(model, "_get_param_groups"): # use the model function
# pyre-ignore[29]
p_groups = model._get_param_groups(self.lr, wd=self.weight_decay)
else:
allprm = [prm for prm in model.parameters() if prm.requires_grad]
p_groups = [{"params": allprm, "lr": self.lr}]
# Intialize the optimizer
if self.breed == "SGD":
optimizer = torch.optim.SGD(
p_groups,
lr=self.lr,
momentum=self.momentum,
weight_decay=self.weight_decay,
)
elif self.breed == "Adagrad":
optimizer = torch.optim.Adagrad(
p_groups, lr=self.lr, weight_decay=self.weight_decay
)
elif self.breed == "Adam":
optimizer = torch.optim.Adam(
p_groups, lr=self.lr, betas=self.betas, weight_decay=self.weight_decay
)
else:
raise ValueError(f"No such solver type {self.breed}")
logger.info(f"Solver type = {self.breed}")
# Load state from checkpoint
optimizer_state = self._get_optimizer_state(
exp_dir,
accelerator,
resume_epoch=resume_epoch,
resume=resume,
)
if optimizer_state is not None:
logger.info("Setting loaded optimizer state.")
optimizer.load_state_dict(optimizer_state)
# Initialize the learning rate scheduler
if self.lr_policy.casefold() == "MultiStepLR".casefold():
scheduler = torch.optim.lr_scheduler.MultiStepLR(
optimizer,
milestones=self.multistep_lr_milestones,
gamma=self.gamma,
)
elif self.lr_policy.casefold() == "Exponential".casefold():
scheduler = torch.optim.lr_scheduler.LambdaLR(
optimizer,
lambda epoch: self.gamma ** (epoch / self.exponential_lr_step_size),
verbose=False,
)
elif self.lr_policy.casefold() == "LinearExponential".casefold():
# linear learning rate progression between epochs 0 to
# self.linear_exponential_lr_milestone, followed by exponential
# lr decay for the rest of the epochs
def _get_lr(epoch: int):
m = self.linear_exponential_lr_milestone
if epoch < m:
w = (m - epoch) / m
gamma = w * self.linear_exponential_start_gamma + (1 - w)
else:
epoch_rest = epoch - m
gamma = self.gamma ** (epoch_rest / self.exponential_lr_step_size)
return gamma
scheduler = torch.optim.lr_scheduler.LambdaLR(
optimizer, _get_lr, verbose=False
)
else:
raise ValueError("no such lr policy %s" % self.lr_policy)
# When loading from checkpoint, this will make sure that the
# lr is correctly set even after returning.
for _ in range(last_epoch):
scheduler.step()
optimizer.zero_grad()
return optimizer, scheduler
def _get_optimizer_state(
self,
exp_dir: Optional[str],
accelerator: Optional[Accelerator] = None,
resume: bool = True,
resume_epoch: int = -1,
) -> Optional[Dict[str, Any]]:
"""
Load an optimizer state from a checkpoint.
resume: If True, attempt to load the last checkpoint from `exp_dir`
passed to __call__. Failure to do so will return a newly initialized
optimizer.
resume_epoch: If `resume` is True: Resume optimizer at this epoch. If
`resume_epoch` <= 0, then resume from the latest checkpoint.
"""
if exp_dir is None or not resume:
return None
if resume_epoch > 0:
save_path = model_io.get_checkpoint(exp_dir, resume_epoch)
if not os.path.isfile(save_path):
raise FileNotFoundError(
f"Cannot find optimizer from epoch {resume_epoch}."
)
else:
save_path = model_io.find_last_checkpoint(exp_dir)
optimizer_state = None
if save_path is not None:
logger.info(f"Found previous optimizer state {save_path} -> resuming.")
opt_path = model_io.get_optimizer_path(save_path)
if os.path.isfile(opt_path):
map_location = None
if accelerator is not None and not accelerator.is_local_main_process:
map_location = {
"cuda:%d" % 0: "cuda:%d" % accelerator.local_process_index
}
optimizer_state = torch.load(opt_path, map_location)
else:
raise FileNotFoundError(f"Optimizer state {opt_path} does not exist.")
return optimizer_state

View File

@@ -1,447 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import logging
import os
import time
from typing import Any, List, Optional
import torch
from accelerate import Accelerator
from pytorch3d.implicitron.evaluation.evaluator import EvaluatorBase
from pytorch3d.implicitron.models.base_model import ImplicitronModelBase
from pytorch3d.implicitron.models.generic_model import EvaluationMode
from pytorch3d.implicitron.tools import model_io, vis_utils
from pytorch3d.implicitron.tools.config import (
registry,
ReplaceableBase,
run_auto_creation,
)
from pytorch3d.implicitron.tools.stats import Stats
from pytorch3d.renderer.cameras import CamerasBase
from torch.utils.data import DataLoader
from .utils import seed_all_random_engines
logger = logging.getLogger(__name__)
class TrainingLoopBase(ReplaceableBase):
def run(
self,
train_loader: DataLoader,
val_loader: Optional[DataLoader],
test_loader: Optional[DataLoader],
model: ImplicitronModelBase,
optimizer: torch.optim.Optimizer,
scheduler: Any,
**kwargs,
) -> None:
raise NotImplementedError()
def load_stats(
self,
log_vars: List[str],
exp_dir: str,
resume: bool = True,
resume_epoch: int = -1,
**kwargs,
) -> Stats:
raise NotImplementedError()
@registry.register
class ImplicitronTrainingLoop(TrainingLoopBase): # pyre-ignore [13]
"""
Members:
eval_only: If True, only run evaluation using the test dataloader.
evaluator: An EvaluatorBase instance, used to evaluate training results.
max_epochs: Train for this many epochs. Note that if the model was
loaded from a checkpoint, we will restart training at the appropriate
epoch and run for (max_epochs - checkpoint_epoch) epochs.
store_checkpoints: If True, store model and optimizer state checkpoints.
store_checkpoints_purge: If >= 0, remove any checkpoints older or equal
to this many epochs.
test_interval: Evaluate on a test dataloader each `test_interval` epochs.
test_when_finished: If True, evaluate on a test dataloader when training
completes.
validation_interval: Validate each `validation_interval` epochs.
clip_grad: Optionally clip the gradient norms.
If set to a value <=0.0, no clipping
metric_print_interval: The batch interval at which the stats should be
logged.
visualize_interval: The batch interval at which the visualizations
should be plotted
visdom_env: The name of the Visdom environment to use for plotting.
visdom_port: The Visdom port.
visdom_server: Address of the Visdom server.
"""
# Parameters of the outer training loop.
eval_only: bool = False
evaluator: EvaluatorBase
evaluator_class_type: str = "ImplicitronEvaluator"
max_epochs: int = 1000
store_checkpoints: bool = True
store_checkpoints_purge: int = 1
test_interval: int = -1
test_when_finished: bool = False
validation_interval: int = 1
# Gradient clipping.
clip_grad: float = 0.0
# Visualization/logging parameters.
metric_print_interval: int = 5
visualize_interval: int = 1000
visdom_env: str = ""
visdom_port: int = int(os.environ.get("VISDOM_PORT", 8097))
visdom_server: str = "http://127.0.0.1"
def __post_init__(self):
run_auto_creation(self)
def run(
self,
*,
train_loader: DataLoader,
val_loader: Optional[DataLoader],
test_loader: Optional[DataLoader],
model: ImplicitronModelBase,
optimizer: torch.optim.Optimizer,
scheduler: Any,
accelerator: Optional[Accelerator],
all_train_cameras: Optional[CamerasBase],
device: torch.device,
exp_dir: str,
stats: Stats,
seed: int,
**kwargs,
):
"""
Entry point to run the training and validation loops
based on the specified config file.
"""
start_epoch = stats.epoch + 1
assert scheduler.last_epoch == stats.epoch + 1
assert scheduler.last_epoch == start_epoch
# only run evaluation on the test dataloader
if self.eval_only:
if test_loader is not None:
self.evaluator.run(
all_train_cameras=all_train_cameras,
dataloader=test_loader,
device=device,
dump_to_json=True,
epoch=stats.epoch,
exp_dir=exp_dir,
model=model,
)
return
else:
raise ValueError(
"Cannot evaluate and dump results to json, no test data provided."
)
# loop through epochs
for epoch in range(start_epoch, self.max_epochs):
# automatic new_epoch and plotting of stats at every epoch start
with stats:
# Make sure to re-seed random generators to ensure reproducibility
# even after restart.
seed_all_random_engines(seed + epoch)
cur_lr = float(scheduler.get_last_lr()[-1])
logger.debug(f"scheduler lr = {cur_lr:1.2e}")
# train loop
self._training_or_validation_epoch(
accelerator=accelerator,
device=device,
epoch=epoch,
loader=train_loader,
model=model,
optimizer=optimizer,
stats=stats,
validation=False,
)
# val loop (optional)
if val_loader is not None and epoch % self.validation_interval == 0:
self._training_or_validation_epoch(
accelerator=accelerator,
device=device,
epoch=epoch,
loader=val_loader,
model=model,
optimizer=optimizer,
stats=stats,
validation=True,
)
# eval loop (optional)
if (
test_loader is not None
and self.test_interval > 0
and epoch % self.test_interval == 0
):
self.evaluator.run(
all_train_cameras=all_train_cameras,
device=device,
dataloader=test_loader,
model=model,
)
assert stats.epoch == epoch, "inconsistent stats!"
self._checkpoint(accelerator, epoch, exp_dir, model, optimizer, stats)
scheduler.step()
new_lr = float(scheduler.get_last_lr()[-1])
if new_lr != cur_lr:
logger.info(f"LR change! {cur_lr} -> {new_lr}")
if self.test_when_finished:
if test_loader is not None:
self.evaluator.run(
all_train_cameras=all_train_cameras,
device=device,
dump_to_json=True,
epoch=stats.epoch,
exp_dir=exp_dir,
dataloader=test_loader,
model=model,
)
else:
raise ValueError(
"Cannot evaluate and dump results to json, no test data provided."
)
def load_stats(
self,
log_vars: List[str],
exp_dir: str,
resume: bool = True,
resume_epoch: int = -1,
**kwargs,
) -> Stats:
"""
Load Stats that correspond to the model's log_vars and resume_epoch.
Args:
log_vars: A list of variable names to log. Should be a subset of the
`preds` returned by the forward function of the corresponding
ImplicitronModelBase instance.
exp_dir: Root experiment directory.
resume: If False, do not load stats from the checkpoint speci-
fied by resume and resume_epoch; instead, create a fresh stats object.
stats: The stats structure (optionally loaded from checkpoint)
"""
# Init the stats struct
visdom_env_charts = (
vis_utils.get_visdom_env(self.visdom_env, exp_dir) + "_charts"
)
stats = Stats(
# log_vars should be a list, but OmegaConf might load them as ListConfig
list(log_vars),
plot_file=os.path.join(exp_dir, "train_stats.pdf"),
visdom_env=visdom_env_charts,
verbose=False,
visdom_server=self.visdom_server,
visdom_port=self.visdom_port,
)
model_path = None
if resume:
if resume_epoch > 0:
model_path = model_io.get_checkpoint(exp_dir, resume_epoch)
if not os.path.isfile(model_path):
raise FileNotFoundError(
f"Cannot find stats from epoch {resume_epoch}."
)
else:
model_path = model_io.find_last_checkpoint(exp_dir)
if model_path is not None:
stats_path = model_io.get_stats_path(model_path)
stats_load = model_io.load_stats(stats_path)
# Determine if stats should be reset
if resume:
if stats_load is None:
logger.warning("\n\n\n\nCORRUPT STATS -> clearing stats\n\n\n\n")
last_epoch = model_io.parse_epoch_from_model_path(model_path)
logger.info(f"Estimated resume epoch = {last_epoch}")
# Reset the stats struct
for _ in range(last_epoch + 1):
stats.new_epoch()
assert last_epoch == stats.epoch
else:
logger.info(f"Found previous stats in {stats_path} -> resuming.")
stats = stats_load
# Update stats properties incase it was reset on load
stats.visdom_env = visdom_env_charts
stats.visdom_server = self.visdom_server
stats.visdom_port = self.visdom_port
stats.plot_file = os.path.join(exp_dir, "train_stats.pdf")
stats.synchronize_logged_vars(log_vars)
else:
logger.info("Clearing stats")
return stats
def _training_or_validation_epoch(
self,
epoch: int,
loader: DataLoader,
model: ImplicitronModelBase,
optimizer: torch.optim.Optimizer,
stats: Stats,
validation: bool,
*,
accelerator: Optional[Accelerator],
bp_var: str = "objective",
device: torch.device,
**kwargs,
) -> None:
"""
This is the main loop for training and evaluation including:
model forward pass, loss computation, backward pass and visualization.
Args:
epoch: The index of the current epoch
loader: The dataloader to use for the loop
model: The model module optionally loaded from checkpoint
optimizer: The optimizer module optionally loaded from checkpoint
stats: The stats struct, also optionally loaded from checkpoint
validation: If true, run the loop with the model in eval mode
and skip the backward pass
accelerator: An optional Accelerator instance.
bp_var: The name of the key in the model output `preds` dict which
should be used as the loss for the backward pass.
device: The device on which to run the model.
"""
if validation:
model.eval()
trainmode = "val"
else:
model.train()
trainmode = "train"
t_start = time.time()
# get the visdom env name
visdom_env_imgs = stats.visdom_env + "_images_" + trainmode
viz = vis_utils.get_visdom_connection(
server=stats.visdom_server,
port=stats.visdom_port,
)
# Iterate through the batches
n_batches = len(loader)
for it, net_input in enumerate(loader):
last_iter = it == n_batches - 1
# move to gpu where possible (in place)
net_input = net_input.to(device)
# run the forward pass
if not validation:
optimizer.zero_grad()
preds = model(
**{**net_input, "evaluation_mode": EvaluationMode.TRAINING}
)
else:
with torch.no_grad():
preds = model(
**{**net_input, "evaluation_mode": EvaluationMode.EVALUATION}
)
# make sure we dont overwrite something
assert all(k not in preds for k in net_input.keys())
# merge everything into one big dict
preds.update(net_input)
# update the stats logger
stats.update(preds, time_start=t_start, stat_set=trainmode)
# pyre-ignore [16]
assert stats.it[trainmode] == it, "inconsistent stat iteration number!"
# print textual status update
if it % self.metric_print_interval == 0 or last_iter:
stats.print(stat_set=trainmode, max_it=n_batches)
# visualize results
if (
(accelerator is None or accelerator.is_local_main_process)
and self.visualize_interval > 0
and it % self.visualize_interval == 0
):
prefix = f"e{stats.epoch}_it{stats.it[trainmode]}"
if hasattr(model, "visualize"):
# pyre-ignore [29]
model.visualize(
viz,
visdom_env_imgs,
preds,
prefix,
)
# optimizer step
if not validation:
loss = preds[bp_var]
assert torch.isfinite(loss).all(), "Non-finite loss!"
# backprop
if accelerator is None:
loss.backward()
else:
accelerator.backward(loss)
if self.clip_grad > 0.0:
# Optionally clip the gradient norms.
total_norm = torch.nn.utils.clip_grad_norm(
model.parameters(), self.clip_grad
)
if total_norm > self.clip_grad:
logger.debug(
f"Clipping gradient: {total_norm}"
+ f" with coef {self.clip_grad / float(total_norm)}."
)
optimizer.step()
def _checkpoint(
self,
accelerator: Optional[Accelerator],
epoch: int,
exp_dir: str,
model: ImplicitronModelBase,
optimizer: torch.optim.Optimizer,
stats: Stats,
):
"""
Save a model and its corresponding Stats object to a file, if
`self.store_checkpoints` is True. In addition, if
`self.store_checkpoints_purge` is True, remove any checkpoints older
than `self.store_checkpoints_purge` epochs old.
"""
if self.store_checkpoints and (
accelerator is None or accelerator.is_local_main_process
):
if self.store_checkpoints_purge > 0:
for prev_epoch in range(epoch - self.store_checkpoints_purge):
model_io.purge_epoch(exp_dir, prev_epoch)
outfile = model_io.get_checkpoint(exp_dir, epoch)
unwrapped_model = (
model if accelerator is None else accelerator.unwrap_model(model)
)
model_io.safe_save_model(
unwrapped_model, stats, outfile, optimizer=optimizer
)

View File

@@ -1,17 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import random
import numpy as np
import torch
def seed_all_random_engines(seed: int) -> None:
np.random.seed(seed)
torch.manual_seed(seed)
random.seed(seed)

View File

@@ -1,15 +1,296 @@
data_source_class_type: ImplicitronDataSource
model_factory_class_type: ImplicitronModelFactory
optimizer_factory_class_type: ImplicitronOptimizerFactory
training_loop_class_type: ImplicitronTrainingLoop
seed: 42
detect_anomaly: false
exp_dir: ./data/default_experiment/
hydra:
run:
dir: .
output_subdir: null
data_source_ImplicitronDataSource_args:
generic_model_args:
mask_images: true
mask_depths: true
render_image_width: 400
render_image_height: 400
mask_threshold: 0.5
output_rasterized_mc: false
bg_color:
- 0.0
- 0.0
- 0.0
num_passes: 1
chunk_size_grid: 4096
render_features_dimensions: 3
tqdm_trigger_threshold: 16
n_train_target_views: 1
sampling_mode_training: mask_sample
sampling_mode_evaluation: full_grid
global_encoder_class_type: null
raysampler_class_type: AdaptiveRaySampler
renderer_class_type: MultiPassEmissionAbsorptionRenderer
image_feature_extractor_class_type: null
view_pooler_enabled: false
implicit_function_class_type: NeuralRadianceFieldImplicitFunction
view_metrics_class_type: ViewMetrics
regularization_metrics_class_type: RegularizationMetrics
loss_weights:
loss_rgb_mse: 1.0
loss_prev_stage_rgb_mse: 1.0
loss_mask_bce: 0.0
loss_prev_stage_mask_bce: 0.0
log_vars:
- loss_rgb_psnr_fg
- loss_rgb_psnr
- loss_rgb_mse
- loss_rgb_huber
- loss_depth_abs
- loss_depth_abs_fg
- loss_mask_neg_iou
- loss_mask_bce
- loss_mask_beta_prior
- loss_eikonal
- loss_density_tv
- loss_depth_neg_penalty
- loss_autodecoder_norm
- loss_prev_stage_rgb_mse
- loss_prev_stage_rgb_psnr_fg
- loss_prev_stage_rgb_psnr
- loss_prev_stage_mask_bce
- objective
- epoch
- sec/it
global_encoder_HarmonicTimeEncoder_args:
n_harmonic_functions: 10
append_input: true
time_divisor: 1.0
global_encoder_SequenceAutodecoder_args:
autodecoder_args:
encoding_dim: 0
n_instances: 0
init_scale: 1.0
ignore_input: false
raysampler_AdaptiveRaySampler_args:
image_width: 400
image_height: 400
sampling_mode_training: mask_sample
sampling_mode_evaluation: full_grid
n_pts_per_ray_training: 64
n_pts_per_ray_evaluation: 64
n_rays_per_image_sampled_from_mask: 1024
stratified_point_sampling_training: true
stratified_point_sampling_evaluation: false
scene_extent: 8.0
scene_center:
- 0.0
- 0.0
- 0.0
raysampler_NearFarRaySampler_args:
image_width: 400
image_height: 400
sampling_mode_training: mask_sample
sampling_mode_evaluation: full_grid
n_pts_per_ray_training: 64
n_pts_per_ray_evaluation: 64
n_rays_per_image_sampled_from_mask: 1024
stratified_point_sampling_training: true
stratified_point_sampling_evaluation: false
min_depth: 0.1
max_depth: 8.0
renderer_LSTMRenderer_args:
num_raymarch_steps: 10
init_depth: 17.0
init_depth_noise_std: 0.0005
hidden_size: 16
n_feature_channels: 256
bg_color: null
verbose: false
renderer_MultiPassEmissionAbsorptionRenderer_args:
raymarcher_class_type: EmissionAbsorptionRaymarcher
n_pts_per_ray_fine_training: 64
n_pts_per_ray_fine_evaluation: 64
stratified_sampling_coarse_training: true
stratified_sampling_coarse_evaluation: false
append_coarse_samples_to_fine: true
density_noise_std_train: 0.0
return_weights: false
raymarcher_CumsumRaymarcher_args:
surface_thickness: 1
bg_color:
- 0.0
background_opacity: 0.0
density_relu: true
blend_output: false
raymarcher_EmissionAbsorptionRaymarcher_args:
surface_thickness: 1
bg_color:
- 0.0
background_opacity: 10000000000.0
density_relu: true
blend_output: false
renderer_SignedDistanceFunctionRenderer_args:
render_features_dimensions: 3
ray_tracer_args:
object_bounding_sphere: 1.0
sdf_threshold: 5.0e-05
line_search_step: 0.5
line_step_iters: 1
sphere_tracing_iters: 10
n_steps: 100
n_secant_steps: 8
ray_normal_coloring_network_args:
feature_vector_size: 3
mode: idr
d_in: 9
d_out: 3
dims:
- 512
- 512
- 512
- 512
weight_norm: true
n_harmonic_functions_dir: 0
pooled_feature_dim: 0
bg_color:
- 0.0
soft_mask_alpha: 50.0
image_feature_extractor_ResNetFeatureExtractor_args:
name: resnet34
pretrained: true
stages:
- 1
- 2
- 3
- 4
normalize_image: true
image_rescale: 0.16
first_max_pool: true
proj_dim: 32
l2_norm: true
add_masks: true
add_images: true
global_average_pool: false
feature_rescale: 1.0
view_pooler_args:
feature_aggregator_class_type: AngleWeightedReductionFeatureAggregator
view_sampler_args:
masked_sampling: false
sampling_mode: bilinear
feature_aggregator_AngleWeightedIdentityFeatureAggregator_args:
exclude_target_view: true
exclude_target_view_mask_features: true
concatenate_output: true
weight_by_ray_angle_gamma: 1.0
min_ray_angle_weight: 0.1
feature_aggregator_AngleWeightedReductionFeatureAggregator_args:
exclude_target_view: true
exclude_target_view_mask_features: true
concatenate_output: true
reduction_functions:
- AVG
- STD
weight_by_ray_angle_gamma: 1.0
min_ray_angle_weight: 0.1
feature_aggregator_IdentityFeatureAggregator_args:
exclude_target_view: true
exclude_target_view_mask_features: true
concatenate_output: true
feature_aggregator_ReductionFeatureAggregator_args:
exclude_target_view: true
exclude_target_view_mask_features: true
concatenate_output: true
reduction_functions:
- AVG
- STD
implicit_function_IdrFeatureField_args:
feature_vector_size: 3
d_in: 3
d_out: 1
dims:
- 512
- 512
- 512
- 512
- 512
- 512
- 512
- 512
geometric_init: true
bias: 1.0
skip_in: []
weight_norm: true
n_harmonic_functions_xyz: 0
pooled_feature_dim: 0
encoding_dim: 0
implicit_function_NeRFormerImplicitFunction_args:
n_harmonic_functions_xyz: 10
n_harmonic_functions_dir: 4
n_hidden_neurons_dir: 128
latent_dim: 0
input_xyz: true
xyz_ray_dir_in_camera_coords: false
color_dim: 3
transformer_dim_down_factor: 2.0
n_hidden_neurons_xyz: 80
n_layers_xyz: 2
append_xyz:
- 1
implicit_function_NeuralRadianceFieldImplicitFunction_args:
n_harmonic_functions_xyz: 10
n_harmonic_functions_dir: 4
n_hidden_neurons_dir: 128
latent_dim: 0
input_xyz: true
xyz_ray_dir_in_camera_coords: false
color_dim: 3
transformer_dim_down_factor: 1.0
n_hidden_neurons_xyz: 256
n_layers_xyz: 8
append_xyz:
- 5
implicit_function_SRNHyperNetImplicitFunction_args:
hypernet_args:
n_harmonic_functions: 3
n_hidden_units: 256
n_layers: 2
n_hidden_units_hypernet: 256
n_layers_hypernet: 1
in_features: 3
out_features: 256
latent_dim_hypernet: 0
latent_dim: 0
xyz_in_camera_coords: false
pixel_generator_args:
n_harmonic_functions: 4
n_hidden_units: 256
n_hidden_units_color: 128
n_layers: 2
in_features: 256
out_features: 3
ray_dir_in_camera_coords: false
implicit_function_SRNImplicitFunction_args:
raymarch_function_args:
n_harmonic_functions: 3
n_hidden_units: 256
n_layers: 2
in_features: 3
out_features: 256
latent_dim: 0
xyz_in_camera_coords: false
raymarch_function: null
pixel_generator_args:
n_harmonic_functions: 4
n_hidden_units: 256
n_hidden_units_color: 128
n_layers: 2
in_features: 256
out_features: 3
ray_dir_in_camera_coords: false
view_metrics_ViewMetrics_args: {}
regularization_metrics_RegularizationMetrics_args: {}
solver_args:
breed: adam
weight_decay: 0.0
lr_policy: multistep
lr: 0.0005
gamma: 0.1
momentum: 0.9
betas:
- 0.9
- 0.999
milestones: []
max_epochs: 1000
data_source_args:
dataset_map_provider_class_type: ???
data_loader_map_provider_class_type: SequenceDataLoaderMapProvider
dataset_map_provider_BlenderDatasetMapProvider_args:
@@ -64,11 +345,17 @@ data_source_ImplicitronDataSource_args:
dataset_class_type: JsonIndexDataset
path_manager_factory_class_type: PathManagerFactory
dataset_JsonIndexDataset_args:
path_manager: null
frame_annotations_file: ''
sequence_annotations_file: ''
subset_lists_file: ''
subsets: null
limit_to: 0
limit_sequences_to: 0
pick_sequence: []
exclude_sequence: []
limit_category_to: []
dataset_root: ''
load_images: true
load_depths: true
load_depth_masks: true
@@ -86,6 +373,7 @@ data_source_ImplicitronDataSource_args:
n_frames_per_sequence: -1
seed: 0
sort_frames: false
eval_batches: null
path_manager_factory_PathManagerFactory_args:
silence_logs: true
dataset_map_provider_LlffDatasetMapProvider_args:
@@ -95,16 +383,6 @@ data_source_ImplicitronDataSource_args:
n_known_frames_for_test: null
path_manager_factory_PathManagerFactory_args:
silence_logs: true
downscale_factor: 4
dataset_map_provider_RenderedMeshDatasetMapProvider_args:
num_views: 40
data_file: null
azimuth_range: 180.0
resolution: 128
use_point_light: true
path_manager_factory_class_type: PathManagerFactory
path_manager_factory_PathManagerFactory_args:
silence_logs: true
data_loader_map_provider_SequenceDataLoaderMapProvider_args:
batch_size: 1
num_workers: 0
@@ -118,309 +396,30 @@ data_source_ImplicitronDataSource_args:
sample_consecutive_frames: false
consecutive_frames_max_gap: 0
consecutive_frames_max_gap_seconds: 0.1
data_loader_map_provider_SimpleDataLoaderMapProvider_args:
batch_size: 1
num_workers: 0
dataset_length_train: 0
dataset_length_val: 0
dataset_length_test: 0
model_factory_ImplicitronModelFactory_args:
resume: true
model_class_type: GenericModel
resume_epoch: -1
force_resume: false
model_GenericModel_args:
log_vars:
- loss_rgb_psnr_fg
- loss_rgb_psnr
- loss_rgb_mse
- loss_rgb_huber
- loss_depth_abs
- loss_depth_abs_fg
- loss_mask_neg_iou
- loss_mask_bce
- loss_mask_beta_prior
- loss_eikonal
- loss_density_tv
- loss_depth_neg_penalty
- loss_autodecoder_norm
- loss_prev_stage_rgb_mse
- loss_prev_stage_rgb_psnr_fg
- loss_prev_stage_rgb_psnr
- loss_prev_stage_mask_bce
- objective
- epoch
- sec/it
mask_images: true
mask_depths: true
render_image_width: 400
render_image_height: 400
mask_threshold: 0.5
output_rasterized_mc: false
bg_color:
- 0.0
- 0.0
- 0.0
num_passes: 1
chunk_size_grid: 4096
render_features_dimensions: 3
tqdm_trigger_threshold: 16
n_train_target_views: 1
sampling_mode_training: mask_sample
sampling_mode_evaluation: full_grid
global_encoder_class_type: null
raysampler_class_type: AdaptiveRaySampler
renderer_class_type: MultiPassEmissionAbsorptionRenderer
image_feature_extractor_class_type: null
view_pooler_enabled: false
implicit_function_class_type: NeuralRadianceFieldImplicitFunction
view_metrics_class_type: ViewMetrics
regularization_metrics_class_type: RegularizationMetrics
loss_weights:
loss_rgb_mse: 1.0
loss_prev_stage_rgb_mse: 1.0
loss_mask_bce: 0.0
loss_prev_stage_mask_bce: 0.0
global_encoder_HarmonicTimeEncoder_args:
n_harmonic_functions: 10
append_input: true
time_divisor: 1.0
global_encoder_SequenceAutodecoder_args:
autodecoder_args:
encoding_dim: 0
n_instances: 1
init_scale: 1.0
ignore_input: false
raysampler_AdaptiveRaySampler_args:
n_pts_per_ray_training: 64
n_pts_per_ray_evaluation: 64
n_rays_per_image_sampled_from_mask: 1024
stratified_point_sampling_training: true
stratified_point_sampling_evaluation: false
scene_extent: 8.0
scene_center:
- 0.0
- 0.0
- 0.0
raysampler_NearFarRaySampler_args:
n_pts_per_ray_training: 64
n_pts_per_ray_evaluation: 64
n_rays_per_image_sampled_from_mask: 1024
stratified_point_sampling_training: true
stratified_point_sampling_evaluation: false
min_depth: 0.1
max_depth: 8.0
renderer_LSTMRenderer_args:
num_raymarch_steps: 10
init_depth: 17.0
init_depth_noise_std: 0.0005
hidden_size: 16
n_feature_channels: 256
bg_color: null
verbose: false
renderer_MultiPassEmissionAbsorptionRenderer_args:
raymarcher_class_type: EmissionAbsorptionRaymarcher
n_pts_per_ray_fine_training: 64
n_pts_per_ray_fine_evaluation: 64
stratified_sampling_coarse_training: true
stratified_sampling_coarse_evaluation: false
append_coarse_samples_to_fine: true
density_noise_std_train: 0.0
return_weights: false
raymarcher_CumsumRaymarcher_args:
surface_thickness: 1
bg_color:
- 0.0
background_opacity: 0.0
density_relu: true
blend_output: false
raymarcher_EmissionAbsorptionRaymarcher_args:
surface_thickness: 1
bg_color:
- 0.0
background_opacity: 10000000000.0
density_relu: true
blend_output: false
renderer_SignedDistanceFunctionRenderer_args:
ray_normal_coloring_network_args:
feature_vector_size: 3
mode: idr
d_in: 9
d_out: 3
dims:
- 512
- 512
- 512
- 512
weight_norm: true
n_harmonic_functions_dir: 0
pooled_feature_dim: 0
bg_color:
- 0.0
soft_mask_alpha: 50.0
ray_tracer_args:
sdf_threshold: 5.0e-05
line_search_step: 0.5
line_step_iters: 1
sphere_tracing_iters: 10
n_steps: 100
n_secant_steps: 8
image_feature_extractor_ResNetFeatureExtractor_args:
name: resnet34
pretrained: true
stages:
- 1
- 2
- 3
- 4
normalize_image: true
image_rescale: 0.16
first_max_pool: true
proj_dim: 32
l2_norm: true
add_masks: true
add_images: true
global_average_pool: false
feature_rescale: 1.0
view_pooler_args:
feature_aggregator_class_type: AngleWeightedReductionFeatureAggregator
view_sampler_args:
masked_sampling: false
sampling_mode: bilinear
feature_aggregator_AngleWeightedIdentityFeatureAggregator_args:
exclude_target_view: true
exclude_target_view_mask_features: true
concatenate_output: true
weight_by_ray_angle_gamma: 1.0
min_ray_angle_weight: 0.1
feature_aggregator_AngleWeightedReductionFeatureAggregator_args:
exclude_target_view: true
exclude_target_view_mask_features: true
concatenate_output: true
reduction_functions:
- AVG
- STD
weight_by_ray_angle_gamma: 1.0
min_ray_angle_weight: 0.1
feature_aggregator_IdentityFeatureAggregator_args:
exclude_target_view: true
exclude_target_view_mask_features: true
concatenate_output: true
feature_aggregator_ReductionFeatureAggregator_args:
exclude_target_view: true
exclude_target_view_mask_features: true
concatenate_output: true
reduction_functions:
- AVG
- STD
implicit_function_IdrFeatureField_args:
d_in: 3
d_out: 1
dims:
- 512
- 512
- 512
- 512
- 512
- 512
- 512
- 512
geometric_init: true
bias: 1.0
skip_in: []
weight_norm: true
n_harmonic_functions_xyz: 0
pooled_feature_dim: 0
implicit_function_NeRFormerImplicitFunction_args:
n_harmonic_functions_xyz: 10
n_harmonic_functions_dir: 4
n_hidden_neurons_dir: 128
input_xyz: true
xyz_ray_dir_in_camera_coords: false
transformer_dim_down_factor: 2.0
n_hidden_neurons_xyz: 80
n_layers_xyz: 2
append_xyz:
- 1
implicit_function_NeuralRadianceFieldImplicitFunction_args:
n_harmonic_functions_xyz: 10
n_harmonic_functions_dir: 4
n_hidden_neurons_dir: 128
input_xyz: true
xyz_ray_dir_in_camera_coords: false
transformer_dim_down_factor: 1.0
n_hidden_neurons_xyz: 256
n_layers_xyz: 8
append_xyz:
- 5
implicit_function_SRNHyperNetImplicitFunction_args:
hypernet_args:
n_harmonic_functions: 3
n_hidden_units: 256
n_layers: 2
n_hidden_units_hypernet: 256
n_layers_hypernet: 1
in_features: 3
out_features: 256
xyz_in_camera_coords: false
pixel_generator_args:
n_harmonic_functions: 4
n_hidden_units: 256
n_hidden_units_color: 128
n_layers: 2
in_features: 256
out_features: 3
ray_dir_in_camera_coords: false
implicit_function_SRNImplicitFunction_args:
raymarch_function_args:
n_harmonic_functions: 3
n_hidden_units: 256
n_layers: 2
in_features: 3
out_features: 256
xyz_in_camera_coords: false
raymarch_function: null
pixel_generator_args:
n_harmonic_functions: 4
n_hidden_units: 256
n_hidden_units_color: 128
n_layers: 2
in_features: 256
out_features: 3
ray_dir_in_camera_coords: false
view_metrics_ViewMetrics_args: {}
regularization_metrics_RegularizationMetrics_args: {}
optimizer_factory_ImplicitronOptimizerFactory_args:
betas:
- 0.9
- 0.999
breed: Adam
exponential_lr_step_size: 250
gamma: 0.1
lr: 0.0005
lr_policy: MultiStepLR
momentum: 0.9
multistep_lr_milestones: []
weight_decay: 0.0
linear_exponential_lr_milestone: 200
linear_exponential_start_gamma: 0.1
training_loop_ImplicitronTrainingLoop_args:
eval_only: false
evaluator_class_type: ImplicitronEvaluator
max_epochs: 1000
store_checkpoints: true
store_checkpoints_purge: 1
test_interval: -1
test_when_finished: false
validation_interval: 1
clip_grad: 0.0
metric_print_interval: 5
visualize_interval: 1000
visdom_env: ''
visdom_port: 8097
visdom_server: http://127.0.0.1
evaluator_ImplicitronEvaluator_args:
camera_difficulty_bin_breaks:
- 0.97
- 0.98
is_multisequence: false
architecture: generic
detect_anomaly: false
eval_only: false
exp_dir: ./data/default_experiment/
exp_idx: 0
gpu_idx: 0
metric_print_interval: 5
resume: true
resume_epoch: -1
seed: 0
store_checkpoints: true
store_checkpoints_purge: 1
test_interval: -1
test_when_finished: false
validation_interval: 1
visdom_env: ''
visdom_port: 8097
visdom_server: http://127.0.0.1
visualize_interval: 1000
clip_grad: 0.0
camera_difficulty_bin_breaks:
- 0.97
- 0.98
hydra:
run:
dir: .
output_subdir: null

View File

@@ -5,7 +5,6 @@
# LICENSE file in the root directory of this source tree.
import os
import tempfile
import unittest
from pathlib import Path
@@ -13,7 +12,6 @@ from hydra import compose, initialize_config_dir
from omegaconf import OmegaConf
from .. import experiment
from .utils import intercept_logs
def interactive_testing_requested() -> bool:
@@ -35,10 +33,7 @@ DEBUG: bool = False
# TODO:
# - add enough files to skateboard_first_5 that this works on RE.
# - share common code with PyTorch3D tests?
def _parse_float_from_log(line):
return float(line.split()[-1])
# - deal with the temporary output files this test creates
class TestExperiment(unittest.TestCase):
@@ -49,18 +44,15 @@ class TestExperiment(unittest.TestCase):
# Test making minimal changes to the dataclass defaults.
if not interactive_testing_requested() or not internal:
return
# Manually override config values. Note that this is not necessary out-
# side of the tests!
cfg = OmegaConf.structured(experiment.Experiment)
cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_class_type = (
cfg = OmegaConf.structured(experiment.ExperimentConfig)
cfg.data_source_args.dataset_map_provider_class_type = (
"JsonIndexDatasetMapProvider"
)
dataset_args = (
cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
cfg.data_source_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
)
dataloader_args = (
cfg.data_source_ImplicitronDataSource_args.data_loader_map_provider_SequenceDataLoaderMapProvider_args
cfg.data_source_args.data_loader_map_provider_SequenceDataLoaderMapProvider_args
)
dataset_args.category = "skateboard"
dataset_args.test_restrict_sequence_id = 0
@@ -70,80 +62,18 @@ class TestExperiment(unittest.TestCase):
dataset_args.dataset_JsonIndexDataset_args.image_width = 80
dataloader_args.dataset_length_train = 1
dataloader_args.dataset_length_val = 1
cfg.training_loop_ImplicitronTrainingLoop_args.max_epochs = 2
cfg.training_loop_ImplicitronTrainingLoop_args.store_checkpoints = False
cfg.optimizer_factory_ImplicitronOptimizerFactory_args.multistep_lr_milestones = [
0,
1,
]
cfg.solver_args.max_epochs = 2
if DEBUG:
experiment.dump_cfg(cfg)
with intercept_logs(
logger_name="projects.implicitron_trainer.impl.training_loop",
regexp="LR change!",
) as intercepted_logs:
experiment_runner = experiment.Experiment(**cfg)
experiment_runner.run()
# Make sure LR decreased on 0th and 1st epoch 10fold.
self.assertEqual(intercepted_logs[0].split()[-1], "5e-06")
def test_exponential_lr(self):
# Test making minimal changes to the dataclass defaults.
if not interactive_testing_requested():
return
cfg = OmegaConf.structured(experiment.Experiment)
cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_class_type = (
"JsonIndexDatasetMapProvider"
)
dataset_args = (
cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
)
dataloader_args = (
cfg.data_source_ImplicitronDataSource_args.data_loader_map_provider_SequenceDataLoaderMapProvider_args
)
dataset_args.category = "skateboard"
dataset_args.test_restrict_sequence_id = 0
dataset_args.dataset_root = "manifold://co3d/tree/extracted"
dataset_args.dataset_JsonIndexDataset_args.limit_sequences_to = 5
dataset_args.dataset_JsonIndexDataset_args.image_height = 80
dataset_args.dataset_JsonIndexDataset_args.image_width = 80
dataloader_args.dataset_length_train = 1
dataloader_args.dataset_length_val = 1
cfg.training_loop_ImplicitronTrainingLoop_args.max_epochs = 2
cfg.training_loop_ImplicitronTrainingLoop_args.store_checkpoints = False
cfg.optimizer_factory_ImplicitronOptimizerFactory_args.lr_policy = "Exponential"
cfg.optimizer_factory_ImplicitronOptimizerFactory_args.exponential_lr_step_size = (
2
)
if DEBUG:
experiment.dump_cfg(cfg)
with intercept_logs(
logger_name="projects.implicitron_trainer.impl.training_loop",
regexp="LR change!",
) as intercepted_logs:
experiment_runner = experiment.Experiment(**cfg)
experiment_runner.run()
# Make sure we followed the exponential lr schedule with gamma=0.1,
# exponential_lr_step_size=2 -- so after two epochs, should
# decrease lr 10x to 5e-5.
self.assertEqual(intercepted_logs[0].split()[-1], "0.00015811388300841897")
self.assertEqual(intercepted_logs[1].split()[-1], "5e-05")
experiment.run_training(cfg)
def test_yaml_contents(self):
# Check that the default config values, defined by Experiment and its
# members, is what we expect it to be.
cfg = OmegaConf.structured(experiment.Experiment)
cfg = OmegaConf.structured(experiment.ExperimentConfig)
yaml = OmegaConf.to_yaml(cfg, sort_keys=False)
if DEBUG:
(DATA_DIR / "experiment.yaml").write_text(yaml)
self.assertEqual(yaml, (DATA_DIR / "experiment.yaml").read_text())
def test_load_configs(self):
# Check that all the pre-prepared configs are valid.
config_files = []
for pattern in ("repro_singleseq*.yaml", "repro_multiseq*.yaml"):
@@ -159,78 +89,3 @@ class TestExperiment(unittest.TestCase):
with self.subTest(file.name):
with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)):
compose(file.name)
class TestNerfRepro(unittest.TestCase):
@unittest.skip("This test runs full blender training.")
def test_nerf_blender(self):
# Train vanilla NERF.
# Set env vars BLENDER_DATASET_ROOT and BLENDER_SINGLESEQ_CLASS first!
if not interactive_testing_requested():
return
with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)):
cfg = compose(config_name="repro_singleseq_nerf_blender", overrides=[])
experiment_runner = experiment.Experiment(**cfg)
experiment.dump_cfg(cfg)
experiment_runner.run()
@unittest.skip("This test runs full llff training.")
def test_nerf_llff(self):
# Train vanilla NERF.
# Set env vars LLFF_DATASET_ROOT and LLFF_SINGLESEQ_CLASS first!
LLFF_SINGLESEQ_CLASS = os.environ["LLFF_SINGLESEQ_CLASS"]
if not interactive_testing_requested():
return
with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)):
cfg = compose(
config_name=f"repro_singleseq_nerf_llff_{LLFF_SINGLESEQ_CLASS}",
overrides=[],
)
experiment_runner = experiment.Experiment(**cfg)
experiment.dump_cfg(cfg)
experiment_runner.run()
@unittest.skip("This test checks resuming of the NeRF training.")
def test_nerf_blender_resume(self):
# Train one train batch of NeRF, then resume for one more batch.
# Set env vars BLENDER_DATASET_ROOT and BLENDER_SINGLESEQ_CLASS first!
if not interactive_testing_requested():
return
with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)):
with tempfile.TemporaryDirectory() as exp_dir:
cfg = compose(config_name="repro_singleseq_nerf_blender", overrides=[])
cfg.exp_dir = exp_dir
# set dataset len to 1
# fmt: off
(
cfg
.data_source_ImplicitronDataSource_args
.data_loader_map_provider_SequenceDataLoaderMapProvider_args
.dataset_length_train
) = 1
# fmt: on
# run for one epoch
cfg.training_loop_ImplicitronTrainingLoop_args.max_epochs = 1
experiment_runner = experiment.Experiment(**cfg)
experiment.dump_cfg(cfg)
experiment_runner.run()
# update num epochs + 2, let the optimizer resume
cfg.training_loop_ImplicitronTrainingLoop_args.max_epochs = 3
experiment_runner = experiment.Experiment(**cfg)
experiment_runner.run()
# start from scratch
cfg.model_factory_ImplicitronModelFactory_args.resume = False
experiment_runner = experiment.Experiment(**cfg)
experiment_runner.run()
# force resume from epoch 1
cfg.model_factory_ImplicitronModelFactory_args.resume = True
cfg.model_factory_ImplicitronModelFactory_args.force_resume = True
cfg.model_factory_ImplicitronModelFactory_args.resume_epoch = 1
experiment_runner = experiment.Experiment(**cfg)
experiment_runner.run()

View File

@@ -1,30 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import contextlib
import logging
import re
@contextlib.contextmanager
def intercept_logs(logger_name: str, regexp: str):
# Intercept logs that match a regexp, from a given logger.
intercepted_messages = []
logger = logging.getLogger(logger_name)
class LoggerInterceptor(logging.Filter):
def filter(self, record):
message = record.getMessage()
if re.search(regexp, message):
intercepted_messages.append(message)
return True
interceptor = LoggerInterceptor()
logger.addFilter(interceptor)
try:
yield intercepted_messages
finally:
logger.removeFilter(interceptor)

View File

@@ -22,6 +22,7 @@ import numpy as np
import torch
import torch.nn.functional as Fu
from omegaconf import OmegaConf
from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource
from pytorch3d.implicitron.dataset.dataset_base import DatasetBase, FrameData
from pytorch3d.implicitron.dataset.utils import is_train_frame
from pytorch3d.implicitron.models.base_model import EvaluationMode
@@ -36,7 +37,7 @@ from pytorch3d.implicitron.tools.vis_utils import (
)
from tqdm import tqdm
from .experiment import Experiment
from .experiment import init_model
def render_sequence(
@@ -343,14 +344,13 @@ def export_scenes(
os.environ["CUDA_VISIBLE_DEVICES"] = str(config.gpu_idx)
# Load the previously trained model
experiment = Experiment(config)
model = experiment.model_factory(force_resume=True)
model, _, _ = init_model(cfg=config, force_load=True, load_model_only=True)
model.cuda()
model.eval()
# Setup the dataset
data_source = experiment.data_source
dataset_map, _ = data_source.get_datasets_and_dataloaders()
datasource = ImplicitronDataSource(**config.data_source_args)
dataset_map = datasource.dataset_map_provider.get_dataset_map()
dataset = dataset_map[split]
if dataset is None:
raise ValueError(f"{split} dataset not provided")

View File

@@ -4,4 +4,4 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
__version__ = "0.7.0"
__version__ = "0.6.2"

View File

@@ -10,7 +10,7 @@ import torch
"""
Some functions which depend on PyTorch or Python versions.
Some functions which depend on PyTorch versions.
"""
@@ -79,12 +79,3 @@ def meshgrid_ij(
# pyre-fixme[6]: For 1st param expected `Union[List[Tensor], Tensor]` but got
# `Union[Sequence[Tensor], Tensor]`.
return torch.meshgrid(*A)
def prod(iterable, *, start=1):
"""
Like math.prod in Python 3.8 and later.
"""
for i in iterable:
start *= i
return start

View File

@@ -61,84 +61,6 @@ class DataLoaderMapProviderBase(ReplaceableBase):
raise NotImplementedError()
@registry.register
class SimpleDataLoaderMapProvider(DataLoaderMapProviderBase):
"""
Trivial implementation of DataLoaderMapProviderBase.
If a dataset returns batches from get_eval_batches(), then
they will be what the corresponding dataloader returns,
independently of any of the fields on this class.
Otherwise, returns shuffled batches.
"""
batch_size: int = 1
num_workers: int = 0
dataset_length_train: int = 0
dataset_length_val: int = 0
dataset_length_test: int = 0
def get_data_loader_map(self, datasets: DatasetMap) -> DataLoaderMap:
"""
Returns a collection of data loaders for a given collection of datasets.
"""
return DataLoaderMap(
train=self._make_data_loader(
datasets.train,
self.dataset_length_train,
),
val=self._make_data_loader(
datasets.val,
self.dataset_length_val,
),
test=self._make_data_loader(
datasets.test,
self.dataset_length_test,
),
)
def _make_data_loader(
self,
dataset: Optional[DatasetBase],
num_batches: int,
) -> Optional[DataLoader[FrameData]]:
"""
Returns the dataloader for a dataset.
Args:
dataset: the dataset
num_batches: possible ceiling on number of batches per epoch
"""
if dataset is None:
return None
data_loader_kwargs = {
"num_workers": self.num_workers,
"collate_fn": dataset.frame_data_type.collate,
}
eval_batches = dataset.get_eval_batches()
if eval_batches is not None:
return DataLoader(
dataset,
batch_sampler=eval_batches,
**data_loader_kwargs,
)
if num_batches > 0:
num_samples = self.batch_size * num_batches
else:
num_samples = None
sampler = RandomSampler(dataset, replacement=True, num_samples=num_samples)
batch_sampler = BatchSampler(sampler, self.batch_size, drop_last=True)
return DataLoader(
dataset,
batch_sampler=batch_sampler,
**data_loader_kwargs,
)
class DoublePoolBatchSampler(Sampler[List[int]]):
"""
Batch sampler for making random batches of a single frame
@@ -199,7 +121,7 @@ class DoublePoolBatchSampler(Sampler[List[int]]):
torch.randperm(len(self.first_indices), generator=self.generator)
for _ in range(n_copies)
]
i_first = torch.cat(raw_indices)[:num_batches]
i_first = torch.concat(raw_indices)[:num_batches]
else:
i_first = torch.randperm(len(self.first_indices), generator=self.generator)
first_indices = [self.first_indices[i] for i in i_first]

View File

@@ -15,11 +15,10 @@ from pytorch3d.renderer.cameras import CamerasBase
from .blender_dataset_map_provider import BlenderDatasetMapProvider # noqa
from .data_loader_map_provider import DataLoaderMap, DataLoaderMapProviderBase
from .dataset_map_provider import DatasetMap, DatasetMapProviderBase
from .dataset_map_provider import DatasetMap, DatasetMapProviderBase, Task
from .json_index_dataset_map_provider import JsonIndexDatasetMapProvider # noqa
from .json_index_dataset_map_provider_v2 import JsonIndexDatasetMapProviderV2 # noqa
from .llff_dataset_map_provider import LlffDatasetMapProvider # noqa
from .rendered_mesh_dataset_map_provider import RenderedMeshDatasetMapProvider # noqa
class DataSourceBase(ReplaceableBase):
@@ -68,6 +67,9 @@ class ImplicitronDataSource(DataSourceBase): # pyre-ignore[13]
dataloaders = self.data_loader_map_provider.get_data_loader_map(datasets)
return datasets, dataloaders
def get_task(self) -> Task:
return self.dataset_map_provider.get_task()
@property
def all_train_cameras(self) -> Optional[CamerasBase]:
if self._all_train_cameras_cache is None: # pyre-ignore[16]

View File

@@ -7,6 +7,7 @@
import logging
import os
from dataclasses import dataclass
from enum import Enum
from typing import Iterator, Optional
from iopath.common.file_io import PathManager
@@ -52,6 +53,11 @@ class DatasetMap:
yield self.test
class Task(Enum):
SINGLE_SEQUENCE = "singlesequence"
MULTI_SEQUENCE = "multisequence"
class DatasetMapProviderBase(ReplaceableBase):
"""
Base class for a provider of training / validation and testing
@@ -65,6 +71,9 @@ class DatasetMapProviderBase(ReplaceableBase):
"""
raise NotImplementedError()
def get_task(self) -> Task:
raise NotImplementedError()
def get_all_train_cameras(self) -> Optional[CamerasBase]:
"""
If the data is all for a single scene, returns a list

View File

@@ -24,7 +24,7 @@ from typing import (
Sequence,
Tuple,
Type,
TYPE_CHECKING,
TypedDict,
Union,
)
@@ -45,15 +45,9 @@ from .utils import is_known_frame_scalar
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from typing import TypedDict
class FrameAnnotsEntry(TypedDict):
subset: Optional[str]
frame_annotation: types.FrameAnnotation
else:
FrameAnnotsEntry = dict
class FrameAnnotsEntry(TypedDict):
subset: Optional[str]
frame_annotation: types.FrameAnnotation
@registry.register
@@ -118,11 +112,6 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase):
eval_batches: A list of batches that form the evaluation set;
list of batch-sized lists of indices corresponding to __getitem__
of this class, thus it can be used directly as a batch sampler.
eval_batch_index:
( Optional[List[List[Union[Tuple[str, int, str], Tuple[str, int]]]] )
A list of batches of frames described as (sequence_name, frame_idx)
that can form the evaluation set, `eval_batches` will be set from this.
"""
frame_annotations_type: ClassVar[
@@ -158,7 +147,6 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase):
seed: int = 0
sort_frames: bool = False
eval_batches: Any = None
eval_batch_index: Any = None
# frame_annots: List[FrameAnnotsEntry] = field(init=False)
# seq_annots: Dict[str, types.SequenceAnnotation] = field(init=False)
@@ -171,22 +159,8 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase):
self._sort_frames()
self._load_subset_lists()
self._filter_db() # also computes sequence indices
self._extract_and_set_eval_batches()
logger.info(str(self))
def _extract_and_set_eval_batches(self):
"""
Sets eval_batches based on input eval_batch_index.
"""
if self.eval_batch_index is not None:
if self.eval_batches is not None:
raise ValueError(
"Cannot define both eval_batch_index and eval_batches."
)
self.eval_batches = self.seq_frame_index_to_dataset_index(
self.eval_batch_index
)
def is_filtered(self):
"""
Returns `True` in case the dataset has been filtered and thus some frame annotations

View File

@@ -9,7 +9,7 @@ import json
import os
from typing import Dict, List, Optional, Tuple, Type
from omegaconf import DictConfig
from omegaconf import DictConfig, open_dict
from pytorch3d.implicitron.tools.config import (
expand_args_fields,
registry,
@@ -17,7 +17,12 @@ from pytorch3d.implicitron.tools.config import (
)
from pytorch3d.renderer.cameras import CamerasBase
from .dataset_map_provider import DatasetMap, DatasetMapProviderBase, PathManagerFactory
from .dataset_map_provider import (
DatasetMap,
DatasetMapProviderBase,
PathManagerFactory,
Task,
)
from .json_index_dataset import JsonIndexDataset
from .utils import (
@@ -52,7 +57,6 @@ _CO3D_DATASET_ROOT: str = os.getenv("CO3D_DATASET_ROOT", "")
_NEED_CONTROL: Tuple[str, ...] = (
"dataset_root",
"eval_batches",
"eval_batch_index",
"n_frames_per_sequence",
"path_manager",
"pick_sequence",
@@ -113,8 +117,9 @@ class JsonIndexDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13]
Called by get_default_args(JsonIndexDatasetMapProvider) to
not expose certain fields of each dataset class.
"""
for key in _NEED_CONTROL:
del args[key]
with open_dict(args):
for key in _NEED_CONTROL:
del args[key]
def create_dataset(self):
"""
@@ -154,7 +159,7 @@ class JsonIndexDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13]
# This maps the common names of the dataset subsets ("train"/"val"/"test")
# to the names of the subsets in the CO3D dataset.
set_names_mapping = _get_co3d_set_names_mapping(
self.task_str,
self.get_task(),
self.test_on_train,
self.only_test_set,
)
@@ -179,7 +184,7 @@ class JsonIndexDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13]
eval_batch_index = json.load(f)
restrict_sequence_name = self.restrict_sequence_name
if self.task_str == "singlesequence":
if self.get_task() == Task.SINGLE_SEQUENCE:
if (
self.test_restrict_sequence_id is None
or self.test_restrict_sequence_id < 0
@@ -207,10 +212,6 @@ class JsonIndexDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13]
]
# overwrite the restrict_sequence_name
restrict_sequence_name = [eval_sequence_name]
if len(restrict_sequence_name) > 0:
eval_batch_index = [
b for b in eval_batch_index if b[0][0] in restrict_sequence_name
]
dataset_type: Type[JsonIndexDataset] = registry.get(
JsonIndexDataset, self.dataset_class_type
@@ -238,9 +239,15 @@ class JsonIndexDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13]
n_frames_per_sequence=-1,
subsets=set_names_mapping["test"],
pick_sequence=restrict_sequence_name,
eval_batch_index=eval_batch_index,
**common_kwargs,
)
if len(restrict_sequence_name) > 0:
eval_batch_index = [
b for b in eval_batch_index if b[0][0] in restrict_sequence_name
]
test_dataset.eval_batches = test_dataset.seq_frame_index_to_dataset_index(
eval_batch_index
)
dataset_map = DatasetMap(
train=train_dataset, val=val_dataset, test=test_dataset
)
@@ -261,11 +268,12 @@ class JsonIndexDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13]
# pyre-ignore[16]
return self.dataset_map
def get_all_train_cameras(self) -> Optional[CamerasBase]:
if self.task_str == "multisequence":
return None
def get_task(self) -> Task:
return Task(self.task_str)
assert self.task_str == "singlesequence"
def get_all_train_cameras(self) -> Optional[CamerasBase]:
if Task(self.task_str) == Task.MULTI_SEQUENCE:
return None
# pyre-ignore[16]
train_dataset = self.dataset_map.train
@@ -274,7 +282,7 @@ class JsonIndexDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13]
def _get_co3d_set_names_mapping(
task_str: str,
task: Task,
test_on_train: bool,
only_test: bool,
) -> Dict[str, List[str]]:
@@ -288,7 +296,7 @@ def _get_co3d_set_names_mapping(
- val (if not test_on_train)
- test (if not test_on_train)
"""
single_seq = task_str == "singlesequence"
single_seq = task == Task.SINGLE_SEQUENCE
if only_test:
set_names_mapping = {}

View File

@@ -9,13 +9,13 @@ import json
import logging
import os
import warnings
from typing import Dict, List, Optional, Tuple, Type
from typing import Dict, List, Optional, Type
from omegaconf import DictConfig
from pytorch3d.implicitron.dataset.dataset_map_provider import (
DatasetMap,
DatasetMapProviderBase,
PathManagerFactory,
Task,
)
from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset
from pytorch3d.implicitron.tools.config import (
@@ -29,19 +29,6 @@ from pytorch3d.renderer.cameras import CamerasBase
_CO3DV2_DATASET_ROOT: str = os.getenv("CO3DV2_DATASET_ROOT", "")
# _NEED_CONTROL is a list of those elements of JsonIndexDataset which
# are not directly specified for it in the config but come from the
# DatasetMapProvider.
_NEED_CONTROL: Tuple[str, ...] = (
"dataset_root",
"eval_batches",
"eval_batch_index",
"path_manager",
"subsets",
"frame_annotations_file",
"sequence_annotations_file",
"subset_lists_file",
)
logger = logging.getLogger(__name__)
@@ -189,20 +176,6 @@ class JsonIndexDatasetMapProviderV2(DatasetMapProviderBase): # pyre-ignore [13]
path_manager = self.path_manager_factory.get()
if path_manager is not None:
path_managed_frame_file = path_manager.get_local_path(frame_file)
else:
path_managed_frame_file = frame_file
if not os.path.isfile(path_managed_frame_file):
# The frame_file does not exist.
# Most probably the user has not specified the root folder.
raise ValueError(
f"Looking for frame annotations in {path_managed_frame_file}."
+ " Please specify a correct dataset_root folder."
+ " Note: By default the root folder is taken from the"
+ " CO3DV2_DATASET_ROOT environment variable."
)
# setup the common dataset arguments
common_dataset_kwargs = getattr(self, f"dataset_{self.dataset_class_type}_args")
common_dataset_kwargs = {
@@ -296,15 +269,6 @@ class JsonIndexDatasetMapProviderV2(DatasetMapProviderBase): # pyre-ignore [13]
train=train_dataset, val=val_dataset, test=test_dataset
)
@classmethod
def dataset_tweak_args(cls, type, args: DictConfig) -> None:
"""
Called by get_default_args(JsonIndexDatasetMapProviderV2) to
not expose certain fields of each dataset class.
"""
for key in _NEED_CONTROL:
del args[key]
def create_dataset(self):
# The dataset object is created inside `self.get_dataset_map`
pass
@@ -335,6 +299,12 @@ class JsonIndexDatasetMapProviderV2(DatasetMapProviderBase): # pyre-ignore [13]
)
return category_to_subset_name_list
def get_task(self) -> Task: # TODO: we plan to get rid of tasks
return {
"manyview": Task.SINGLE_SEQUENCE,
"fewview": Task.MULTI_SEQUENCE,
}[self.subset_name.split("_")[0]]
def get_all_train_cameras(self) -> Optional[CamerasBase]:
# pyre-ignore[16]
train_dataset = self.dataset_map.train

View File

@@ -32,21 +32,17 @@ class LlffDatasetMapProvider(SingleSceneDatasetMapProviderBase):
and test datasets, and this many random training frames are added to
each test batch. If not set, test batches each contain just a single
testing frame.
downscale_factor: determines image sizes.
"""
downscale_factor: int = 4
def _load_data(self) -> None:
path_manager = self.path_manager_factory.get()
images, poses, _ = load_llff_data(
self.base_dir, factor=self.downscale_factor, path_manager=path_manager
self.base_dir, factor=8, path_manager=path_manager
)
hwf = poses[0, :3, -1]
poses = poses[:, :3, :4]
llffhold = 8
i_test = np.arange(images.shape[0])[::llffhold]
i_test = np.arange(images.shape[0])[::8]
i_test_index = set(i_test.tolist())
i_train = np.array(
[i for i in np.arange(images.shape[0]) if i not in i_test_index]

View File

@@ -294,7 +294,7 @@ def _local_path(path_manager, path):
def _ls(path_manager, path):
if path_manager is None:
return os.listdir(path)
return os.path.listdir(path)
return path_manager.ls(path)

View File

@@ -1,211 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from os.path import dirname, join, realpath
from typing import Optional, Tuple
import torch
from pytorch3d.implicitron.tools.config import (
expand_args_fields,
registry,
run_auto_creation,
)
from pytorch3d.io import IO
from pytorch3d.renderer import (
AmbientLights,
BlendParams,
CamerasBase,
FoVPerspectiveCameras,
HardPhongShader,
look_at_view_transform,
MeshRasterizer,
MeshRendererWithFragments,
PointLights,
RasterizationSettings,
)
from pytorch3d.structures.meshes import Meshes
from .dataset_map_provider import DatasetMap, DatasetMapProviderBase, PathManagerFactory
from .single_sequence_dataset import SingleSceneDataset
from .utils import DATASET_TYPE_KNOWN
@registry.register
class RenderedMeshDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13]
"""
A simple single-scene dataset based on PyTorch3D renders of a mesh.
Provides `num_views` renders of the mesh as train, with no val
and test. The renders are generated from viewpoints sampled at uniformly
distributed azimuth intervals. The elevation is kept constant so that the
camera's vertical position coincides with the equator.
By default, uses Keenan Crane's cow model, and the camera locations are
set to make sense for that.
Although the rendering used to generate this dataset will use a GPU
if one is available, the data it produces is on the CPU just like
the data returned by implicitron's other dataset map providers.
This is because both datasets and models can be large, so implicitron's
GenericModel.forward (etc) expects data on the CPU and only moves
what it needs to the device.
For a more detailed explanation of this code, please refer to the
docs/tutorials/fit_textured_mesh.ipynb notebook.
Members:
num_views: The number of generated renders.
data_file: The folder that contains the mesh file. By default, finds
the cow mesh in the same repo as this code.
azimuth_range: number of degrees on each side of the start position to
take samples
resolution: the common height and width of the output images.
use_point_light: whether to use a particular point light as opposed
to ambient white.
"""
num_views: int = 40
data_file: Optional[str] = None
azimuth_range: float = 180
resolution: int = 128
use_point_light: bool = True
path_manager_factory: PathManagerFactory
path_manager_factory_class_type: str = "PathManagerFactory"
def get_dataset_map(self) -> DatasetMap:
# pyre-ignore[16]
return DatasetMap(train=self.train_dataset, val=None, test=None)
def get_all_train_cameras(self) -> CamerasBase:
# pyre-ignore[16]
return self.poses
def __post_init__(self) -> None:
super().__init__()
run_auto_creation(self)
if torch.cuda.is_available():
device = torch.device("cuda:0")
else:
device = torch.device("cpu")
if self.data_file is None:
data_file = join(
dirname(dirname(dirname(dirname(realpath(__file__))))),
"docs",
"tutorials",
"data",
"cow_mesh",
"cow.obj",
)
else:
data_file = self.data_file
io = IO(path_manager=self.path_manager_factory.get())
mesh = io.load_mesh(data_file, device=device)
poses, images, masks = _generate_cow_renders(
num_views=self.num_views,
mesh=mesh,
azimuth_range=self.azimuth_range,
resolution=self.resolution,
device=device,
use_point_light=self.use_point_light,
)
# pyre-ignore[16]
self.poses = poses.cpu()
expand_args_fields(SingleSceneDataset)
# pyre-ignore[16]
self.train_dataset = SingleSceneDataset( # pyre-ignore[28]
object_name="cow",
images=list(images.permute(0, 3, 1, 2).cpu()),
fg_probabilities=list(masks[:, None].cpu()),
poses=[self.poses[i] for i in range(len(poses))],
frame_types=[DATASET_TYPE_KNOWN] * len(poses),
eval_batches=None,
)
@torch.no_grad()
def _generate_cow_renders(
*,
num_views: int,
mesh: Meshes,
azimuth_range: float,
resolution: int,
device: torch.device,
use_point_light: bool,
) -> Tuple[CamerasBase, torch.Tensor, torch.Tensor]:
"""
Returns:
cameras: A batch of `num_views` `FoVPerspectiveCameras` from which the
images are rendered.
images: A tensor of shape `(num_views, height, width, 3)` containing
the rendered images.
silhouettes: A tensor of shape `(num_views, height, width)` containing
the rendered silhouettes.
"""
# Load obj file
# We scale normalize and center the target mesh to fit in a sphere of radius 1
# centered at (0,0,0). (scale, center) will be used to bring the predicted mesh
# to its original center and scale. Note that normalizing the target mesh,
# speeds up the optimization but is not necessary!
verts = mesh.verts_packed()
N = verts.shape[0]
center = verts.mean(0)
scale = max((verts - center).abs().max(0)[0])
mesh.offset_verts_(-(center.expand(N, 3)))
mesh.scale_verts_((1.0 / float(scale)))
# Get a batch of viewing angles.
elev = torch.linspace(0, 0, num_views) # keep constant
azim = torch.linspace(-azimuth_range, azimuth_range, num_views) + 180.0
# Place a point light in front of the object. As mentioned above, the front of
# the cow is facing the -z direction.
if use_point_light:
lights = PointLights(device=device, location=[[0.0, 0.0, -3.0]])
else:
lights = AmbientLights(device=device)
# Initialize an OpenGL perspective camera that represents a batch of different
# viewing angles. All the cameras helper methods support mixed type inputs and
# broadcasting. So we can view the camera from the a distance of dist=2.7, and
# then specify elevation and azimuth angles for each viewpoint as tensors.
R, T = look_at_view_transform(dist=2.7, elev=elev, azim=azim)
cameras = FoVPerspectiveCameras(device=device, R=R, T=T)
# Define the settings for rasterization and shading.
# As we are rendering images for visualization
# purposes only we will set faces_per_pixel=1 and blur_radius=0.0. Refer to
# rasterize_meshes.py for explanations of these parameters. We also leave
# bin_size and max_faces_per_bin to their default values of None, which sets
# their values using heuristics and ensures that the faster coarse-to-fine
# rasterization method is used. Refer to docs/notes/renderer.md for an
# explanation of the difference between naive and coarse-to-fine rasterization.
raster_settings = RasterizationSettings(
image_size=resolution, blur_radius=0.0, faces_per_pixel=1
)
# Create a Phong renderer by composing a rasterizer and a shader. The textured
# Phong shader will interpolate the texture uv coordinates for each vertex,
# sample from a texture image and apply the Phong lighting model
blend_params = BlendParams(sigma=1e-4, gamma=1e-4, background_color=(0.0, 0.0, 0.0))
rasterizer_type = MeshRasterizer
renderer = MeshRendererWithFragments(
rasterizer=rasterizer_type(cameras=cameras, raster_settings=raster_settings),
shader=HardPhongShader(
device=device, cameras=cameras, lights=lights, blend_params=blend_params
),
)
# Create a batch of meshes by repeating the cow mesh and associated textures.
# Meshes has a useful `extend` method which allows us do this very easily.
# This also extends the textures.
meshes = mesh.extend(num_views)
# Render the cow mesh from each viewing angle
target_images, fragments = renderer(meshes, cameras=cameras, lights=lights)
silhouette_binary = (fragments.pix_to_face[..., 0] >= 0).float()
return cameras, target_images[..., :3], silhouette_binary

View File

@@ -21,13 +21,17 @@ from pytorch3d.implicitron.tools.config import (
from pytorch3d.renderer import CamerasBase, join_cameras_as_batch, PerspectiveCameras
from .dataset_base import DatasetBase, FrameData
from .dataset_map_provider import DatasetMap, DatasetMapProviderBase, PathManagerFactory
from .dataset_map_provider import (
DatasetMap,
DatasetMapProviderBase,
PathManagerFactory,
Task,
)
from .utils import DATASET_TYPE_KNOWN, DATASET_TYPE_UNKNOWN
_SINGLE_SEQUENCE_NAME: str = "one_sequence"
@expand_args_fields
class SingleSceneDataset(DatasetBase, Configurable):
"""
A dataset from images from a single scene.
@@ -111,6 +115,7 @@ class SingleSceneDatasetMapProviderBase(DatasetMapProviderBase):
def _get_dataset(
self, split_idx: int, frame_type: str, set_eval_batches: bool = False
) -> SingleSceneDataset:
expand_args_fields(SingleSceneDataset)
# pyre-ignore[16]
split = self.i_split[split_idx]
frame_types = [frame_type] * len(split)
@@ -154,6 +159,9 @@ class SingleSceneDatasetMapProviderBase(DatasetMapProviderBase):
test=self._get_dataset(2, DATASET_TYPE_UNKNOWN, True),
)
def get_task(self) -> Task:
return Task.SINGLE_SEQUENCE
def get_all_train_cameras(self) -> Optional[CamerasBase]:
# pyre-ignore[16]
cameras = [self.poses[i] for i in self.i_split[0]]

View File

@@ -7,12 +7,11 @@
import dataclasses
import os
from enum import Enum
from typing import Any, cast, Dict, List, Optional, Tuple
import lpips
import torch
from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource
from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource, Task
from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset
from pytorch3d.implicitron.dataset.json_index_dataset_map_provider import (
CO3D_CATEGORIES,
@@ -28,11 +27,6 @@ from pytorch3d.implicitron.tools.utils import dataclass_to_cuda_
from tqdm import tqdm
class Task(Enum):
SINGLE_SEQUENCE = "singlesequence"
MULTI_SEQUENCE = "multisequence"
def main() -> None:
"""
Evaluates new view synthesis metrics of a simple depth-based image rendering
@@ -159,15 +153,11 @@ def evaluate_dbir_for_category(
if task == Task.SINGLE_SEQUENCE:
camera_difficulty_bin_breaks = 0.97, 0.98
multisequence_evaluation = False
else:
camera_difficulty_bin_breaks = 2.0 / 3, 5.0 / 6
multisequence_evaluation = True
category_result_flat, category_result = summarize_nvs_eval_results(
per_batch_eval_results,
camera_difficulty_bin_breaks=camera_difficulty_bin_breaks,
is_multisequence=multisequence_evaluation,
per_batch_eval_results, task, camera_difficulty_bin_breaks
)
return category_result["results"]

View File

@@ -14,6 +14,7 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
import numpy as np
import torch
import torch.nn.functional as F
from pytorch3d.implicitron.dataset.data_source import Task
from pytorch3d.implicitron.dataset.dataset_base import FrameData
from pytorch3d.implicitron.dataset.utils import is_known_frame, is_train_frame
from pytorch3d.implicitron.models.base_model import ImplicitronRender
@@ -242,26 +243,10 @@ def eval_batch(
if frame_data.depth_map is None or frame_data.depth_map.sum() <= 0:
warnings.warn("Empty or missing depth map in evaluation!")
if frame_data.mask_crop is None:
warnings.warn("mask_crop is None, assuming the whole image is valid.")
if frame_data.fg_probability is None:
warnings.warn("fg_probability is None, assuming the whole image is fg.")
# threshold the masks to make ground truth binary masks
mask_fg = (
frame_data.fg_probability >= mask_thr
if frame_data.fg_probability is not None
# pyre-ignore [16]
else torch.ones_like(frame_data.image_rgb[:, :1, ...]).bool()
)
mask_crop = (
frame_data.mask_crop
if frame_data.mask_crop is not None
else torch.ones_like(mask_fg)
)
mask_fg, mask_crop = [
(getattr(frame_data, k) >= mask_thr) for k in ("fg_probability", "mask_crop")
]
image_rgb_masked = mask_background(
# pyre-fixme[6]: Expected `Tensor` for 1st param but got
# `Optional[torch.Tensor]`.
@@ -281,6 +266,7 @@ def eval_batch(
# pyre-fixme[6]: Expected `Tensor` for 4th param but got
# `Optional[torch.Tensor]`.
depth_map=frame_data.depth_map,
# pyre-fixme[16]: `Optional` has no attribute `__getitem__`.
depth_mask=frame_data.depth_mask[:1],
visdom_env=visualize_visdom_env,
)
@@ -312,7 +298,7 @@ def eval_batch(
results[metric_name].item(), metric_name, loss_mask_now
)
if name_postfix == "_fg" and frame_data.depth_map is not None:
if name_postfix == "_fg":
# only record depth metrics for the foreground
_, abs_ = eval_depth(
cloned_render["depth_render"],
@@ -328,7 +314,9 @@ def eval_batch(
if visualize:
visualizer.show_depth(abs_.mean().item(), name_postfix, loss_mask_now)
if break_after_visualising:
breakpoint() # noqa: B601
import pdb # noqa: B602
pdb.set_trace()
if lpips_model is not None:
im1, im2 = [
@@ -432,16 +420,16 @@ def _get_camera_difficulty_bin_edges(camera_difficulty_bin_breaks: Tuple[float,
def summarize_nvs_eval_results(
per_batch_eval_results: List[Dict[str, Any]],
is_multisequence: bool,
camera_difficulty_bin_breaks: Tuple[float, float],
task: Task,
camera_difficulty_bin_breaks: Tuple[float, float] = (0.97, 0.98),
):
"""
Compile the per-batch evaluation results `per_batch_eval_results` into
a set of aggregate metrics. The produced metrics depend on is_multisequence.
a set of aggregate metrics. The produced metrics depend on the task.
Args:
per_batch_eval_results: Metrics of each per-batch evaluation.
is_multisequence: Whether to evaluate as a multisequence task
task: The type of the new-view synthesis task.
camera_difficulty_bin_breaks: edge hard-medium and medium-easy
@@ -451,9 +439,14 @@ def summarize_nvs_eval_results(
"""
n_batches = len(per_batch_eval_results)
eval_sets: List[Optional[str]] = []
eval_sets = [None]
if is_multisequence:
if task == Task.SINGLE_SEQUENCE:
eval_sets = [None]
# assert n_batches==100
elif task == Task.MULTI_SEQUENCE:
eval_sets = ["train", "test"]
# assert n_batches==1000
else:
raise ValueError(task)
batch_sizes = torch.tensor(
[r["meta"]["batch_size"] for r in per_batch_eval_results]
).long()
@@ -473,9 +466,11 @@ def summarize_nvs_eval_results(
# add per set averages
for SET in eval_sets:
if SET is None:
assert task == Task.SINGLE_SEQUENCE
ok_set = torch.ones(n_batches, dtype=torch.bool)
set_name = "test"
else:
assert task == Task.MULTI_SEQUENCE
ok_set = is_train == int(SET == "train")
set_name = SET
@@ -500,7 +495,7 @@ def summarize_nvs_eval_results(
}
)
if is_multisequence:
if task == Task.MULTI_SEQUENCE:
# split based on n_src_views
n_src_views = batch_sizes - 1
for n_src in EVAL_N_SRC_VIEWS:

View File

@@ -1,164 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import copy
import json
import logging
import os
from typing import Any, Dict, List, Optional, Tuple
import lpips
import torch
import tqdm
from pytorch3d.implicitron.dataset import utils as ds_utils
from pytorch3d.implicitron.evaluation import evaluate_new_view_synthesis as evaluate
from pytorch3d.implicitron.models.base_model import EvaluationMode, ImplicitronModelBase
from pytorch3d.implicitron.tools.config import (
registry,
ReplaceableBase,
run_auto_creation,
)
from pytorch3d.renderer.cameras import CamerasBase
from torch.utils.data import DataLoader
logger = logging.getLogger(__name__)
class EvaluatorBase(ReplaceableBase):
"""
Evaluate a trained model on given data. Returns a dict of loss/objective
names and their values.
"""
def run(
self, model: ImplicitronModelBase, dataloader: DataLoader, **kwargs
) -> Dict[str, Any]:
"""
Evaluate the results of Implicitron training.
"""
raise NotImplementedError()
@registry.register
class ImplicitronEvaluator(EvaluatorBase):
"""
Evaluate the results of Implicitron training.
Members:
camera_difficulty_bin_breaks: low/medium vals to divide camera difficulties into
[0-eps, low, medium, 1+eps].
"""
camera_difficulty_bin_breaks: Tuple[float, ...] = 0.97, 0.98
is_multisequence: bool = False
def __post_init__(self):
run_auto_creation(self)
def run(
self,
model: ImplicitronModelBase,
dataloader: DataLoader,
all_train_cameras: Optional[CamerasBase],
device: torch.device,
dump_to_json: bool = False,
exp_dir: Optional[str] = None,
epoch: Optional[int] = None,
**kwargs,
) -> Dict[str, Any]:
"""
Evaluate the results of Implicitron training. Optionally, dump results to
exp_dir/results_test.json.
Args:
model: A (trained) model to evaluate.
dataloader: A test dataloader.
all_train_cameras: Camera instances we used for training.
device: A torch device.
dump_to_json: If True, will dump the results to a json file.
exp_dir: Root expeirment directory.
epoch: Evaluation epoch (to be stored in the results dict).
Returns:
A dictionary of results.
"""
lpips_model = lpips.LPIPS(net="vgg")
lpips_model = lpips_model.to(device)
model.eval()
per_batch_eval_results = []
logger.info("Evaluating model ...")
for frame_data in tqdm.tqdm(dataloader):
frame_data = frame_data.to(device)
# mask out the unknown images so that the model does not see them
frame_data_for_eval = _get_eval_frame_data(frame_data)
with torch.no_grad():
preds = model(
**{
**frame_data_for_eval,
"evaluation_mode": EvaluationMode.EVALUATION,
}
)
implicitron_render = copy.deepcopy(preds["implicitron_render"])
per_batch_eval_results.append(
evaluate.eval_batch(
frame_data,
implicitron_render,
bg_color="black",
lpips_model=lpips_model,
source_cameras=( # None will make it use batchs known cameras
None if self.is_multisequence else all_train_cameras
),
)
)
_, category_result = evaluate.summarize_nvs_eval_results(
per_batch_eval_results,
self.is_multisequence,
self.camera_difficulty_bin_breaks,
)
results = category_result["results"]
evaluate.pretty_print_nvs_metrics(results)
if dump_to_json:
_dump_to_json(epoch, exp_dir, results)
return category_result["results"]
def _dump_to_json(
epoch: Optional[int], exp_dir: Optional[str], results: List[Dict[str, Any]]
) -> None:
if epoch is not None:
for r in results:
r["eval_epoch"] = int(epoch)
logger.info("Evaluation results")
if exp_dir is None:
raise ValueError("Cannot save results to json without a specified save path.")
with open(os.path.join(exp_dir, "results_test.json"), "w") as f:
json.dump(results, f)
def _get_eval_frame_data(frame_data: Any) -> Any:
"""
Masks the unknown image data to make sure we cannot use it at model evaluation time.
"""
frame_data_for_eval = copy.deepcopy(frame_data)
is_known = ds_utils.is_known_frame(frame_data.frame_type).type_as(
frame_data.image_rgb
)[:, None, None, None]
for k in ("image_rgb", "depth_map", "fg_probability", "mask_crop"):
value = getattr(frame_data_for_eval, k)
value_masked = value.clone() * is_known if value is not None else None
setattr(frame_data_for_eval, k, value_masked)
return frame_data_for_eval

View File

@@ -4,7 +4,7 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from dataclasses import dataclass, field
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
import torch
@@ -37,18 +37,12 @@ class ImplicitronRender:
)
class ImplicitronModelBase(ReplaceableBase, torch.nn.Module):
class ImplicitronModelBase(ReplaceableBase):
"""
Replaceable abstract base for all image generation / rendering models.
`forward()` method produces a render with a depth map. Derives from Module
so we can rely on basic functionality provided to torch for model
optimization.
`forward()` method produces a render with a depth map.
"""
# The keys from `preds` (output of ImplicitronModelBase.forward) to be logged in
# the training loop.
log_vars: List[str] = field(default_factory=lambda: ["objective"])
def __init__(self) -> None:
super().__init__()

View File

@@ -16,10 +16,10 @@ from typing import Any, Dict, List, Optional, Tuple, Union
import torch
import tqdm
from omegaconf import DictConfig
from pytorch3d.common.compat import prod
from pytorch3d.implicitron.models.metrics import (
from pytorch3d.implicitron.models.metrics import ( # noqa
RegularizationMetrics,
RegularizationMetricsBase,
ViewMetrics,
ViewMetricsBase,
)
from pytorch3d.implicitron.tools import image_utils, vis_utils
@@ -29,7 +29,7 @@ from pytorch3d.implicitron.tools.config import (
run_auto_creation,
)
from pytorch3d.implicitron.tools.rasterize_mc import rasterize_mc_samples
from pytorch3d.implicitron.tools.utils import cat_dataclass
from pytorch3d.implicitron.tools.utils import cat_dataclass, setattr_if_hasattr
from pytorch3d.renderer import RayBundle, utils as rend_utils
from pytorch3d.renderer.cameras import CamerasBase
from visdom import Visdom
@@ -67,7 +67,7 @@ logger = logging.getLogger(__name__)
@registry.register
class GenericModel(ImplicitronModelBase): # pyre-ignore: 13
class GenericModel(ImplicitronModelBase, torch.nn.Module): # pyre-ignore: 13
"""
GenericModel is a wrapper for the neural implicit
rendering and reconstruction pipeline which consists
@@ -148,9 +148,7 @@ class GenericModel(ImplicitronModelBase): # pyre-ignore: 13
thresholded by this value before being applied to the RGB/Depth images
output_rasterized_mc: If True, visualize the Monte-Carlo pixel renders by
splatting onto an image grid. Default: False.
bg_color: RGB values for setting the background color of input image
if mask_images=True. Defaults to (0.0, 0.0, 0.0). Each renderer has its own
way to determine the background color of its output, unrelated to this.
bg_color: RGB values for the background color. Default (0.0, 0.0, 0.0)
num_passes: The specified implicit_function is initialized num_passes
times and run sequentially.
chunk_size_grid: The total number of points which can be rendered
@@ -535,7 +533,6 @@ class GenericModel(ImplicitronModelBase): # pyre-ignore: 13
return None
loss = sum(losses_weighted)
assert torch.is_tensor(loss)
# pyre-fixme[7]: Expected `Optional[Tensor]` but got `int`.
return loss
def visualize(
@@ -620,57 +617,51 @@ class GenericModel(ImplicitronModelBase): # pyre-ignore: 13
self.image_feature_extractor.get_feat_dims()
)
@classmethod
def raysampler_tweak_args(cls, type, args: DictConfig) -> None:
"""
We don't expose certain fields of the raysampler because we want to set
them from our own members.
"""
del args["sampling_mode_training"]
del args["sampling_mode_evaluation"]
del args["image_width"]
del args["image_height"]
def create_raysampler(self):
extra_args = {
"sampling_mode_training": self.sampling_mode_training,
"sampling_mode_evaluation": self.sampling_mode_evaluation,
"image_width": self.render_image_width,
"image_height": self.render_image_height,
}
raysampler_args = getattr(
self, "raysampler_" + self.raysampler_class_type + "_args"
)
setattr_if_hasattr(
raysampler_args, "sampling_mode_training", self.sampling_mode_training
)
setattr_if_hasattr(
raysampler_args, "sampling_mode_evaluation", self.sampling_mode_evaluation
)
setattr_if_hasattr(raysampler_args, "image_width", self.render_image_width)
setattr_if_hasattr(raysampler_args, "image_height", self.render_image_height)
self.raysampler = registry.get(RaySamplerBase, self.raysampler_class_type)(
**raysampler_args, **extra_args
**raysampler_args
)
@classmethod
def renderer_tweak_args(cls, type, args: DictConfig) -> None:
"""
We don't expose certain fields of the renderer because we want to set
them based on other inputs.
"""
args.pop("render_features_dimensions", None)
args.pop("object_bounding_sphere", None)
def create_renderer(self):
extra_args = {}
raysampler_args = getattr(
self, "raysampler_" + self.raysampler_class_type + "_args"
)
self.renderer_MultiPassEmissionAbsorptionRenderer_args[
"stratified_sampling_coarse_training"
] = raysampler_args["stratified_point_sampling_training"]
self.renderer_MultiPassEmissionAbsorptionRenderer_args[
"stratified_sampling_coarse_evaluation"
] = raysampler_args["stratified_point_sampling_evaluation"]
self.renderer_SignedDistanceFunctionRenderer_args[
"render_features_dimensions"
] = self.render_features_dimensions
if self.renderer_class_type == "SignedDistanceFunctionRenderer":
extra_args["render_features_dimensions"] = self.render_features_dimensions
if not hasattr(self.raysampler, "scene_extent"):
if "scene_extent" not in raysampler_args:
raise ValueError(
"SignedDistanceFunctionRenderer requires"
+ " a raysampler that defines the 'scene_extent' field"
+ " (this field is supported by, e.g., the adaptive raysampler - "
+ " self.raysampler_class_type='AdaptiveRaySampler')."
)
extra_args["object_bounding_sphere"] = self.raysampler.scene_extent
self.renderer_SignedDistanceFunctionRenderer_args.ray_tracer_args[
"object_bounding_sphere"
] = self.raysampler_AdaptiveRaySampler_args["scene_extent"]
renderer_args = getattr(self, "renderer_" + self.renderer_class_type + "_args")
self.renderer = registry.get(BaseRenderer, self.renderer_class_type)(
**renderer_args, **extra_args
**renderer_args
)
def create_implicit_function(self) -> None:
@@ -681,18 +672,6 @@ class GenericModel(ImplicitronModelBase): # pyre-ignore: 13
"""
pass
@classmethod
def implicit_function_tweak_args(cls, type, args: DictConfig) -> None:
"""
We don't expose certain implicit_function fields because we want to set
them based on other inputs.
"""
args.pop("feature_vector_size", None)
args.pop("encoding_dim", None)
args.pop("latent_dim", None)
args.pop("latent_dim_hypernet", None)
args.pop("color_dim", None)
def _construct_implicit_functions(self):
"""
After run_auto_creation has been called, the arguments
@@ -702,31 +681,32 @@ class GenericModel(ImplicitronModelBase): # pyre-ignore: 13
implicit function method. Then the required implicit
function(s) are initialized.
"""
extra_args = {}
if self.implicit_function_class_type in (
"NeuralRadianceFieldImplicitFunction",
"NeRFormerImplicitFunction",
):
extra_args["latent_dim"] = (
self._get_viewpooled_feature_dim()
+ self._get_global_encoder_encoding_dim()
)
extra_args["color_dim"] = self.render_features_dimensions
# nerf preprocessing
nerf_args = self.implicit_function_NeuralRadianceFieldImplicitFunction_args
nerformer_args = self.implicit_function_NeRFormerImplicitFunction_args
nerf_args["latent_dim"] = nerformer_args["latent_dim"] = (
self._get_viewpooled_feature_dim() + self._get_global_encoder_encoding_dim()
)
nerf_args["color_dim"] = nerformer_args[
"color_dim"
] = self.render_features_dimensions
if self.implicit_function_class_type == "IdrFeatureField":
extra_args["feature_vector_size"] = self.render_features_dimensions
extra_args["encoding_dim"] = self._get_global_encoder_encoding_dim()
# idr preprocessing
idr = self.implicit_function_IdrFeatureField_args
idr["feature_vector_size"] = self.render_features_dimensions
idr["encoding_dim"] = self._get_global_encoder_encoding_dim()
if self.implicit_function_class_type == "SRNImplicitFunction":
extra_args["latent_dim"] = (
self._get_viewpooled_feature_dim()
+ self._get_global_encoder_encoding_dim()
)
# srn preprocessing
srn = self.implicit_function_SRNImplicitFunction_args
srn.raymarch_function_args.latent_dim = (
self._get_viewpooled_feature_dim() + self._get_global_encoder_encoding_dim()
)
# srn_hypernet preprocessing
if self.implicit_function_class_type == "SRNHyperNetImplicitFunction":
extra_args["latent_dim"] = self._get_viewpooled_feature_dim()
extra_args["latent_dim_hypernet"] = self._get_global_encoder_encoding_dim()
srn_hypernet = self.implicit_function_SRNHyperNetImplicitFunction_args
srn_hypernet_args = srn_hypernet.hypernet_args
srn_hypernet_args.latent_dim_hypernet = self._get_global_encoder_encoding_dim()
srn_hypernet_args.latent_dim = self._get_viewpooled_feature_dim()
# check that for srn, srn_hypernet, idr we have self.num_passes=1
implicit_function_type = registry.get(
@@ -749,7 +729,7 @@ class GenericModel(ImplicitronModelBase): # pyre-ignore: 13
if config is None:
raise ValueError(f"{config_name} not present")
implicit_functions_list = [
ImplicitFunctionWrapper(implicit_function_type(**config, **extra_args))
ImplicitFunctionWrapper(implicit_function_type(**config))
for _ in range(self.num_passes)
]
return torch.nn.ModuleList(implicit_functions_list)
@@ -860,7 +840,7 @@ class GenericModel(ImplicitronModelBase): # pyre-ignore: 13
# Estimate the rasterization point radius so that we approximately fill
# the whole image given the number of rasterized points.
pt_radius = 2.0 / math.sqrt(xys.shape[1])
pt_radius = 2.0 * math.sqrt(xys.shape[1])
# Rasterize the samples.
features_depth_render, masks_render = rasterize_mc_samples(
@@ -920,7 +900,7 @@ def _chunk_generator(
f"by n_pts_per_ray ({n_pts_per_ray})"
)
n_rays = prod(spatial_dim)
n_rays = math.prod(spatial_dim)
# special handling for raytracing-based methods
n_chunks = -(-n_rays * max(n_pts_per_ray, 1) // chunk_size)
chunk_size_in_rays = -(-n_rays // n_chunks)
@@ -936,9 +916,9 @@ def _chunk_generator(
directions=ray_bundle.directions.reshape(batch_size, -1, 3)[
:, start_idx:end_idx
],
lengths=ray_bundle.lengths.reshape(batch_size, n_rays, n_pts_per_ray)[
:, start_idx:end_idx
],
lengths=ray_bundle.lengths.reshape(
batch_size, math.prod(spatial_dim), n_pts_per_ray
)[:, start_idx:end_idx],
xys=ray_bundle.xys.reshape(batch_size, -1, 2)[:, start_idx:end_idx],
)
extra_args = kwargs.copy()

View File

@@ -24,16 +24,15 @@ class Autodecoder(Configurable, torch.nn.Module):
"""
encoding_dim: int = 0
n_instances: int = 1
n_instances: int = 0
init_scale: float = 1.0
ignore_input: bool = False
def __post_init__(self):
super().__init__()
if self.n_instances <= 0:
raise ValueError(f"Invalid n_instances {self.n_instances}")
# Do not init the codes at all in case we have 0 instances.
return
self._autodecoder_codes = torch.nn.Embedding(
self.n_instances,
self.encoding_dim,
@@ -71,9 +70,13 @@ class Autodecoder(Configurable, torch.nn.Module):
return key_map
def calculate_squared_encoding_norm(self) -> Optional[torch.Tensor]:
if self.n_instances <= 0:
return None
return (self._autodecoder_codes.weight**2).mean() # pyre-ignore[16]
def get_encoding_dim(self) -> int:
if self.n_instances <= 0:
return 0
return self.encoding_dim
def forward(self, x: Union[torch.LongTensor, List[str]]) -> Optional[torch.Tensor]:
@@ -87,6 +90,9 @@ class Autodecoder(Configurable, torch.nn.Module):
codes: A tensor of shape `(N, self.encoding_dim)` containing the
key-specific autodecoder codes.
"""
if self.n_instances == 0:
return None
if self.ignore_input:
x = ["singleton"]

View File

@@ -42,13 +42,7 @@ class GlobalEncoderBase(ReplaceableBase):
"""
raise NotImplementedError()
def forward(
self,
*,
frame_timestamp: Optional[torch.Tensor] = None,
sequence_name: Optional[Union[torch.LongTensor, List[str]]] = None,
**kwargs,
) -> torch.Tensor:
def forward(self, **kwargs) -> torch.Tensor:
"""
Given a set of inputs to encode, generates a tensor containing the encoding.
@@ -76,14 +70,9 @@ class SequenceAutodecoder(GlobalEncoderBase, torch.nn.Module): # pyre-ignore: 1
return self.autodecoder.get_encoding_dim()
def forward(
self,
*,
frame_timestamp: Optional[torch.Tensor] = None,
sequence_name: Optional[Union[torch.LongTensor, List[str]]] = None,
**kwargs,
self, sequence_name: Union[torch.LongTensor, List[str]], **kwargs
) -> torch.Tensor:
if sequence_name is None:
raise ValueError("sequence_name must be provided.")
# run dtype checks and pass sequence_name to self.autodecoder
return self.autodecoder(sequence_name)
@@ -112,15 +101,7 @@ class HarmonicTimeEncoder(GlobalEncoderBase, torch.nn.Module):
def get_encoding_dim(self):
return self._harmonic_embedding.get_output_dim(1)
def forward(
self,
*,
frame_timestamp: Optional[torch.Tensor] = None,
sequence_name: Optional[Union[torch.LongTensor, List[str]]] = None,
**kwargs,
) -> torch.Tensor:
if frame_timestamp is None:
raise ValueError("frame_timestamp must be provided.")
def forward(self, frame_timestamp: torch.Tensor, **kwargs) -> torch.Tensor:
if frame_timestamp.shape[-1] != 1:
raise ValueError("Frame timestamp's last dimensions should be one.")
time = frame_timestamp / self.time_divisor

View File

@@ -4,7 +4,6 @@
from typing import Any, cast, Optional, Tuple
import torch
from omegaconf import DictConfig
from pytorch3d.common.linear_with_repeat import LinearWithRepeat
from pytorch3d.implicitron.third_party import hyperlayers, pytorch_prototyping
from pytorch3d.implicitron.tools.config import Configurable, registry, run_auto_creation
@@ -328,7 +327,6 @@ class SRNRaymarchHyperNet(Configurable, torch.nn.Module):
@registry.register
# pyre-fixme[13]: Uninitialized attribute
class SRNImplicitFunction(ImplicitFunctionBase, torch.nn.Module):
latent_dim: int = 0
raymarch_function: SRNRaymarchFunction
pixel_generator: SRNPixelGenerator
@@ -336,17 +334,6 @@ class SRNImplicitFunction(ImplicitFunctionBase, torch.nn.Module):
super().__init__()
run_auto_creation(self)
def create_raymarch_function(self) -> None:
self.raymarch_function = SRNRaymarchFunction(
latent_dim=self.latent_dim,
# pyre-ignore[32]
**self.raymarch_function_args,
)
@classmethod
def raymarch_function_tweak_args(cls, type, args: DictConfig) -> None:
args.pop("latent_dim", None)
def forward(
self,
ray_bundle: RayBundle,
@@ -384,8 +371,6 @@ class SRNHyperNetImplicitFunction(ImplicitFunctionBase, torch.nn.Module):
the cache.
"""
latent_dim_hypernet: int = 0
latent_dim: int = 0
hypernet: SRNRaymarchHyperNet
pixel_generator: SRNPixelGenerator
@@ -393,19 +378,6 @@ class SRNHyperNetImplicitFunction(ImplicitFunctionBase, torch.nn.Module):
super().__init__()
run_auto_creation(self)
def create_hypernet(self) -> None:
self.hypernet = SRNRaymarchHyperNet(
latent_dim=self.latent_dim,
latent_dim_hypernet=self.latent_dim_hypernet,
# pyre-ignore[32]
**self.hypernet_args,
)
@classmethod
def hypernet_tweak_args(cls, type, args: DictConfig) -> None:
args.pop("latent_dim", None)
args.pop("latent_dim_hypernet", None)
def forward(
self,
ray_bundle: RayBundle,

View File

@@ -4,10 +4,10 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import math
from typing import Callable, Optional
import torch
from pytorch3d.common.compat import prod
from pytorch3d.renderer.cameras import CamerasBase
@@ -52,7 +52,7 @@ def create_embeddings_for_implicit_function(
embeds = torch.empty(
bs,
1,
prod(spatial_size),
math.prod(spatial_size),
pts_per_ray,
0,
dtype=xyz_world.dtype,
@@ -62,7 +62,7 @@ def create_embeddings_for_implicit_function(
embeds = xyz_embedding_function(ray_points_for_embed).reshape(
bs,
1,
prod(spatial_size),
math.prod(spatial_size),
pts_per_ray,
-1,
) # flatten spatial, add n_src dim
@@ -73,7 +73,7 @@ def create_embeddings_for_implicit_function(
embed_shape = (
bs,
embeds_viewpooled.shape[1],
prod(spatial_size),
math.prod(spatial_size),
pts_per_ray,
-1,
)

View File

@@ -22,7 +22,7 @@ from .renderer.base import EvaluationMode
@registry.register
class ModelDBIR(ImplicitronModelBase):
class ModelDBIR(ImplicitronModelBase, torch.nn.Module):
"""
A simple depth-based image rendering model.

View File

@@ -53,12 +53,10 @@ class MultiPassEmissionAbsorptionRenderer( # pyre-ignore: 13
fine rendering pass during training.
n_pts_per_ray_fine_evaluation: The number of points sampled per ray for the
fine rendering pass during evaluation.
stratified_sampling_coarse_training: Enable/disable stratified sampling in the
refiner during training. Only matters if there are multiple implicit
functions (i.e. in GenericModel if num_passes>1).
stratified_sampling_coarse_evaluation: Enable/disable stratified sampling in
the refiner during evaluation. Only matters if there are multiple implicit
functions (i.e. in GenericModel if num_passes>1).
stratified_sampling_coarse_training: Enable/disable stratified sampling during
training.
stratified_sampling_coarse_evaluation: Enable/disable stratified sampling during
evaluation.
append_coarse_samples_to_fine: Add the fine ray points to the coarse points
after sampling.
density_noise_std_train: Standard deviation of the noise added to the

View File

@@ -218,7 +218,7 @@ class AdaptiveRaySampler(AbstractMaskRaySampler):
def _get_min_max_depth_bounds(self, cameras: CamerasBase) -> Tuple[float, float]:
"""
Returns the adaptively calculated near/far planes.
Returns the adaptivelly calculated near/far planes.
"""
min_depth, max_depth = camera_utils.get_min_max_depth_bounds(
cameras, self._scene_center, self.scene_extent

View File

@@ -3,16 +3,12 @@
# implicit_differentiable_renderer.py
# Copyright (c) 2020 Lior Yariv
import functools
import math
from typing import List, Optional, Tuple
import torch
from omegaconf import DictConfig
from pytorch3d.common.compat import prod
from pytorch3d.implicitron.tools.config import (
get_default_args_field,
registry,
run_auto_creation,
)
from pytorch3d.implicitron.tools.config import get_default_args_field, registry
from pytorch3d.implicitron.tools.utils import evaluating
from pytorch3d.renderer import RayBundle
@@ -22,10 +18,9 @@ from .rgb_net import RayNormalColoringNetwork
@registry.register
class SignedDistanceFunctionRenderer(BaseRenderer, torch.nn.Module): # pyre-ignore[13]
class SignedDistanceFunctionRenderer(BaseRenderer, torch.nn.Module):
render_features_dimensions: int = 3
object_bounding_sphere: float = 1.0
ray_tracer: RayTracing
ray_tracer_args: DictConfig = get_default_args_field(RayTracing)
ray_normal_coloring_network_args: DictConfig = get_default_args_field(
RayNormalColoringNetwork
)
@@ -42,7 +37,8 @@ class SignedDistanceFunctionRenderer(BaseRenderer, torch.nn.Module): # pyre-ign
f"Background color should have {render_features_dimensions} entries."
)
run_auto_creation(self)
self.ray_tracer = RayTracing(**self.ray_tracer_args)
self.object_bounding_sphere = self.ray_tracer_args.get("object_bounding_sphere")
self.ray_normal_coloring_network_args[
"feature_vector_size"
@@ -53,17 +49,6 @@ class SignedDistanceFunctionRenderer(BaseRenderer, torch.nn.Module): # pyre-ign
self.register_buffer("_bg_color", torch.tensor(self.bg_color), persistent=False)
@classmethod
def ray_tracer_tweak_args(cls, type, args: DictConfig) -> None:
del args["object_bounding_sphere"]
def create_ray_tracer(self) -> None:
self.ray_tracer = RayTracing(
# pyre-ignore[32]
**self.ray_tracer_args,
object_bounding_sphere=self.object_bounding_sphere,
)
def requires_object_mask(self) -> bool:
return True
@@ -105,13 +90,14 @@ class SignedDistanceFunctionRenderer(BaseRenderer, torch.nn.Module): # pyre-ign
# object_mask: silhouette of the object
batch_size, *spatial_size, _ = ray_bundle.lengths.shape
num_pixels = prod(spatial_size)
num_pixels = math.prod(spatial_size)
cam_loc = ray_bundle.origins.reshape(batch_size, -1, 3)
ray_dirs = ray_bundle.directions.reshape(batch_size, -1, 3)
object_mask = object_mask.reshape(batch_size, -1)
with torch.no_grad(), evaluating(implicit_function):
# pyre-fixme[29]: `Union[torch.Tensor, torch.nn.Module]` is not a function.
points, network_object_mask, dists = self.ray_tracer(
sdf=lambda x: implicit_function(x)[
:, 0
@@ -142,6 +128,7 @@ class SignedDistanceFunctionRenderer(BaseRenderer, torch.nn.Module): # pyre-ign
N = surface_points.shape[0]
# Sample points for the eikonal loss
# pyre-fixme[9]
eik_bounding_box: float = self.object_bounding_sphere
n_eik_points = batch_size * num_pixels // 2
eikonal_points = torch.empty(

View File

@@ -881,43 +881,7 @@ def get_default_args_field(
def create():
args = get_default_args(C, _do_not_process=_do_not_process)
if _hook is not None:
with open_dict(args):
_hook(args)
return args
return dataclasses.field(default_factory=create)
def _get_default_args_field_from_registry(
*,
base_class_wanted: Type[_X],
name: str,
_do_not_process: Tuple[type, ...] = (),
_hook: Optional[Callable[[DictConfig], None]] = None,
):
"""
Get a dataclass field which defaults to
get_default_args(registry.get(base_class_wanted, name)).
This is used internally in place of get_default_args_field in
order that default values are updated if a class is redefined.
Args:
base_class_wanted: As for registry.get.
name: As for registry.get.
_do_not_process: As for get_default_args
_hook: Function called on the result before returning.
Returns:
function to return new DictConfig object
"""
def create():
C = registry.get(base_class_wanted=base_class_wanted, name=name)
args = get_default_args(C, _do_not_process=_do_not_process)
if _hook is not None:
with open_dict(args):
_hook(args)
_hook(args)
return args
return dataclasses.field(default_factory=create)
@@ -1014,9 +978,8 @@ def _process_member(
setattr(
some_class,
args_name,
_get_default_args_field_from_registry(
base_class_wanted=type_,
name=derived_type.__name__,
get_default_args_field(
derived_type,
_do_not_process=_do_not_process + (some_class,),
_hook=hook_closed,
),

View File

@@ -74,7 +74,6 @@ class Stats(object):
"""
stats logging object useful for gathering statistics of training a deep net in pytorch
Example:
```
# init stats structure that logs statistics 'objective' and 'top1e'
stats = Stats( ('objective','top1e') )
network = init_net() # init a pytorch module (=nueral network)
@@ -95,7 +94,6 @@ class Stats(object):
# stores the training plots into '/tmp/epoch_stats.pdf'
# and plots into a visdom server running at localhost (if running)
stats.plot_stats(plot_file='/tmp/epoch_stats.pdf')
```
"""
def __init__(
@@ -198,6 +196,7 @@ class Stats(object):
if verbose:
print(f"Adding {add_log_var}")
self.log_vars.append(add_log_var)
# self.synchronize_logged_vars(self.log_vars, verbose=verbose)
def update(self, preds, time_start=None, freeze_iter=False, stat_set="train"):
@@ -229,6 +228,7 @@ class Stats(object):
elapsed = time.time() - time_start
time_per_it = float(elapsed) / float(it + 1)
val = time_per_it
# self.stats[stat_set]['sec/it'].update(time_per_it,epoch=epoch,n=1)
else:
if stat in preds:
try:
@@ -439,6 +439,7 @@ class Stats(object):
self.log_vars = log_vars # !!!
for stat_set in stat_sets:
reference_stat = list(self.stats[stat_set].keys())[0]
for stat in log_vars:
if stat not in self.stats[stat_set]:
if verbose:
@@ -465,11 +466,12 @@ class Stats(object):
lastep = self.epoch + 1
for ep in range(lastep):
self.stats[stat_set][stat].update(default_val, n=1, epoch=ep)
epoch_self = self.stats[stat_set][reference_stat].get_epoch()
epoch_generated = self.stats[stat_set][stat].get_epoch()
assert (
epoch_generated == self.epoch + 1
epoch_self == epoch_generated
), "bad epoch of synchronized log_var! %d vs %d" % (
self.epoch + 1,
epoch_self,
epoch_generated,
)

View File

@@ -157,6 +157,15 @@ def cat_dataclass(batch, tensor_collator: Callable):
return type(elem)(**collated)
def setattr_if_hasattr(obj, name, value):
"""
Same as setattr(obj, name, value), but does nothing in case `name` is
not an attribe of `obj`.
"""
if hasattr(obj, name):
setattr(obj, name, value)
class Timer:
"""
A simple class for timing execution.

View File

@@ -84,6 +84,8 @@ class VideoWriter:
or a 2-tuple defining the size of the output image.
"""
# pyre-fixme[6]: For 1st param expected `Union[PathLike[str], str]` but got
# `Optional[str]`.
outfile = os.path.join(self.cache_dir, self.regexp % self.frame_num)
if isinstance(frame, matplotlib.figure.Figure):
@@ -125,6 +127,8 @@ class VideoWriter:
video_path: The path to the generated video.
"""
# pyre-fixme[6]: For 1st param expected `Union[PathLike[str], str]` but got
# `Optional[str]`.
regexp = os.path.join(self.cache_dir, self.regexp)
if self.output_format == "visdom": # works for ppt too

View File

@@ -14,22 +14,20 @@ from visdom import Visdom
logger = logging.getLogger(__name__)
def get_visdom_env(visdom_env: str, exp_dir: str) -> str:
def get_visdom_env(cfg):
"""
Parse out visdom environment name from the input config.
Args:
visdom_env: Name of the wisdom environment, could be empty string.
exp_dir: Root experiment directory.
cfg: The global config file.
Returns:
visdom_env: The name of the visdom environment. If the given visdom_env is
empty, return the name of the bottom directory in exp_dir.
visdom_env: The name of the visdom environment.
"""
if len(visdom_env) == 0:
visdom_env = exp_dir.split("/")[-1]
if len(cfg.visdom_env) == 0:
visdom_env = cfg.exp_dir.split("/")[-1]
else:
visdom_env = visdom_env
visdom_env = cfg.visdom_env
return visdom_env

View File

@@ -215,6 +215,8 @@ def load_obj(
"""
data_dir = "./"
if isinstance(f, (str, bytes, Path)):
# pyre-fixme[6]: For 1st param expected `PathLike[Variable[AnyStr <: [str,
# bytes]]]` but got `Union[Path, bytes, str]`.
data_dir = os.path.dirname(f)
if path_manager is None:
path_manager = PathManager()

View File

@@ -65,6 +65,11 @@ from .mesh import (
TexturesVertex,
)
try:
from .opengl import EGLContext, global_device_context_store, MeshRasterizerOpenGL
except (ImportError, ModuleNotFoundError):
pass # opengl or pycuda.gl not available, or pytorch3_opengl not in TARGETS.
from .points import (
AlphaCompositor,
NormWeightedCompositor,

View File

@@ -1661,9 +1661,9 @@ def look_at_rotation(
def look_at_view_transform(
dist: _BatchFloatType = 1.0,
elev: _BatchFloatType = 0.0,
azim: _BatchFloatType = 0.0,
dist: float = 1.0,
elev: float = 0.0,
azim: float = 0.0,
degrees: bool = True,
eye: Optional[Union[Sequence, torch.Tensor]] = None,
at=((0, 0, 0),), # (1, 3)

View File

@@ -10,6 +10,8 @@ import torch
import torch.nn as nn
from ...structures.meshes import Meshes
from .rasterizer import MeshRasterizer
# A renderer class should be initialized with a
# function for rasterization and a function for shading.
@@ -30,11 +32,11 @@ from ...structures.meshes import Meshes
class MeshRenderer(nn.Module):
"""
A class for rendering a batch of heterogeneous meshes. The class should
be initialized with a rasterizer (a MeshRasterizer or a MeshRasterizerOpenGL)
and shader class which each have a forward function.
be initialized with a rasterizer and shader class which each have a forward
function.
"""
def __init__(self, rasterizer, shader) -> None:
def __init__(self, rasterizer: MeshRasterizer, shader) -> None:
super().__init__()
self.rasterizer = rasterizer
self.shader = shader
@@ -67,8 +69,8 @@ class MeshRenderer(nn.Module):
class MeshRendererWithFragments(nn.Module):
"""
A class for rendering a batch of heterogeneous meshes. The class should
be initialized with a rasterizer (a MeshRasterizer or a MeshRasterizerOpenGL)
and shader class which each have a forward function.
be initialized with a rasterizer and shader class which each have a forward
function.
In the forward pass this class returns the `fragments` from which intermediate
values such as the depth map can be easily extracted e.g.
@@ -78,7 +80,7 @@ class MeshRendererWithFragments(nn.Module):
depth = fragments.zbuf
"""
def __init__(self, rasterizer, shader) -> None:
def __init__(self, rasterizer: MeshRasterizer, shader) -> None:
super().__init__()
self.rasterizer = rasterizer
self.shader = shader

View File

@@ -130,9 +130,8 @@ class MeshRasterizerOpenGL(nn.Module):
Fragments output by MeshRasterizerOpenGL and MeshRasterizer should have near
identical pix_to_face, bary_coords and zbuf. However, MeshRasterizerOpenGL does not
return Fragments.dists which is only relevant to SoftPhongShader and
SoftSilhouetteShader. These do not work with MeshRasterizerOpenGL (because it is
not differentiable).
return Fragments.dists which is only relevant to SoftPhongShader which doesn't work
with MeshRasterizerOpenGL (because it is not differentiable).
"""
def __init__(

View File

@@ -474,12 +474,6 @@ class Renderer(torch.nn.Module):
rot_mat = axis_angle_to_matrix(rot_vec)
if first_R_then_T:
pos_vec = torch.matmul(rot_mat, pos_vec[..., None])[:, :, 0]
LOGGER.debug(
"Camera position: %s, rotation: %s. Focal length: %s.",
str(pos_vec),
str(rot_vec),
str(focal_length),
)
sensor_dir_x = torch.matmul(
rot_mat,
torch.tensor(
@@ -500,56 +494,20 @@ class Renderer(torch.nn.Module):
)[:, :, 0]
if right_handed:
sensor_dir_z *= -1
LOGGER.debug(
"Sensor direction vectors: %s, %s, %s.",
str(sensor_dir_x),
str(sensor_dir_y),
str(sensor_dir_z),
)
if orthogonal:
sensor_center = pos_vec
else:
sensor_center = pos_vec + focal_length * sensor_dir_z
LOGGER.debug("Sensor center: %s.", str(sensor_center))
sensor_luc = ( # Sensor left upper corner.
sensor_center
- sensor_dir_x * (sensor_size_x / 2.0)
- sensor_dir_y * (sensor_size_y / 2.0)
)
LOGGER.debug("Sensor luc: %s.", str(sensor_luc))
pixel_size_x = sensor_size_x / float(width)
pixel_size_y = sensor_size_y / float(height)
LOGGER.debug(
"Pixel sizes (x): %s, (y) %s.", str(pixel_size_x), str(pixel_size_y)
)
pixel_vec_x: torch.Tensor = sensor_dir_x * pixel_size_x
pixel_vec_y: torch.Tensor = sensor_dir_y * pixel_size_y
pixel_0_0_center = sensor_luc + 0.5 * pixel_vec_x + 0.5 * pixel_vec_y
LOGGER.debug(
"Pixel 0 centers: %s, vec x: %s, vec y: %s.",
str(pixel_0_0_center),
str(pixel_vec_x),
str(pixel_vec_y),
)
if not orthogonal:
LOGGER.debug(
"Camera horizontal fovs: %s deg.",
str(
2.0
* torch.atan(0.5 * sensor_size_x / focal_length)
/ math.pi
* 180.0
),
)
LOGGER.debug(
"Camera vertical fovs: %s deg.",
str(
2.0
* torch.atan(0.5 * sensor_size_y / focal_length)
/ math.pi
* 180.0
),
)
# Reduce dimension.
focal_length: torch.Tensor = focal_length[:, 0]
if batch_processing:

View File

@@ -323,6 +323,7 @@ def random_quaternions(
"""
if isinstance(device, str):
device = torch.device(device)
# pyre-fixme[6]: For 2nd param expected `dtype` but got `Optional[dtype]`.
o = torch.randn((n, 4), dtype=dtype, device=device)
s = (o * o).sum(1)
o = o / _copysign(torch.sqrt(s), o[:, 0])[:, None]

View File

@@ -8,7 +8,6 @@
import glob
import os
import runpy
import sys
import warnings
from typing import List, Optional
@@ -36,13 +35,6 @@ def get_existing_ccbin(nvcc_args: List[str]) -> Optional[str]:
def get_extensions():
no_extension = os.getenv("PYTORCH3D_NO_EXTENSION", "0") == "1"
if no_extension:
msg = "SKIPPING EXTENSION BUILD. PYTORCH3D WILL NOT WORK!"
print(msg, file=sys.stderr)
warnings.warn(msg)
return []
this_dir = os.path.dirname(os.path.abspath(__file__))
extensions_dir = os.path.join(this_dir, "pytorch3d", "csrc")
sources = glob.glob(os.path.join(extensions_dir, "**", "*.cpp"), recursive=True)
@@ -54,10 +46,7 @@ def get_extensions():
include_dirs = [extensions_dir]
force_cuda = os.getenv("FORCE_CUDA", "0") == "1"
force_no_cuda = os.getenv("PYTORCH3D_FORCE_NO_CUDA", "0") == "1"
if (
not force_no_cuda and torch.cuda.is_available() and CUDA_HOME is not None
) or force_cuda:
if (torch.cuda.is_available() and CUDA_HOME is not None) or force_cuda:
extension = CUDAExtension
sources += source_cuda
define_macros += [("WITH_CUDA", None)]
@@ -139,7 +128,7 @@ if os.getenv("PYTORCH3D_NO_NINJA", "0") == "1":
else:
BuildExtension = torch.utils.cpp_extension.BuildExtension
trainer = "pytorch3d.implicitron_trainer"
trainer = "projects.implicitron_trainer"
setup(
name="pytorch3d",
@@ -149,10 +138,8 @@ setup(
description="PyTorch3D is FAIR's library of reusable components "
"for deep Learning with 3D data.",
packages=find_packages(
exclude=("configs", "tests", "tests.*", "docs.*", "projects.*")
)
+ [trainer],
package_dir={trainer: "projects/implicitron_trainer"},
exclude=("configs", "tests", "tests.*", "docs.*", "projects.nerf.*")
),
install_requires=["fvcore", "iopath"],
extras_require={
"all": ["matplotlib", "tqdm>4.29.0", "imageio", "ipywidgets"],

View File

@@ -2,6 +2,6 @@
This is copied version of docs/tutorials/data/cow_mesh with removed line 6159 (usemtl material_1) to test behavior without usemtl material_1 declaration.
Thank you to Keenan Crane for allowing the cow mesh model to be used freely in the public domain.
Thank you to Keenen Crane for allowing the cow mesh model to be used freely in the public domain.
###### Source: http://www.cs.cmu.edu/~kmcrane/Projects/ModelRepository/

View File

@@ -52,11 +52,17 @@ dataset_map_provider_JsonIndexDatasetMapProviderV2_args:
dataset_class_type: JsonIndexDataset
path_manager_factory_class_type: PathManagerFactory
dataset_JsonIndexDataset_args:
path_manager: null
frame_annotations_file: ''
sequence_annotations_file: ''
subset_lists_file: ''
subsets: null
limit_to: 0
limit_sequences_to: 0
pick_sequence: []
exclude_sequence: []
limit_category_to: []
dataset_root: ''
load_images: true
load_depths: true
load_depth_masks: true
@@ -74,6 +80,7 @@ dataset_map_provider_JsonIndexDatasetMapProviderV2_args:
n_frames_per_sequence: -1
seed: 0
sort_frames: false
eval_batches: null
path_manager_factory_PathManagerFactory_args:
silence_logs: true
dataset_map_provider_LlffDatasetMapProvider_args:
@@ -83,16 +90,6 @@ dataset_map_provider_LlffDatasetMapProvider_args:
n_known_frames_for_test: null
path_manager_factory_PathManagerFactory_args:
silence_logs: true
downscale_factor: 4
dataset_map_provider_RenderedMeshDatasetMapProvider_args:
num_views: 40
data_file: null
azimuth_range: 180.0
resolution: 128
use_point_light: true
path_manager_factory_class_type: PathManagerFactory
path_manager_factory_PathManagerFactory_args:
silence_logs: true
data_loader_map_provider_SequenceDataLoaderMapProvider_args:
batch_size: 1
num_workers: 0
@@ -106,9 +103,3 @@ data_loader_map_provider_SequenceDataLoaderMapProvider_args:
sample_consecutive_frames: false
consecutive_frames_max_gap: 0
consecutive_frames_max_gap_seconds: 0.1
data_loader_map_provider_SimpleDataLoaderMapProvider_args:
batch_size: 1
num_workers: 0
dataset_length_train: 0
dataset_length_val: 0
dataset_length_test: 0

View File

@@ -1,24 +1,3 @@
log_vars:
- loss_rgb_psnr_fg
- loss_rgb_psnr
- loss_rgb_mse
- loss_rgb_huber
- loss_depth_abs
- loss_depth_abs_fg
- loss_mask_neg_iou
- loss_mask_bce
- loss_mask_beta_prior
- loss_eikonal
- loss_density_tv
- loss_depth_neg_penalty
- loss_autodecoder_norm
- loss_prev_stage_rgb_mse
- loss_prev_stage_rgb_psnr_fg
- loss_prev_stage_rgb_psnr
- loss_prev_stage_mask_bce
- objective
- epoch
- sec/it
mask_images: true
mask_depths: true
render_image_width: 400
@@ -49,13 +28,38 @@ loss_weights:
loss_prev_stage_rgb_mse: 1.0
loss_mask_bce: 0.0
loss_prev_stage_mask_bce: 0.0
log_vars:
- loss_rgb_psnr_fg
- loss_rgb_psnr
- loss_rgb_mse
- loss_rgb_huber
- loss_depth_abs
- loss_depth_abs_fg
- loss_mask_neg_iou
- loss_mask_bce
- loss_mask_beta_prior
- loss_eikonal
- loss_density_tv
- loss_depth_neg_penalty
- loss_autodecoder_norm
- loss_prev_stage_rgb_mse
- loss_prev_stage_rgb_psnr_fg
- loss_prev_stage_rgb_psnr
- loss_prev_stage_mask_bce
- objective
- epoch
- sec/it
global_encoder_SequenceAutodecoder_args:
autodecoder_args:
encoding_dim: 0
n_instances: 1
n_instances: 0
init_scale: 1.0
ignore_input: false
raysampler_AdaptiveRaySampler_args:
image_width: 400
image_height: 400
sampling_mode_training: mask_sample
sampling_mode_evaluation: full_grid
n_pts_per_ray_training: 64
n_pts_per_ray_evaluation: 64
n_rays_per_image_sampled_from_mask: 1024
@@ -103,6 +107,7 @@ view_pooler_args:
weight_by_ray_angle_gamma: 1.0
min_ray_angle_weight: 0.1
implicit_function_IdrFeatureField_args:
feature_vector_size: 3
d_in: 3
d_out: 1
dims:
@@ -120,5 +125,6 @@ implicit_function_IdrFeatureField_args:
weight_norm: true
n_harmonic_functions_xyz: 1729
pooled_feature_dim: 0
encoding_dim: 0
view_metrics_ViewMetrics_args: {}
regularization_metrics_RegularizationMetrics_args: {}

View File

@@ -378,20 +378,14 @@ class TestConfig(unittest.TestCase):
with self.assertWarnsRegex(
UserWarning, "New implementation of Grape is being chosen."
):
defaulted_bowl = FruitBowl()
self.assertIsInstance(defaulted_bowl.main_fruit, Grape)
self.assertEqual(defaulted_bowl.main_fruit.large, True)
self.assertEqual(defaulted_bowl.main_fruit.get_color(), "green")
bowl = FruitBowl(**bowl_args)
self.assertIsInstance(bowl.main_fruit, Grape)
with self.assertWarnsRegex(
UserWarning, "New implementation of Grape is being chosen."
):
args_bowl = FruitBowl(**bowl_args)
self.assertIsInstance(args_bowl.main_fruit, Grape)
# Redefining the same class won't help with defaults because encoded in args
self.assertEqual(args_bowl.main_fruit.large, False)
self.assertEqual(bowl.main_fruit.large, False)
# But the override worked.
self.assertEqual(args_bowl.main_fruit.get_color(), "green")
self.assertEqual(bowl.main_fruit.get_color(), "green")
# 2. Try redefining without the dataclass modifier
# This relies on the fact that default creation processes the class.
@@ -403,7 +397,7 @@ class TestConfig(unittest.TestCase):
with self.assertWarnsRegex(
UserWarning, "New implementation of Grape is being chosen."
):
FruitBowl(**bowl_args)
bowl = FruitBowl(**bowl_args)
# 3. Adding a new class doesn't get picked up, because the first
# get_default_args call has frozen FruitBowl. This is intrinsic to
@@ -691,17 +685,12 @@ class TestConfig(unittest.TestCase):
fruit2_class_type: str = "Pear"
a: A
a2: A
a3: A
@classmethod
def a_tweak_args(cls, type, args):
assert type == A
args.n = 993
@classmethod
def a3_tweak_args(cls, type, args):
del args["n"]
@classmethod
def fruit_tweak_args(cls, type, args):
assert issubclass(type, Fruit)
@@ -712,7 +701,6 @@ class TestConfig(unittest.TestCase):
args = get_default_args(Wrapper)
self.assertEqual(args.a_args.n, 993)
self.assertEqual(args.a2_args.n, 9)
self.assertEqual(args.a3_args, {})
self.assertEqual(args.fruit_Pear_args.n_pips, 19)
self.assertEqual(args.fruit2_Pear_args.n_pips, 13)

View File

@@ -90,5 +90,5 @@ class TestGenericModel(unittest.TestCase):
remove_unused_components(instance_args)
yaml = OmegaConf.to_yaml(instance_args, sort_keys=False)
if DEBUG:
(DATA_DIR / "overrides_.yaml").write_text(yaml)
(DATA_DIR / "overrides.yaml_").write_text(yaml)
self.assertEqual(yaml, (DATA_DIR / "overrides.yaml").read_text())

View File

@@ -1,57 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import os
import unittest
import torch
from pytorch3d.implicitron.dataset.dataset_base import FrameData
from pytorch3d.implicitron.dataset.rendered_mesh_dataset_map_provider import (
RenderedMeshDatasetMapProvider,
)
from pytorch3d.implicitron.tools.config import expand_args_fields
from pytorch3d.renderer import FoVPerspectiveCameras
from tests.common_testing import TestCaseMixin
inside_re_worker = os.environ.get("INSIDE_RE_WORKER", False)
class TestDataCow(TestCaseMixin, unittest.TestCase):
def test_simple(self):
if inside_re_worker:
return
expand_args_fields(RenderedMeshDatasetMapProvider)
self._runtest(use_point_light=True, num_views=4)
self._runtest(use_point_light=False, num_views=4)
def _runtest(self, **kwargs):
provider = RenderedMeshDatasetMapProvider(**kwargs)
dataset_map = provider.get_dataset_map()
known_matrix = torch.zeros(1, 4, 4)
known_matrix[0, 0, 0] = 1.7321
known_matrix[0, 1, 1] = 1.7321
known_matrix[0, 2, 2] = 1.0101
known_matrix[0, 3, 2] = -1.0101
known_matrix[0, 2, 3] = 1
self.assertIsNone(dataset_map.val)
self.assertIsNone(dataset_map.test)
self.assertEqual(len(dataset_map.train), provider.num_views)
value = dataset_map.train[0]
self.assertIsInstance(value, FrameData)
self.assertEqual(value.image_rgb.shape, (3, 128, 128))
self.assertEqual(value.fg_probability.shape, (1, 128, 128))
# corner of image is background
self.assertEqual(value.fg_probability[0, 0, 0], 0)
self.assertEqual(value.fg_probability.max(), 1.0)
self.assertIsInstance(value.camera, FoVPerspectiveCameras)
self.assertEqual(len(value.camera), 1)
self.assertIsNone(value.camera.K)
matrix = value.camera.get_projection_transform().get_matrix()
self.assertClose(matrix, known_matrix, atol=1e-4)

View File

@@ -69,7 +69,6 @@ class TestDataLlff(TestCaseMixin, unittest.TestCase):
provider = LlffDatasetMapProvider(
base_dir="manifold://co3d/tree/nerf_data/nerf_llff_data/fern",
object_name="fern",
downscale_factor=8,
)
dataset_map = provider.get_dataset_map()
known_matrix = torch.zeros(1, 4, 4)

View File

@@ -10,10 +10,6 @@ import unittest.mock
import torch
from omegaconf import OmegaConf
from pytorch3d.implicitron.dataset.data_loader_map_provider import (
SequenceDataLoaderMapProvider,
SimpleDataLoaderMapProvider,
)
from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource
from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset
from pytorch3d.implicitron.tools.config import get_default_args
@@ -68,6 +64,7 @@ class TestDataSource(unittest.TestCase):
return
args = get_default_args(ImplicitronDataSource)
args.dataset_map_provider_class_type = "JsonIndexDatasetMapProvider"
args.data_loader_map_provider_class_type = "SequenceDataLoaderMapProvider"
dataset_args = args.dataset_map_provider_JsonIndexDatasetMapProvider_args
dataset_args.category = "skateboard"
dataset_args.test_restrict_sequence_id = 0
@@ -76,35 +73,8 @@ class TestDataSource(unittest.TestCase):
dataset_args.dataset_root = "manifold://co3d/tree/extracted"
data_source = ImplicitronDataSource(**args)
self.assertIsInstance(
data_source.data_loader_map_provider, SequenceDataLoaderMapProvider
)
_, data_loaders = data_source.get_datasets_and_dataloaders()
self.assertEqual(len(data_loaders.train), 81)
for i in data_loaders.train:
self.assertEqual(i.frame_type, ["test_known"])
break
def test_simple(self):
if os.environ.get("INSIDE_RE_WORKER") is not None:
return
args = get_default_args(ImplicitronDataSource)
args.dataset_map_provider_class_type = "JsonIndexDatasetMapProvider"
args.data_loader_map_provider_class_type = "SimpleDataLoaderMapProvider"
dataset_args = args.dataset_map_provider_JsonIndexDatasetMapProvider_args
dataset_args.category = "skateboard"
dataset_args.test_restrict_sequence_id = 0
dataset_args.n_frames_per_sequence = -1
dataset_args.dataset_root = "manifold://co3d/tree/extracted"
data_source = ImplicitronDataSource(**args)
self.assertIsInstance(
data_source.data_loader_map_provider, SimpleDataLoaderMapProvider
)
_, data_loaders = data_source.get_datasets_and_dataloaders()
self.assertEqual(len(data_loaders.train), 81)
for i in data_loaders.train:
self.assertEqual(i.frame_type, ["test_known"])
break

Some files were not shown because too many files have changed in this diff Show More