mirror of
https://github.com/facebookresearch/pytorch3d.git
synced 2026-02-28 09:16:00 +08:00
Compare commits
1 Commits
v0.7.0
...
classner-p
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e7c1f026ea |
@@ -159,7 +159,7 @@ jobs:
|
||||
binary_macos_wheel:
|
||||
<<: *binary_common
|
||||
macos:
|
||||
xcode: "13.4.1"
|
||||
xcode: "12.0"
|
||||
steps:
|
||||
- checkout
|
||||
- run:
|
||||
|
||||
@@ -159,7 +159,7 @@ jobs:
|
||||
binary_macos_wheel:
|
||||
<<: *binary_common
|
||||
macos:
|
||||
xcode: "13.4.1"
|
||||
xcode: "12.0"
|
||||
steps:
|
||||
- checkout
|
||||
- run:
|
||||
|
||||
@@ -12,7 +12,6 @@ Key features include:
|
||||
- Data structure for storing and manipulating triangle meshes
|
||||
- Efficient operations on triangle meshes (projective transformations, graph convolution, sampling, loss functions)
|
||||
- A differentiable mesh renderer
|
||||
- Implicitron, see [its README](projects/implicitron_trainer), a framework for new-view synthesis via implicit representations.
|
||||
|
||||
PyTorch3D is designed to integrate smoothly with deep learning methods for predicting and manipulating 3D data.
|
||||
For this reason, all operators in PyTorch3D:
|
||||
@@ -94,7 +93,6 @@ In alphabetical order:
|
||||
|
||||
* Amitav Baruah
|
||||
* Steve Branson
|
||||
* Krzysztof Chalupka
|
||||
* Luya Gao
|
||||
* Georgia Gkioxari
|
||||
* Taylor Gordon
|
||||
|
||||
@@ -89,7 +89,7 @@
|
||||
"except ModuleNotFoundError:\n",
|
||||
" need_pytorch3d=True\n",
|
||||
"if need_pytorch3d:\n",
|
||||
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
|
||||
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
|
||||
" # We try to install PyTorch3D via a released wheel.\n",
|
||||
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
|
||||
" version_str=\"\".join([\n",
|
||||
|
||||
@@ -76,7 +76,7 @@
|
||||
"except ModuleNotFoundError:\n",
|
||||
" need_pytorch3d=True\n",
|
||||
"if need_pytorch3d:\n",
|
||||
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
|
||||
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
|
||||
" # We try to install PyTorch3D via a released wheel.\n",
|
||||
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
|
||||
" version_str=\"\".join([\n",
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
# Acknowledgements
|
||||
|
||||
Thank you to Keenan Crane for allowing the cow mesh model to be used freely in the public domain.
|
||||
Thank you to Keenen Crane for allowing the cow mesh model to be used freely in the public domain.
|
||||
|
||||
###### Source: http://www.cs.cmu.edu/~kmcrane/Projects/ModelRepository/
|
||||
|
||||
@@ -51,7 +51,7 @@
|
||||
"except ModuleNotFoundError:\n",
|
||||
" need_pytorch3d=True\n",
|
||||
"if need_pytorch3d:\n",
|
||||
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
|
||||
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
|
||||
" # We try to install PyTorch3D via a released wheel.\n",
|
||||
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
|
||||
" version_str=\"\".join([\n",
|
||||
|
||||
@@ -90,7 +90,7 @@
|
||||
"except ModuleNotFoundError:\n",
|
||||
" need_pytorch3d=True\n",
|
||||
"if need_pytorch3d:\n",
|
||||
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
|
||||
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
|
||||
" # We try to install PyTorch3D via a released wheel.\n",
|
||||
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
|
||||
" version_str=\"\".join([\n",
|
||||
|
||||
@@ -56,7 +56,7 @@
|
||||
"except ModuleNotFoundError:\n",
|
||||
" need_pytorch3d=True\n",
|
||||
"if need_pytorch3d:\n",
|
||||
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
|
||||
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
|
||||
" # We try to install PyTorch3D via a released wheel.\n",
|
||||
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
|
||||
" version_str=\"\".join([\n",
|
||||
|
||||
@@ -68,7 +68,7 @@
|
||||
"except ModuleNotFoundError:\n",
|
||||
" need_pytorch3d=True\n",
|
||||
"if need_pytorch3d:\n",
|
||||
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
|
||||
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
|
||||
" # We try to install PyTorch3D via a released wheel.\n",
|
||||
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
|
||||
" version_str=\"\".join([\n",
|
||||
|
||||
@@ -47,7 +47,7 @@
|
||||
"except ModuleNotFoundError:\n",
|
||||
" need_pytorch3d=True\n",
|
||||
"if need_pytorch3d:\n",
|
||||
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
|
||||
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
|
||||
" # We try to install PyTorch3D via a released wheel.\n",
|
||||
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
|
||||
" version_str=\"\".join([\n",
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,913 +0,0 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659619824914,
|
||||
"executionStopTime": 1659619825485,
|
||||
"originalKey": "d38652e8-200a-413c-a36a-f4d349b78a9d",
|
||||
"requestMsgId": "641de8aa-0e42-4446-9304-c160a2d226bf",
|
||||
"showInput": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"customInput": null,
|
||||
"originalKey": "a48a9dcf-e80f-474b-a0c4-2c9a765b15c5",
|
||||
"showInput": false
|
||||
},
|
||||
"source": [
|
||||
"# A simple model using Implicitron\n",
|
||||
"\n",
|
||||
"In this demo, we use the VolumeRenderer from PyTorch3D as a custom implicit function in Implicitron. We will see\n",
|
||||
"* some of the main objects in Implicitron\n",
|
||||
"* how to plug in a custom part of a model"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"customInput": null,
|
||||
"originalKey": "51337c0e-ad27-4b75-ad6a-737dca5d7b95",
|
||||
"showInput": false
|
||||
},
|
||||
"source": [
|
||||
"## 0. Install and import modules\n",
|
||||
"\n",
|
||||
"Ensure `torch` and `torchvision` are installed. If `pytorch3d` is not installed, install it using the following cell:\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659619898147,
|
||||
"executionStopTime": 1659619898274,
|
||||
"originalKey": "76f1ecd4-6b73-4214-81b0-118ef8d86872",
|
||||
"requestMsgId": "deb6a860-6923-4227-abef-d31388b5142d",
|
||||
"showInput": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"import sys\n",
|
||||
"import torch\n",
|
||||
"need_pytorch3d=False\n",
|
||||
"try:\n",
|
||||
" import pytorch3d\n",
|
||||
"except ModuleNotFoundError:\n",
|
||||
" need_pytorch3d=True\n",
|
||||
"if need_pytorch3d:\n",
|
||||
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
|
||||
" # We try to install PyTorch3D via a released wheel.\n",
|
||||
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
|
||||
" version_str=\"\".join([\n",
|
||||
" f\"py3{sys.version_info.minor}_cu\",\n",
|
||||
" torch.version.cuda.replace(\".\",\"\"),\n",
|
||||
" f\"_pyt{pyt_version_str}\"\n",
|
||||
" ])\n",
|
||||
" !pip install fvcore iopath\n",
|
||||
" !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",
|
||||
" else:\n",
|
||||
" # We try to install PyTorch3D from source.\n",
|
||||
" !curl -LO https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz\n",
|
||||
" !tar xzf 1.10.0.tar.gz\n",
|
||||
" os.environ[\"CUB_HOME\"] = os.getcwd() + \"/cub-1.10.0\"\n",
|
||||
" !pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"customInput": null,
|
||||
"originalKey": "2c1020e6-eb4a-4644-9719-9147500d8e4f",
|
||||
"showInput": false
|
||||
},
|
||||
"source": [
|
||||
"Ensure omegaconf and visdom are installed. If not, run this cell. (It should not be necessary to restart the runtime.)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"originalKey": "9e751931-a38d-44c9-9ff1-ac2f7d3a3f99",
|
||||
"showInput": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install omegaconf visdom"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"code_folding": [],
|
||||
"collapsed": false,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659612480556,
|
||||
"executionStopTime": 1659612480644,
|
||||
"hidden_ranges": [],
|
||||
"originalKey": "86807e4a-1675-4520-a033-c7af85b233ec",
|
||||
"requestMsgId": "880a7e20-4a90-4b37-a5eb-bccc0b23cac6"
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import logging\n",
|
||||
"from typing import Tuple\n",
|
||||
"\n",
|
||||
"import matplotlib.animation as animation\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import numpy as np\n",
|
||||
"import torch\n",
|
||||
"import tqdm\n",
|
||||
"from IPython.display import HTML\n",
|
||||
"from omegaconf import OmegaConf\n",
|
||||
"from PIL import Image\n",
|
||||
"from pytorch3d.implicitron.dataset.dataset_base import FrameData\n",
|
||||
"from pytorch3d.implicitron.dataset.rendered_mesh_dataset_map_provider import RenderedMeshDatasetMapProvider\n",
|
||||
"from pytorch3d.implicitron.models.generic_model import GenericModel\n",
|
||||
"from pytorch3d.implicitron.models.implicit_function.base import ImplicitFunctionBase\n",
|
||||
"from pytorch3d.implicitron.models.renderer.base import EvaluationMode\n",
|
||||
"from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args, registry, remove_unused_components\n",
|
||||
"from pytorch3d.renderer import RayBundle\n",
|
||||
"from pytorch3d.renderer.implicit.renderer import VolumeSampler\n",
|
||||
"from pytorch3d.structures import Volumes\n",
|
||||
"from pytorch3d.vis.plotly_vis import plot_batch_individually, plot_scene"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"code_folding": [],
|
||||
"collapsed": false,
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659610929375,
|
||||
"executionStopTime": 1659610929383,
|
||||
"hidden_ranges": [],
|
||||
"originalKey": "b2d9f5bd-a9d4-4f78-b21e-92f2658e0fe9",
|
||||
"requestMsgId": "7e43e623-4030-438b-af4e-b96170c9a052",
|
||||
"showInput": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"output_resolution = 80"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"code_folding": [],
|
||||
"collapsed": false,
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659610930042,
|
||||
"executionStopTime": 1659610930050,
|
||||
"hidden_ranges": [],
|
||||
"originalKey": "0b0c2087-4c86-4c57-b0ee-6f48a70a9c78",
|
||||
"requestMsgId": "46883aad-f00b-4fd4-ac17-eec0b2ac272a",
|
||||
"showInput": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"torch.set_printoptions(sci_mode=False)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"customInput": null,
|
||||
"originalKey": "37809d0d-b02e-42df-85b6-cdd038373653",
|
||||
"showInput": false
|
||||
},
|
||||
"source": [
|
||||
"## 1. Load renders of a mesh (the cow mesh) as a dataset\n",
|
||||
"\n",
|
||||
"A dataset's train, val and test parts in Implicitron are represented as a `dataset_map`, and provided by an implementation of `DatasetMapProvider`. \n",
|
||||
"`RenderedMeshDatasetMapProvider` is one which generates a single-scene dataset with only a train component by taking a mesh and rendering it.\n",
|
||||
"We use it with the cow mesh."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659620739780,
|
||||
"executionStopTime": 1659620739914,
|
||||
"originalKey": "cc68cb9c-b8bf-4e9e-bef1-2cfafdf6caa2",
|
||||
"requestMsgId": "398cfcae-5d43-4b6f-9c75-db3d297364d4",
|
||||
"showInput": false
|
||||
},
|
||||
"source": [
|
||||
"If running this notebook using **Google Colab**, run the following cell to fetch the mesh obj and texture files and save it at the path data/cow_mesh.\n",
|
||||
"If running locally, the data is already available at the correct path."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"originalKey": "2c55e002-a885-4169-8fdc-af9078b05968",
|
||||
"showInput": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!mkdir -p data/cow_mesh\n",
|
||||
"!wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.obj\n",
|
||||
"!wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.mtl\n",
|
||||
"!wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow_texture.png"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"customInput": null,
|
||||
"originalKey": "2a976be8-01bf-4a1c-a6e7-61d5d08c3dbd",
|
||||
"showInput": false
|
||||
},
|
||||
"source": [
|
||||
"If we want to instantiate one of Implicitron's configurable objects, such as `RenderedMeshDatasetMapProvider`, without using the OmegaConf initialisation (get_default_args), we need to call `expand_args_fields` on the class first."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"code_folding": [],
|
||||
"collapsed": false,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659621652237,
|
||||
"executionStopTime": 1659621652903,
|
||||
"hidden_ranges": [],
|
||||
"originalKey": "eb77aaec-048c-40bd-bd69-0e66b6ab60b1",
|
||||
"requestMsgId": "09b9975c-ff86-41c9-b4a9-975d23afc562",
|
||||
"showInput": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"expand_args_fields(RenderedMeshDatasetMapProvider)\n",
|
||||
"cow_provider = RenderedMeshDatasetMapProvider(\n",
|
||||
" data_file=\"data/cow_mesh/cow.obj\",\n",
|
||||
" use_point_light=False,\n",
|
||||
" resolution=output_resolution,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"code_folding": [],
|
||||
"collapsed": false,
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659610966145,
|
||||
"executionStopTime": 1659610966255,
|
||||
"hidden_ranges": [],
|
||||
"originalKey": "8210e15b-da48-4306-a49a-41c4e7e7d42f",
|
||||
"requestMsgId": "c243edd2-a106-4fba-8471-dfa4f99a2088",
|
||||
"showInput": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dataset_map = cow_provider.get_dataset_map()\n",
|
||||
"tr_cameras = [training_frame.camera for training_frame in dataset_map.train]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"code_folding": [],
|
||||
"collapsed": false,
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659610967703,
|
||||
"executionStopTime": 1659610967848,
|
||||
"hidden_ranges": [],
|
||||
"originalKey": "458d72ad-d9a7-4f13-b5b7-90d2aec61c16",
|
||||
"requestMsgId": "7f9431f3-8717-4d89-a7fe-1420dd0e00c4",
|
||||
"showInput": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# The cameras are all in the XZ plane, in a circle about 2.7 from the origin\n",
|
||||
"centers = torch.cat([i.get_camera_center() for i in tr_cameras])\n",
|
||||
"print(centers.min(0).values)\n",
|
||||
"print(centers.max(0).values)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"code_folding": [],
|
||||
"collapsed": false,
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659552920194,
|
||||
"executionStopTime": 1659552923122,
|
||||
"hidden_ranges": [],
|
||||
"originalKey": "931e712b-b141-437a-97fb-dc2a07ce3458",
|
||||
"requestMsgId": "931e712b-b141-437a-97fb-dc2a07ce3458",
|
||||
"showInput": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# visualization of the cameras\n",
|
||||
"plot = plot_scene({\"k\": {i: camera for i, camera in enumerate(tr_cameras)}}, camera_scale=0.25)\n",
|
||||
"plot.layout.scene.aspectmode = \"data\"\n",
|
||||
"plot"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"customInput": null,
|
||||
"originalKey": "afa9c02d-f76b-4f68-83e9-9733c615406b",
|
||||
"showInput": false
|
||||
},
|
||||
"source": [
|
||||
"## 2. Custom implicit function 🧊\n",
|
||||
"\n",
|
||||
"At the core of neural rendering methods are functions of spatial coordinates called implicit functions, which are used in some kind of rendering process.\n",
|
||||
"(Often those functions can additionally take other data as well, such as view direction.)\n",
|
||||
"A common rendering process is ray marching over densities and colors provided by an implicit function.\n",
|
||||
"In our case, taking samples from a 3D volume grid is a very simple function of spatial coordinates. \n",
|
||||
"\n",
|
||||
"Here we define our own implicit function, which uses PyTorch3D's existing functionality for sampling from a volume grid.\n",
|
||||
"We do this by subclassing `ImplicitFunctionBase`.\n",
|
||||
"We need to register our subclass with a special decorator.\n",
|
||||
"We use Python's dataclass annotations for configuring the module."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"code_folding": [],
|
||||
"collapsed": false,
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659613575850,
|
||||
"executionStopTime": 1659613575940,
|
||||
"hidden_ranges": [],
|
||||
"originalKey": "61b55043-dc52-4de7-992e-e2195edd2123",
|
||||
"requestMsgId": "dfaace3c-098c-4ffe-9240-6a7ae0ff271e",
|
||||
"showInput": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@registry.register\n",
|
||||
"class MyVolumes(ImplicitFunctionBase, torch.nn.Module):\n",
|
||||
" grid_resolution: int = 50 # common HWD of volumes, the number of voxels in each direction\n",
|
||||
" extent: float = 1.0 # In world coordinates, the volume occupies is [-extent, extent] along each axis\n",
|
||||
"\n",
|
||||
" def __post_init__(self):\n",
|
||||
" # We have to call this explicitly if there are other base classes like Module\n",
|
||||
" super().__init__()\n",
|
||||
"\n",
|
||||
" # We define parameters like other torch.nn.Module objects.\n",
|
||||
" # In this case, both our parameter tensors are trainable; they govern the contents of the volume grid.\n",
|
||||
" density = torch.full((self.grid_resolution, self.grid_resolution, self.grid_resolution), -2.0)\n",
|
||||
" self.density = torch.nn.Parameter(density)\n",
|
||||
" color = torch.full((3, self.grid_resolution, self.grid_resolution, self.grid_resolution), 0.0)\n",
|
||||
" self.color = torch.nn.Parameter(color)\n",
|
||||
" self.density_activation = torch.nn.Softplus()\n",
|
||||
"\n",
|
||||
" def forward(\n",
|
||||
" self,\n",
|
||||
" ray_bundle: RayBundle,\n",
|
||||
" fun_viewpool=None,\n",
|
||||
" global_code=None,\n",
|
||||
" ):\n",
|
||||
" densities = self.density_activation(self.density[None, None])\n",
|
||||
" voxel_size = 2.0 * float(self.extent) / self.grid_resolution\n",
|
||||
" features = self.color.sigmoid()[None]\n",
|
||||
"\n",
|
||||
" # Like other PyTorch3D structures, the actual Volumes object should only exist as long\n",
|
||||
" # as one iteration of training. It is local to this function.\n",
|
||||
"\n",
|
||||
" volume = Volumes(densities=densities, features=features, voxel_size=voxel_size)\n",
|
||||
" sampler = VolumeSampler(volumes=volume)\n",
|
||||
" densities, features = sampler(ray_bundle)\n",
|
||||
"\n",
|
||||
" # When an implicit function is used for raymarching, i.e. for MultiPassEmissionAbsorptionRenderer,\n",
|
||||
" # it must return (densities, features, an auxiliary tuple)\n",
|
||||
" return densities, features, {}\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"customInput": null,
|
||||
"originalKey": "abaf2cd6-1b68-400e-a142-8fb9f49953f3",
|
||||
"showInput": false
|
||||
},
|
||||
"source": [
|
||||
"## 3. Construct the model object.\n",
|
||||
"\n",
|
||||
"The main model object in PyTorch3D is `GenericModel`, which has pluggable components for the major steps, including the renderer and the implicit function(s).\n",
|
||||
"There are two ways to construct it which are equivalent here."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659621267561,
|
||||
"executionStopTime": 1659621267938,
|
||||
"originalKey": "f26c3dce-fbae-4592-bd0e-e4a8abc57c2c",
|
||||
"requestMsgId": "9213687e-1caf-46a8-a4e5-a9c531530092",
|
||||
"showInput": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"CONSTRUCT_MODEL_FROM_CONFIG = True\n",
|
||||
"if CONSTRUCT_MODEL_FROM_CONFIG:\n",
|
||||
" # Via a DictConfig - this is how our training loop with hydra works\n",
|
||||
" cfg = get_default_args(GenericModel)\n",
|
||||
" cfg.implicit_function_class_type = \"MyVolumes\"\n",
|
||||
" cfg.render_image_height=output_resolution\n",
|
||||
" cfg.render_image_width=output_resolution\n",
|
||||
" cfg.loss_weights={\"loss_rgb_huber\": 1.0}\n",
|
||||
" cfg.tqdm_trigger_threshold=19000\n",
|
||||
" cfg.raysampler_AdaptiveRaySampler_args.scene_extent= 4.0\n",
|
||||
" gm = GenericModel(**cfg)\n",
|
||||
"else:\n",
|
||||
" # constructing GenericModel directly\n",
|
||||
" expand_args_fields(GenericModel)\n",
|
||||
" gm = GenericModel(\n",
|
||||
" implicit_function_class_type=\"MyVolumes\",\n",
|
||||
" render_image_height=output_resolution,\n",
|
||||
" render_image_width=output_resolution,\n",
|
||||
" loss_weights={\"loss_rgb_huber\": 1.0},\n",
|
||||
" tqdm_trigger_threshold=19000,\n",
|
||||
" raysampler_AdaptiveRaySampler_args = {\"scene_extent\": 4.0}\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" # In this case we can get the equivalent DictConfig cfg object to the way gm is configured as follows\n",
|
||||
" cfg = OmegaConf.structured(gm)\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"code_folding": [],
|
||||
"collapsed": false,
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659611214689,
|
||||
"executionStopTime": 1659611214748,
|
||||
"hidden_ranges": [],
|
||||
"originalKey": "4e659f7d-ce66-4999-83de-005eb09d7705",
|
||||
"requestMsgId": "7b815b2b-cf19-44d0-ae89-76fde6df35ec",
|
||||
"showInput": false
|
||||
},
|
||||
"source": [
|
||||
" The default renderer is an emission-absorbtion raymarcher. We keep that default."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"code_folding": [],
|
||||
"collapsed": false,
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659621268007,
|
||||
"executionStopTime": 1659621268190,
|
||||
"hidden_ranges": [],
|
||||
"originalKey": "d37ae488-c57c-44d3-9def-825dc1a6495b",
|
||||
"requestMsgId": "71143ec1-730f-4876-8a14-e46eea9d6dd1",
|
||||
"showInput": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# We can display the configuration in use as follows.\n",
|
||||
"remove_unused_components(cfg)\n",
|
||||
"yaml = OmegaConf.to_yaml(cfg, sort_keys=False)\n",
|
||||
"%page -r yaml"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"code_folding": [],
|
||||
"collapsed": false,
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659621268727,
|
||||
"executionStopTime": 1659621268776,
|
||||
"hidden_ranges": [],
|
||||
"originalKey": "52e53179-3c6e-4c1f-a38a-3a6d803687bb",
|
||||
"requestMsgId": "05de9bc3-3f74-4a6f-851c-9ec919b59506",
|
||||
"showInput": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"device = torch.device(\"cuda:0\")\n",
|
||||
"gm.to(device)\n",
|
||||
"assert next(gm.parameters()).is_cuda"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"customInput": null,
|
||||
"originalKey": "528a7d53-c645-49c2-9021-09adbb18cd23",
|
||||
"showInput": false
|
||||
},
|
||||
"source": [
|
||||
"## 4. train the model "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"code_folding": [],
|
||||
"collapsed": false,
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659621270236,
|
||||
"executionStopTime": 1659621270446,
|
||||
"hidden_ranges": [],
|
||||
"originalKey": "953280bd-3161-42ba-8dcb-0c8ef2d5cc25",
|
||||
"requestMsgId": "9bba424b-7bfd-4e5a-9d79-ae316e20bab0",
|
||||
"showInput": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"train_data_collated = [FrameData.collate([frame.to(device)]) for frame in dataset_map.train]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"code_folding": [],
|
||||
"collapsed": false,
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659621270815,
|
||||
"executionStopTime": 1659621270948,
|
||||
"hidden_ranges": [],
|
||||
"originalKey": "2fcf07f0-0c28-49c7-8c76-1c9a9d810167",
|
||||
"requestMsgId": "821deb43-6084-4ece-83c3-dee214562c47",
|
||||
"showInput": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"gm.train()\n",
|
||||
"optimizer = torch.optim.Adam(gm.parameters(), lr=0.1)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"code_folding": [],
|
||||
"collapsed": false,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659621271875,
|
||||
"executionStopTime": 1659621298146,
|
||||
"hidden_ranges": [],
|
||||
"originalKey": "105099f7-ed0c-4e7f-a976-61a93fd0a8fe",
|
||||
"requestMsgId": "0c87c108-83e3-4129-ad02-85e0140f1368",
|
||||
"showInput": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"iterator = tqdm.tqdm(range(2000))\n",
|
||||
"for n_batch in iterator:\n",
|
||||
" optimizer.zero_grad()\n",
|
||||
"\n",
|
||||
" frame = train_data_collated[n_batch % len(dataset_map.train)]\n",
|
||||
" out = gm(**frame, evaluation_mode=EvaluationMode.TRAINING)\n",
|
||||
" out[\"objective\"].backward()\n",
|
||||
" if n_batch % 100 == 0:\n",
|
||||
" iterator.set_postfix_str(f\"loss: {float(out['objective']):.5f}\")\n",
|
||||
" optimizer.step()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659535024768,
|
||||
"executionStopTime": 1659535024906,
|
||||
"originalKey": "e3cd494a-536b-48bc-8290-c048118c82eb",
|
||||
"requestMsgId": "e3cd494a-536b-48bc-8290-c048118c82eb",
|
||||
"showInput": false
|
||||
},
|
||||
"source": [
|
||||
"## 5. Evaluate the module\n",
|
||||
"\n",
|
||||
"We generate complete images from all the viewpoints to see how they look."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"code_folding": [],
|
||||
"collapsed": false,
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659621299859,
|
||||
"executionStopTime": 1659621311133,
|
||||
"hidden_ranges": [],
|
||||
"originalKey": "fbe1b2ea-cc24-4b20-a2d7-0249185e34a5",
|
||||
"requestMsgId": "771ef1f8-5eee-4932-9e81-33604bf0512a",
|
||||
"showInput": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def to_numpy_image(image):\n",
|
||||
" # Takes an image of shape (C, H, W) in [0,1], where C=3 or 1\n",
|
||||
" # to a numpy uint image of shape (H, W, 3)\n",
|
||||
" return (image * 255).to(torch.uint8).permute(1, 2, 0).detach().cpu().expand(-1, -1, 3).numpy()\n",
|
||||
"def resize_image(image):\n",
|
||||
" # Takes images of shape (B, C, H, W) to (B, C, output_resolution, output_resolution)\n",
|
||||
" return torch.nn.functional.interpolate(image, size=(output_resolution, output_resolution))\n",
|
||||
"\n",
|
||||
"gm.eval()\n",
|
||||
"images = []\n",
|
||||
"expected = []\n",
|
||||
"masks = []\n",
|
||||
"masks_expected = []\n",
|
||||
"for frame in tqdm.tqdm(train_data_collated):\n",
|
||||
" with torch.no_grad():\n",
|
||||
" out = gm(**frame, evaluation_mode=EvaluationMode.EVALUATION)\n",
|
||||
"\n",
|
||||
" image_rgb = to_numpy_image(out[\"images_render\"][0])\n",
|
||||
" mask = to_numpy_image(out[\"masks_render\"][0])\n",
|
||||
" expd = to_numpy_image(resize_image(frame.image_rgb)[0])\n",
|
||||
" mask_expected = to_numpy_image(resize_image(frame.fg_probability)[0])\n",
|
||||
"\n",
|
||||
" images.append(image_rgb)\n",
|
||||
" masks.append(mask)\n",
|
||||
" expected.append(expd)\n",
|
||||
" masks_expected.append(mask_expected)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659614622542,
|
||||
"executionStopTime": 1659614622757,
|
||||
"originalKey": "24953039-9780-40fd-bd81-5d63e9f40069",
|
||||
"requestMsgId": "7af895a3-dfe4-4c28-ac3b-4ff0fbb40c7f",
|
||||
"showInput": false
|
||||
},
|
||||
"source": [
|
||||
"We draw a grid showing predicted image and expected image, followed by predicted mask and expected mask, from each viewpoint. \n",
|
||||
"This is a grid of four rows of images, wrapped in to several large rows, i.e..\n",
|
||||
"<small><center>\n",
|
||||
"```\n",
|
||||
"┌────────┬────────┐ ┌────────┐\n",
|
||||
"│pred │pred │ │pred │\n",
|
||||
"│image │image │ │image │\n",
|
||||
"│1 │2 │ │n │\n",
|
||||
"├────────┼────────┤ ├────────┤\n",
|
||||
"│expected│expected│ │expected│\n",
|
||||
"│image │image │ ... │image │\n",
|
||||
"│1 │2 │ │n │\n",
|
||||
"├────────┼────────┤ ├────────┤\n",
|
||||
"│pred │pred │ │pred │\n",
|
||||
"│mask │mask │ │mask │\n",
|
||||
"│1 │2 │ │n │\n",
|
||||
"├────────┼────────┤ ├────────┤\n",
|
||||
"│expected│expected│ │expected│\n",
|
||||
"│mask │mask │ │mask │\n",
|
||||
"│1 │2 │ │n │\n",
|
||||
"├────────┼────────┤ ├────────┤\n",
|
||||
"│pred │pred │ │pred │\n",
|
||||
"│image │image │ │image │\n",
|
||||
"│n+1 │n+1 │ │2n │\n",
|
||||
"├────────┼────────┤ ├────────┤\n",
|
||||
"│expected│expected│ │expected│\n",
|
||||
"│image │image │ ... │image │\n",
|
||||
"│n+1 │n+2 │ │2n │\n",
|
||||
"├────────┼────────┤ ├────────┤\n",
|
||||
"│pred │pred │ │pred │\n",
|
||||
"│mask │mask │ │mask │\n",
|
||||
"│n+1 │n+2 │ │2n │\n",
|
||||
"├────────┼────────┤ ├────────┤\n",
|
||||
"│expected│expected│ │expected│\n",
|
||||
"│mask │mask │ │mask │\n",
|
||||
"│n+1 │n+2 │ │2n │\n",
|
||||
"└────────┴────────┘ └────────┘\n",
|
||||
" ...\n",
|
||||
"```\n",
|
||||
"</center></small>"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"code_folding": [],
|
||||
"collapsed": false,
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659621313894,
|
||||
"executionStopTime": 1659621314042,
|
||||
"hidden_ranges": [],
|
||||
"originalKey": "c488a34a-e46d-4649-93fb-4b1bb5a0e439",
|
||||
"requestMsgId": "4221e632-fca1-4fe5-b2e3-f92c37aa40e4",
|
||||
"showInput": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"images_to_display = [images.copy(), expected.copy(), masks.copy(), masks_expected.copy()]\n",
|
||||
"n_rows = 4\n",
|
||||
"n_images = len(images)\n",
|
||||
"blank_image = images[0] * 0\n",
|
||||
"n_per_row = 1+(n_images-1)//n_rows\n",
|
||||
"for _ in range(n_per_row*n_rows - n_images):\n",
|
||||
" for group in images_to_display:\n",
|
||||
" group.append(blank_image)\n",
|
||||
"\n",
|
||||
"images_to_display_listed = [[[i] for i in j] for j in images_to_display]\n",
|
||||
"split = []\n",
|
||||
"for row in range(n_rows):\n",
|
||||
" for group in images_to_display_listed:\n",
|
||||
" split.append(group[row*n_per_row:(row+1)*n_per_row]) \n",
|
||||
"\n",
|
||||
"Image.fromarray(np.block(split))\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"code_folding": [],
|
||||
"collapsed": false,
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659621323795,
|
||||
"executionStopTime": 1659621323820,
|
||||
"hidden_ranges": [],
|
||||
"originalKey": "49eab9e1-4fe2-4fbe-b4f3-7b6953340170",
|
||||
"requestMsgId": "85b402ad-f903-431f-a13e-c2d697e869bb",
|
||||
"showInput": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Print the maximum channel intensity in the first image.\n",
|
||||
"print(images[1].max()/255)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"code_folding": [],
|
||||
"collapsed": false,
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659621408642,
|
||||
"executionStopTime": 1659621409559,
|
||||
"hidden_ranges": [],
|
||||
"originalKey": "137d2c43-d39d-4266-ac5e-2b714da5e0ee",
|
||||
"requestMsgId": "8e27ec57-c2d6-4ae0-be69-b63b6af929ff",
|
||||
"showInput": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"plt.ioff()\n",
|
||||
"fig, ax = plt.subplots(figsize=(3,3))\n",
|
||||
"\n",
|
||||
"ax.grid(None)\n",
|
||||
"ims = [[ax.imshow(im, animated=True)] for im in images]\n",
|
||||
"ani = animation.ArtistAnimation(fig, ims, interval=80, blit=True)\n",
|
||||
"ani_html = ani.to_jshtml()\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659621409620,
|
||||
"executionStopTime": 1659621409725,
|
||||
"originalKey": "783e70d6-7cf1-4d76-a126-ba11ffc2f5be",
|
||||
"requestMsgId": "b6843506-c5fa-4508-80fc-8ecae51a934a",
|
||||
"showInput": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"HTML(ani_html)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {
|
||||
"collapsed": false,
|
||||
"customInput": null,
|
||||
"customOutput": null,
|
||||
"executionStartTime": 1659614670081,
|
||||
"executionStopTime": 1659614670168,
|
||||
"originalKey": "0286c350-2362-4f47-8181-2fc2ba51cfcf",
|
||||
"requestMsgId": "976f4db9-d4c7-466c-bcfd-218234400226",
|
||||
"showInput": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# If you want to see the output of the model with the volume forced to opaque white, run this and re-evaluate\n",
|
||||
"# with torch.no_grad():\n",
|
||||
"# gm._implicit_functions[0]._fn.density.fill_(9.0)\n",
|
||||
"# gm._implicit_functions[0]._fn.color.fill_(9.0)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"bento_stylesheets": {
|
||||
"bento/extensions/flow/main.css": true,
|
||||
"bento/extensions/kernel_selector/main.css": true,
|
||||
"bento/extensions/kernel_ui/main.css": true,
|
||||
"bento/extensions/new_kernel/main.css": true,
|
||||
"bento/extensions/system_usage/main.css": true,
|
||||
"bento/extensions/theme/main.css": true
|
||||
},
|
||||
"captumWidgetMessage": {},
|
||||
"dataExplorerConfig": {},
|
||||
"kernelspec": {
|
||||
"display_name": "pytorch3d",
|
||||
"language": "python",
|
||||
"metadata": {
|
||||
"cinder_runtime": false,
|
||||
"fbpkg_supported": true,
|
||||
"is_prebuilt": true,
|
||||
"kernel_name": "bento_kernel_pytorch3d",
|
||||
"nightly_builds": true
|
||||
},
|
||||
"name": "bento_kernel_pytorch3d"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3"
|
||||
},
|
||||
"last_base_url": "https://9177.od.fbinfra.net:443/",
|
||||
"last_kernel_id": "bb33cd83-7924-489a-8bd8-2d9d62eb0126",
|
||||
"last_msg_id": "99f7088e-d22b355b859660479ef0574e_5743",
|
||||
"last_server_session_id": "2944b203-9ea8-4c0e-9634-645dfea5f26b",
|
||||
"outputWidgetContext": {}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
||||
@@ -50,7 +50,7 @@
|
||||
"except ModuleNotFoundError:\n",
|
||||
" need_pytorch3d=True\n",
|
||||
"if need_pytorch3d:\n",
|
||||
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
|
||||
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
|
||||
" # We try to install PyTorch3D via a released wheel.\n",
|
||||
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
|
||||
" version_str=\"\".join([\n",
|
||||
|
||||
@@ -57,7 +57,7 @@
|
||||
"except ModuleNotFoundError:\n",
|
||||
" need_pytorch3d=True\n",
|
||||
"if need_pytorch3d:\n",
|
||||
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
|
||||
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
|
||||
" # We try to install PyTorch3D via a released wheel.\n",
|
||||
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
|
||||
" version_str=\"\".join([\n",
|
||||
|
||||
@@ -73,7 +73,7 @@
|
||||
"except ModuleNotFoundError:\n",
|
||||
" need_pytorch3d=True\n",
|
||||
"if need_pytorch3d:\n",
|
||||
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
|
||||
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
|
||||
" # We try to install PyTorch3D via a released wheel.\n",
|
||||
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
|
||||
" version_str=\"\".join([\n",
|
||||
|
||||
@@ -44,8 +44,6 @@ def generate_cow_renders(
|
||||
data_dir: The folder that contains the cow mesh files. If the cow mesh
|
||||
files do not exist in the folder, this function will automatically
|
||||
download them.
|
||||
azimuth_range: number of degrees on each side of the start position to
|
||||
take samples
|
||||
|
||||
Returns:
|
||||
cameras: A batch of `num_views` `FoVPerspectiveCameras` from which the
|
||||
|
||||
@@ -47,6 +47,7 @@ test:
|
||||
- imageio
|
||||
- hydra-core
|
||||
- accelerate
|
||||
- lpips
|
||||
commands:
|
||||
#pytest .
|
||||
python -m unittest discover -v -s tests -t .
|
||||
|
||||
@@ -37,7 +37,7 @@ See [Running](#running) section below for examples of training and evaluation co
|
||||
|
||||
To plug in custom implementations, for example, of renderer or implicit-function protocols, you need to create your own runner script and import the plug-in implementations there.
|
||||
First, install PyTorch3D and Implicitron dependencies as described in the previous section.
|
||||
Then, implement the custom script; copying `pytorch3d/projects/implicitron_trainer` is a good place to start.
|
||||
Then, implement the custom script; copying `pytorch3d/projects/implicitron_trainer/experiment.py` is a good place to start.
|
||||
See [Custom plugins](#custom-plugins) for more information on how to import implementations and enable them in the configs.
|
||||
|
||||
|
||||
@@ -203,29 +203,14 @@ to replace the implementation and potentially override the parameters.
|
||||
|
||||
# Code and config structure
|
||||
|
||||
The main object for this trainer loop is `Experiment`. It has four top-level replaceable components.
|
||||
|
||||
* `data_source`: This is a `DataSourceBase` which defaults to `ImplicitronDataSource`.
|
||||
It constructs the data sets and dataloaders.
|
||||
* `model_factory`: This is a `ModelFactoryBase` which defaults to `ImplicitronModelFactory`.
|
||||
It constructs the model, which is usually an instance of implicitron's main `GenericModel` class, and can load its weights from a checkpoint.
|
||||
* `optimizer_factory`: This is an `OptimizerFactoryBase` which defaults to `ImplicitronOptimizerFactory`.
|
||||
It constructs the optimizer and can load its weights from a checkpoint.
|
||||
* `training_loop`: This is a `TrainingLoopBase` which defaults to `ImplicitronTrainingLoop` and defines the main training loop.
|
||||
|
||||
As per above, the config structure is parsed automatically from the module hierarchy.
|
||||
In particular, for ImplicitronModelFactory with generic model, model parameters are contained in the `model_factory_ImplicitronModelFactory_args.model_GenericModel_args` node, and dataset parameters in `data_source_ImplicitronDataSource_args` node.
|
||||
In particular, model parameters are contained in `generic_model_args` node, and dataset parameters in `data_source_args` node.
|
||||
|
||||
Here is the class structure of GenericModel (single-line edges show aggregation, while double lines show available implementations):
|
||||
Here is the class structure (single-line edges show aggregation, while double lines show available implementations):
|
||||
```
|
||||
model_GenericModel_args: GenericModel
|
||||
└-- global_encoder_*_args: GlobalEncoderBase
|
||||
╘== SequenceAutodecoder
|
||||
└-- autodecoder_args: Autodecoder
|
||||
╘== HarmonicTimeEncoder
|
||||
└-- raysampler_*_args: RaySampler
|
||||
╘== AdaptiveRaysampler
|
||||
╘== NearFarRaysampler
|
||||
generic_model_args: GenericModel
|
||||
└-- sequence_autodecoder_args: Autodecoder
|
||||
└-- raysampler_args: RaySampler
|
||||
└-- renderer_*_args: BaseRenderer
|
||||
╘== MultiPassEmissionAbsorptionRenderer
|
||||
╘== LSTMRenderer
|
||||
@@ -243,16 +228,19 @@ model_GenericModel_args: GenericModel
|
||||
╘== IdrFeatureField
|
||||
└-- image_feature_extractor_*_args: FeatureExtractorBase
|
||||
╘== ResNetFeatureExtractor
|
||||
└-- view_pooler_args: ViewPooler
|
||||
└-- view_sampler_args: ViewSampler
|
||||
└-- feature_aggregator_*_args: FeatureAggregatorBase
|
||||
╘== IdentityFeatureAggregator
|
||||
╘== AngleWeightedIdentityFeatureAggregator
|
||||
╘== AngleWeightedReductionFeatureAggregator
|
||||
╘== ReductionFeatureAggregator
|
||||
└-- view_sampler_args: ViewSampler
|
||||
└-- feature_aggregator_*_args: FeatureAggregatorBase
|
||||
╘== IdentityFeatureAggregator
|
||||
╘== AngleWeightedIdentityFeatureAggregator
|
||||
╘== AngleWeightedReductionFeatureAggregator
|
||||
╘== ReductionFeatureAggregator
|
||||
solver_args: init_optimizer
|
||||
data_source_args: ImplicitronDataSource
|
||||
└-- dataset_map_provider_*_args
|
||||
└-- data_loader_map_provider_*_args
|
||||
```
|
||||
|
||||
Please look at the annotations of the respective classes or functions for the lists of hyperparameters. `tests/experiment.yaml` shows every possible option if you have no user-defined classes.
|
||||
Please look at the annotations of the respective classes or functions for the lists of hyperparameters.
|
||||
|
||||
# Reproducing CO3D experiments
|
||||
|
||||
|
||||
@@ -2,11 +2,10 @@ defaults:
|
||||
- default_config
|
||||
- _self_
|
||||
exp_dir: ./data/exps/base/
|
||||
training_loop_ImplicitronTrainingLoop_args:
|
||||
visdom_port: 8097
|
||||
visualize_interval: 0
|
||||
max_epochs: 1000
|
||||
data_source_ImplicitronDataSource_args:
|
||||
architecture: generic
|
||||
visualize_interval: 0
|
||||
visdom_port: 8097
|
||||
data_source_args:
|
||||
data_loader_map_provider_class_type: SequenceDataLoaderMapProvider
|
||||
dataset_map_provider_class_type: JsonIndexDatasetMapProvider
|
||||
data_loader_map_provider_SequenceDataLoaderMapProvider_args:
|
||||
@@ -22,59 +21,55 @@ data_source_ImplicitronDataSource_args:
|
||||
load_point_clouds: false
|
||||
mask_depths: false
|
||||
mask_images: false
|
||||
model_factory_ImplicitronModelFactory_args:
|
||||
model_GenericModel_args:
|
||||
loss_weights:
|
||||
loss_mask_bce: 1.0
|
||||
loss_prev_stage_mask_bce: 1.0
|
||||
loss_autodecoder_norm: 0.01
|
||||
loss_rgb_mse: 1.0
|
||||
loss_prev_stage_rgb_mse: 1.0
|
||||
output_rasterized_mc: false
|
||||
chunk_size_grid: 102400
|
||||
render_image_height: 400
|
||||
render_image_width: 400
|
||||
num_passes: 2
|
||||
implicit_function_NeuralRadianceFieldImplicitFunction_args:
|
||||
n_harmonic_functions_xyz: 10
|
||||
n_harmonic_functions_dir: 4
|
||||
n_hidden_neurons_xyz: 256
|
||||
n_hidden_neurons_dir: 128
|
||||
n_layers_xyz: 8
|
||||
append_xyz:
|
||||
- 5
|
||||
raysampler_AdaptiveRaySampler_args:
|
||||
n_rays_per_image_sampled_from_mask: 1024
|
||||
scene_extent: 8.0
|
||||
n_pts_per_ray_training: 64
|
||||
n_pts_per_ray_evaluation: 64
|
||||
stratified_point_sampling_training: true
|
||||
stratified_point_sampling_evaluation: false
|
||||
renderer_MultiPassEmissionAbsorptionRenderer_args:
|
||||
n_pts_per_ray_fine_training: 64
|
||||
n_pts_per_ray_fine_evaluation: 64
|
||||
append_coarse_samples_to_fine: true
|
||||
density_noise_std_train: 1.0
|
||||
view_pooler_args:
|
||||
view_sampler_args:
|
||||
masked_sampling: false
|
||||
image_feature_extractor_ResNetFeatureExtractor_args:
|
||||
stages:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
proj_dim: 16
|
||||
image_rescale: 0.32
|
||||
first_max_pool: false
|
||||
optimizer_factory_ImplicitronOptimizerFactory_args:
|
||||
breed: Adam
|
||||
weight_decay: 0.0
|
||||
lr_policy: MultiStepLR
|
||||
multistep_lr_milestones: []
|
||||
generic_model_args:
|
||||
loss_weights:
|
||||
loss_mask_bce: 1.0
|
||||
loss_prev_stage_mask_bce: 1.0
|
||||
loss_autodecoder_norm: 0.01
|
||||
loss_rgb_mse: 1.0
|
||||
loss_prev_stage_rgb_mse: 1.0
|
||||
output_rasterized_mc: false
|
||||
chunk_size_grid: 102400
|
||||
render_image_height: 400
|
||||
render_image_width: 400
|
||||
num_passes: 2
|
||||
implicit_function_NeuralRadianceFieldImplicitFunction_args:
|
||||
n_harmonic_functions_xyz: 10
|
||||
n_harmonic_functions_dir: 4
|
||||
n_hidden_neurons_xyz: 256
|
||||
n_hidden_neurons_dir: 128
|
||||
n_layers_xyz: 8
|
||||
append_xyz:
|
||||
- 5
|
||||
latent_dim: 0
|
||||
raysampler_AdaptiveRaySampler_args:
|
||||
n_rays_per_image_sampled_from_mask: 1024
|
||||
scene_extent: 8.0
|
||||
n_pts_per_ray_training: 64
|
||||
n_pts_per_ray_evaluation: 64
|
||||
stratified_point_sampling_training: true
|
||||
stratified_point_sampling_evaluation: false
|
||||
renderer_MultiPassEmissionAbsorptionRenderer_args:
|
||||
n_pts_per_ray_fine_training: 64
|
||||
n_pts_per_ray_fine_evaluation: 64
|
||||
append_coarse_samples_to_fine: true
|
||||
density_noise_std_train: 1.0
|
||||
view_pooler_args:
|
||||
view_sampler_args:
|
||||
masked_sampling: false
|
||||
image_feature_extractor_ResNetFeatureExtractor_args:
|
||||
stages:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
proj_dim: 16
|
||||
image_rescale: 0.32
|
||||
first_max_pool: false
|
||||
solver_args:
|
||||
breed: adam
|
||||
lr: 0.0005
|
||||
gamma: 0.1
|
||||
lr_policy: multistep
|
||||
max_epochs: 2000
|
||||
momentum: 0.9
|
||||
betas:
|
||||
- 0.9
|
||||
- 0.999
|
||||
weight_decay: 0.0
|
||||
|
||||
@@ -1,18 +1,17 @@
|
||||
model_factory_ImplicitronModelFactory_args:
|
||||
model_GenericModel_args:
|
||||
image_feature_extractor_class_type: ResNetFeatureExtractor
|
||||
image_feature_extractor_ResNetFeatureExtractor_args:
|
||||
add_images: true
|
||||
add_masks: true
|
||||
first_max_pool: true
|
||||
image_rescale: 0.375
|
||||
l2_norm: true
|
||||
name: resnet34
|
||||
normalize_image: true
|
||||
pretrained: true
|
||||
stages:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
proj_dim: 32
|
||||
generic_model_args:
|
||||
image_feature_extractor_class_type: ResNetFeatureExtractor
|
||||
image_feature_extractor_ResNetFeatureExtractor_args:
|
||||
add_images: true
|
||||
add_masks: true
|
||||
first_max_pool: true
|
||||
image_rescale: 0.375
|
||||
l2_norm: true
|
||||
name: resnet34
|
||||
normalize_image: true
|
||||
pretrained: true
|
||||
stages:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
proj_dim: 32
|
||||
|
||||
@@ -1,18 +1,17 @@
|
||||
model_factory_ImplicitronModelFactory_args:
|
||||
model_GenericModel_args:
|
||||
image_feature_extractor_class_type: ResNetFeatureExtractor
|
||||
image_feature_extractor_ResNetFeatureExtractor_args:
|
||||
add_images: true
|
||||
add_masks: true
|
||||
first_max_pool: false
|
||||
image_rescale: 0.375
|
||||
l2_norm: true
|
||||
name: resnet34
|
||||
normalize_image: true
|
||||
pretrained: true
|
||||
stages:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
proj_dim: 16
|
||||
generic_model_args:
|
||||
image_feature_extractor_class_type: ResNetFeatureExtractor
|
||||
image_feature_extractor_ResNetFeatureExtractor_args:
|
||||
add_images: true
|
||||
add_masks: true
|
||||
first_max_pool: false
|
||||
image_rescale: 0.375
|
||||
l2_norm: true
|
||||
name: resnet34
|
||||
normalize_image: true
|
||||
pretrained: true
|
||||
stages:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
proj_dim: 16
|
||||
|
||||
@@ -1,19 +1,18 @@
|
||||
model_factory_ImplicitronModelFactory_args:
|
||||
model_GenericModel_args:
|
||||
image_feature_extractor_class_type: ResNetFeatureExtractor
|
||||
image_feature_extractor_ResNetFeatureExtractor_args:
|
||||
stages:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
first_max_pool: false
|
||||
proj_dim: -1
|
||||
l2_norm: false
|
||||
image_rescale: 0.375
|
||||
name: resnet34
|
||||
normalize_image: true
|
||||
pretrained: true
|
||||
view_pooler_args:
|
||||
feature_aggregator_AngleWeightedReductionFeatureAggregator_args:
|
||||
reduction_functions:
|
||||
- AVG
|
||||
generic_model_args:
|
||||
image_feature_extractor_class_type: ResNetFeatureExtractor
|
||||
image_feature_extractor_ResNetFeatureExtractor_args:
|
||||
stages:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
first_max_pool: false
|
||||
proj_dim: -1
|
||||
l2_norm: false
|
||||
image_rescale: 0.375
|
||||
name: resnet34
|
||||
normalize_image: true
|
||||
pretrained: true
|
||||
view_pooler_args:
|
||||
feature_aggregator_AngleWeightedReductionFeatureAggregator_args:
|
||||
reduction_functions:
|
||||
- AVG
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
defaults:
|
||||
- repro_base.yaml
|
||||
- _self_
|
||||
data_source_ImplicitronDataSource_args:
|
||||
data_source_args:
|
||||
data_loader_map_provider_SequenceDataLoaderMapProvider_args:
|
||||
batch_size: 10
|
||||
dataset_length_train: 1000
|
||||
@@ -26,13 +26,10 @@ data_source_ImplicitronDataSource_args:
|
||||
n_frames_per_sequence: -1
|
||||
test_on_train: true
|
||||
test_restrict_sequence_id: 0
|
||||
optimizer_factory_ImplicitronOptimizerFactory_args:
|
||||
multistep_lr_milestones:
|
||||
- 1000
|
||||
training_loop_ImplicitronTrainingLoop_args:
|
||||
solver_args:
|
||||
max_epochs: 3000
|
||||
evaluator_ImplicitronEvaluator_args:
|
||||
camera_difficulty_bin_breaks:
|
||||
- 0.666667
|
||||
- 0.833334
|
||||
is_multisequence: true
|
||||
milestones:
|
||||
- 1000
|
||||
camera_difficulty_bin_breaks:
|
||||
- 0.666667
|
||||
- 0.833334
|
||||
|
||||
@@ -1,65 +1,65 @@
|
||||
defaults:
|
||||
- repro_multiseq_base.yaml
|
||||
- _self_
|
||||
model_factory_ImplicitronModelFactory_args:
|
||||
model_GenericModel_args:
|
||||
loss_weights:
|
||||
loss_mask_bce: 100.0
|
||||
loss_kl: 0.0
|
||||
loss_rgb_mse: 1.0
|
||||
loss_eikonal: 0.1
|
||||
chunk_size_grid: 65536
|
||||
num_passes: 1
|
||||
output_rasterized_mc: true
|
||||
sampling_mode_training: mask_sample
|
||||
global_encoder_class_type: SequenceAutodecoder
|
||||
global_encoder_SequenceAutodecoder_args:
|
||||
autodecoder_args:
|
||||
n_instances: 20000
|
||||
init_scale: 1.0
|
||||
encoding_dim: 256
|
||||
implicit_function_IdrFeatureField_args:
|
||||
n_harmonic_functions_xyz: 6
|
||||
bias: 0.6
|
||||
d_in: 3
|
||||
d_out: 1
|
||||
generic_model_args:
|
||||
loss_weights:
|
||||
loss_mask_bce: 100.0
|
||||
loss_kl: 0.0
|
||||
loss_rgb_mse: 1.0
|
||||
loss_eikonal: 0.1
|
||||
chunk_size_grid: 65536
|
||||
num_passes: 1
|
||||
output_rasterized_mc: true
|
||||
sampling_mode_training: mask_sample
|
||||
global_encoder_class_type: SequenceAutodecoder
|
||||
global_encoder_SequenceAutodecoder_args:
|
||||
autodecoder_args:
|
||||
n_instances: 20000
|
||||
init_scale: 1.0
|
||||
encoding_dim: 256
|
||||
implicit_function_IdrFeatureField_args:
|
||||
n_harmonic_functions_xyz: 6
|
||||
bias: 0.6
|
||||
d_in: 3
|
||||
d_out: 1
|
||||
dims:
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
geometric_init: true
|
||||
pooled_feature_dim: 0
|
||||
skip_in:
|
||||
- 6
|
||||
weight_norm: true
|
||||
renderer_SignedDistanceFunctionRenderer_args:
|
||||
ray_tracer_args:
|
||||
line_search_step: 0.5
|
||||
line_step_iters: 3
|
||||
n_secant_steps: 8
|
||||
n_steps: 100
|
||||
object_bounding_sphere: 8.0
|
||||
sdf_threshold: 5.0e-05
|
||||
ray_normal_coloring_network_args:
|
||||
d_in: 9
|
||||
d_out: 3
|
||||
dims:
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
geometric_init: true
|
||||
mode: idr
|
||||
n_harmonic_functions_dir: 4
|
||||
pooled_feature_dim: 0
|
||||
skip_in:
|
||||
- 6
|
||||
weight_norm: true
|
||||
renderer_SignedDistanceFunctionRenderer_args:
|
||||
ray_tracer_args:
|
||||
line_search_step: 0.5
|
||||
line_step_iters: 3
|
||||
n_secant_steps: 8
|
||||
n_steps: 100
|
||||
sdf_threshold: 5.0e-05
|
||||
ray_normal_coloring_network_args:
|
||||
d_in: 9
|
||||
d_out: 3
|
||||
dims:
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
mode: idr
|
||||
n_harmonic_functions_dir: 4
|
||||
pooled_feature_dim: 0
|
||||
weight_norm: true
|
||||
raysampler_AdaptiveRaySampler_args:
|
||||
n_rays_per_image_sampled_from_mask: 1024
|
||||
n_pts_per_ray_training: 0
|
||||
n_pts_per_ray_evaluation: 0
|
||||
scene_extent: 8.0
|
||||
renderer_class_type: SignedDistanceFunctionRenderer
|
||||
implicit_function_class_type: IdrFeatureField
|
||||
raysampler_AdaptiveRaySampler_args:
|
||||
n_rays_per_image_sampled_from_mask: 1024
|
||||
n_pts_per_ray_training: 0
|
||||
n_pts_per_ray_evaluation: 0
|
||||
scene_extent: 8.0
|
||||
renderer_class_type: SignedDistanceFunctionRenderer
|
||||
implicit_function_class_type: IdrFeatureField
|
||||
|
||||
@@ -1,12 +1,11 @@
|
||||
defaults:
|
||||
- repro_multiseq_base.yaml
|
||||
- _self_
|
||||
model_factory_ImplicitronModelFactory_args:
|
||||
model_GenericModel_args:
|
||||
chunk_size_grid: 16000
|
||||
view_pooler_enabled: false
|
||||
global_encoder_class_type: SequenceAutodecoder
|
||||
global_encoder_SequenceAutodecoder_args:
|
||||
autodecoder_args:
|
||||
n_instances: 20000
|
||||
encoding_dim: 256
|
||||
generic_model_args:
|
||||
chunk_size_grid: 16000
|
||||
view_pooler_enabled: false
|
||||
global_encoder_class_type: SequenceAutodecoder
|
||||
global_encoder_SequenceAutodecoder_args:
|
||||
autodecoder_args:
|
||||
n_instances: 20000
|
||||
encoding_dim: 256
|
||||
|
||||
@@ -2,11 +2,9 @@ defaults:
|
||||
- repro_multiseq_base.yaml
|
||||
- repro_feat_extractor_unnormed.yaml
|
||||
- _self_
|
||||
model_factory_ImplicitronModelFactory_args:
|
||||
model_GenericModel_args:
|
||||
chunk_size_grid: 16000
|
||||
view_pooler_enabled: true
|
||||
raysampler_AdaptiveRaySampler_args:
|
||||
n_rays_per_image_sampled_from_mask: 850
|
||||
training_loop_ImplicitronTrainingLoop_args:
|
||||
clip_grad: 1.0
|
||||
clip_grad: 1.0
|
||||
generic_model_args:
|
||||
chunk_size_grid: 16000
|
||||
view_pooler_enabled: true
|
||||
raysampler_AdaptiveRaySampler_args:
|
||||
n_rays_per_image_sampled_from_mask: 850
|
||||
|
||||
@@ -2,17 +2,16 @@ defaults:
|
||||
- repro_multiseq_base.yaml
|
||||
- repro_feat_extractor_transformer.yaml
|
||||
- _self_
|
||||
model_factory_ImplicitronModelFactory_args:
|
||||
model_GenericModel_args:
|
||||
chunk_size_grid: 16000
|
||||
raysampler_AdaptiveRaySampler_args:
|
||||
n_rays_per_image_sampled_from_mask: 800
|
||||
n_pts_per_ray_training: 32
|
||||
n_pts_per_ray_evaluation: 32
|
||||
renderer_MultiPassEmissionAbsorptionRenderer_args:
|
||||
n_pts_per_ray_fine_training: 16
|
||||
n_pts_per_ray_fine_evaluation: 16
|
||||
implicit_function_class_type: NeRFormerImplicitFunction
|
||||
view_pooler_enabled: true
|
||||
view_pooler_args:
|
||||
feature_aggregator_class_type: IdentityFeatureAggregator
|
||||
generic_model_args:
|
||||
chunk_size_grid: 16000
|
||||
raysampler_AdaptiveRaySampler_args:
|
||||
n_rays_per_image_sampled_from_mask: 800
|
||||
n_pts_per_ray_training: 32
|
||||
n_pts_per_ray_evaluation: 32
|
||||
renderer_MultiPassEmissionAbsorptionRenderer_args:
|
||||
n_pts_per_ray_fine_training: 16
|
||||
n_pts_per_ray_fine_evaluation: 16
|
||||
implicit_function_class_type: NeRFormerImplicitFunction
|
||||
view_pooler_enabled: true
|
||||
view_pooler_args:
|
||||
feature_aggregator_class_type: IdentityFeatureAggregator
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
defaults:
|
||||
- repro_multiseq_nerformer.yaml
|
||||
- _self_
|
||||
model_factory_ImplicitronModelFactory_args:
|
||||
model_GenericModel_args:
|
||||
view_pooler_args:
|
||||
feature_aggregator_class_type: AngleWeightedIdentityFeatureAggregator
|
||||
generic_model_args:
|
||||
view_pooler_args:
|
||||
feature_aggregator_class_type: AngleWeightedIdentityFeatureAggregator
|
||||
|
||||
@@ -1,35 +1,34 @@
|
||||
defaults:
|
||||
- repro_multiseq_base.yaml
|
||||
- _self_
|
||||
model_factory_ImplicitronModelFactory_args:
|
||||
model_GenericModel_args:
|
||||
chunk_size_grid: 16000
|
||||
view_pooler_enabled: false
|
||||
n_train_target_views: -1
|
||||
num_passes: 1
|
||||
loss_weights:
|
||||
loss_rgb_mse: 200.0
|
||||
loss_prev_stage_rgb_mse: 0.0
|
||||
loss_mask_bce: 1.0
|
||||
loss_prev_stage_mask_bce: 0.0
|
||||
loss_autodecoder_norm: 0.001
|
||||
depth_neg_penalty: 10000.0
|
||||
global_encoder_class_type: SequenceAutodecoder
|
||||
global_encoder_SequenceAutodecoder_args:
|
||||
autodecoder_args:
|
||||
encoding_dim: 256
|
||||
n_instances: 20000
|
||||
raysampler_class_type: NearFarRaySampler
|
||||
raysampler_NearFarRaySampler_args:
|
||||
n_rays_per_image_sampled_from_mask: 2048
|
||||
min_depth: 0.05
|
||||
max_depth: 0.05
|
||||
n_pts_per_ray_training: 1
|
||||
n_pts_per_ray_evaluation: 1
|
||||
stratified_point_sampling_training: false
|
||||
stratified_point_sampling_evaluation: false
|
||||
renderer_class_type: LSTMRenderer
|
||||
implicit_function_class_type: SRNHyperNetImplicitFunction
|
||||
optimizer_factory_ImplicitronOptimizerFactory_args:
|
||||
breed: Adam
|
||||
generic_model_args:
|
||||
chunk_size_grid: 16000
|
||||
view_pooler_enabled: false
|
||||
n_train_target_views: -1
|
||||
num_passes: 1
|
||||
loss_weights:
|
||||
loss_rgb_mse: 200.0
|
||||
loss_prev_stage_rgb_mse: 0.0
|
||||
loss_mask_bce: 1.0
|
||||
loss_prev_stage_mask_bce: 0.0
|
||||
loss_autodecoder_norm: 0.001
|
||||
depth_neg_penalty: 10000.0
|
||||
global_encoder_class_type: SequenceAutodecoder
|
||||
global_encoder_SequenceAutodecoder_args:
|
||||
autodecoder_args:
|
||||
encoding_dim: 256
|
||||
n_instances: 20000
|
||||
raysampler_class_type: NearFarRaySampler
|
||||
raysampler_NearFarRaySampler_args:
|
||||
n_rays_per_image_sampled_from_mask: 2048
|
||||
min_depth: 0.05
|
||||
max_depth: 0.05
|
||||
n_pts_per_ray_training: 1
|
||||
n_pts_per_ray_evaluation: 1
|
||||
stratified_point_sampling_training: false
|
||||
stratified_point_sampling_evaluation: false
|
||||
renderer_class_type: LSTMRenderer
|
||||
implicit_function_class_type: SRNHyperNetImplicitFunction
|
||||
solver_args:
|
||||
breed: adam
|
||||
lr: 5.0e-05
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
defaults:
|
||||
- repro_multiseq_srn_ad_hypernet.yaml
|
||||
- _self_
|
||||
model_factory_ImplicitronModelFactory_args:
|
||||
model_GenericModel_args:
|
||||
num_passes: 1
|
||||
implicit_function_SRNHyperNetImplicitFunction_args:
|
||||
pixel_generator_args:
|
||||
n_harmonic_functions: 0
|
||||
hypernet_args:
|
||||
n_harmonic_functions: 0
|
||||
generic_model_args:
|
||||
num_passes: 1
|
||||
implicit_function_SRNHyperNetImplicitFunction_args:
|
||||
pixel_generator_args:
|
||||
n_harmonic_functions: 0
|
||||
hypernet_args:
|
||||
n_harmonic_functions: 0
|
||||
|
||||
@@ -2,30 +2,29 @@ defaults:
|
||||
- repro_multiseq_base.yaml
|
||||
- repro_feat_extractor_normed.yaml
|
||||
- _self_
|
||||
model_factory_ImplicitronModelFactory_args:
|
||||
model_GenericModel_args:
|
||||
chunk_size_grid: 32000
|
||||
num_passes: 1
|
||||
n_train_target_views: -1
|
||||
loss_weights:
|
||||
loss_rgb_mse: 200.0
|
||||
loss_prev_stage_rgb_mse: 0.0
|
||||
loss_mask_bce: 1.0
|
||||
loss_prev_stage_mask_bce: 0.0
|
||||
loss_autodecoder_norm: 0.0
|
||||
depth_neg_penalty: 10000.0
|
||||
raysampler_class_type: NearFarRaySampler
|
||||
raysampler_NearFarRaySampler_args:
|
||||
n_rays_per_image_sampled_from_mask: 2048
|
||||
min_depth: 0.05
|
||||
max_depth: 0.05
|
||||
n_pts_per_ray_training: 1
|
||||
n_pts_per_ray_evaluation: 1
|
||||
stratified_point_sampling_training: false
|
||||
stratified_point_sampling_evaluation: false
|
||||
renderer_class_type: LSTMRenderer
|
||||
implicit_function_class_type: SRNImplicitFunction
|
||||
view_pooler_enabled: true
|
||||
optimizer_factory_ImplicitronOptimizerFactory_args:
|
||||
breed: Adam
|
||||
generic_model_args:
|
||||
chunk_size_grid: 32000
|
||||
num_passes: 1
|
||||
n_train_target_views: -1
|
||||
loss_weights:
|
||||
loss_rgb_mse: 200.0
|
||||
loss_prev_stage_rgb_mse: 0.0
|
||||
loss_mask_bce: 1.0
|
||||
loss_prev_stage_mask_bce: 0.0
|
||||
loss_autodecoder_norm: 0.0
|
||||
depth_neg_penalty: 10000.0
|
||||
raysampler_class_type: NearFarRaySampler
|
||||
raysampler_NearFarRaySampler_args:
|
||||
n_rays_per_image_sampled_from_mask: 2048
|
||||
min_depth: 0.05
|
||||
max_depth: 0.05
|
||||
n_pts_per_ray_training: 1
|
||||
n_pts_per_ray_evaluation: 1
|
||||
stratified_point_sampling_training: false
|
||||
stratified_point_sampling_evaluation: false
|
||||
renderer_class_type: LSTMRenderer
|
||||
implicit_function_class_type: SRNImplicitFunction
|
||||
view_pooler_enabled: true
|
||||
solver_args:
|
||||
breed: adam
|
||||
lr: 5.0e-05
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
defaults:
|
||||
- repro_multiseq_srn_wce.yaml
|
||||
- _self_
|
||||
model_factory_ImplicitronModelFactory_args:
|
||||
model_GenericModel_args:
|
||||
num_passes: 1
|
||||
implicit_function_SRNImplicitFunction_args:
|
||||
pixel_generator_args:
|
||||
n_harmonic_functions: 0
|
||||
raymarch_function_args:
|
||||
n_harmonic_functions: 0
|
||||
generic_model_args:
|
||||
num_passes: 1
|
||||
implicit_function_SRNImplicitFunction_args:
|
||||
pixel_generator_args:
|
||||
n_harmonic_functions: 0
|
||||
raymarch_function_args:
|
||||
n_harmonic_functions: 0
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
defaults:
|
||||
- repro_base
|
||||
- _self_
|
||||
data_source_ImplicitronDataSource_args:
|
||||
data_source_args:
|
||||
data_loader_map_provider_SequenceDataLoaderMapProvider_args:
|
||||
batch_size: 1
|
||||
dataset_length_train: 1000
|
||||
@@ -12,30 +12,28 @@ data_source_ImplicitronDataSource_args:
|
||||
n_frames_per_sequence: -1
|
||||
test_restrict_sequence_id: 0
|
||||
test_on_train: false
|
||||
model_factory_ImplicitronModelFactory_args:
|
||||
model_GenericModel_args:
|
||||
render_image_height: 800
|
||||
render_image_width: 800
|
||||
log_vars:
|
||||
- loss_rgb_psnr_fg
|
||||
- loss_rgb_psnr
|
||||
- loss_eikonal
|
||||
- loss_prev_stage_rgb_psnr
|
||||
- loss_mask_bce
|
||||
- loss_prev_stage_mask_bce
|
||||
- loss_rgb_mse
|
||||
- loss_prev_stage_rgb_mse
|
||||
- loss_depth_abs
|
||||
- loss_depth_abs_fg
|
||||
- loss_kl
|
||||
- loss_mask_neg_iou
|
||||
- objective
|
||||
- epoch
|
||||
- sec/it
|
||||
optimizer_factory_ImplicitronOptimizerFactory_args:
|
||||
generic_model_args:
|
||||
render_image_height: 800
|
||||
render_image_width: 800
|
||||
log_vars:
|
||||
- loss_rgb_psnr_fg
|
||||
- loss_rgb_psnr
|
||||
- loss_eikonal
|
||||
- loss_prev_stage_rgb_psnr
|
||||
- loss_mask_bce
|
||||
- loss_prev_stage_mask_bce
|
||||
- loss_rgb_mse
|
||||
- loss_prev_stage_rgb_mse
|
||||
- loss_depth_abs
|
||||
- loss_depth_abs_fg
|
||||
- loss_kl
|
||||
- loss_mask_neg_iou
|
||||
- objective
|
||||
- epoch
|
||||
- sec/it
|
||||
solver_args:
|
||||
lr: 0.0005
|
||||
multistep_lr_milestones:
|
||||
max_epochs: 400
|
||||
milestones:
|
||||
- 200
|
||||
- 300
|
||||
training_loop_ImplicitronTrainingLoop_args:
|
||||
max_epochs: 400
|
||||
|
||||
@@ -1,57 +1,57 @@
|
||||
defaults:
|
||||
- repro_singleseq_base
|
||||
- _self_
|
||||
model_factory_ImplicitronModelFactory_args:
|
||||
model_GenericModel_args:
|
||||
loss_weights:
|
||||
loss_mask_bce: 100.0
|
||||
loss_kl: 0.0
|
||||
loss_rgb_mse: 1.0
|
||||
loss_eikonal: 0.1
|
||||
chunk_size_grid: 65536
|
||||
num_passes: 1
|
||||
view_pooler_enabled: false
|
||||
implicit_function_IdrFeatureField_args:
|
||||
n_harmonic_functions_xyz: 6
|
||||
bias: 0.6
|
||||
d_in: 3
|
||||
d_out: 1
|
||||
generic_model_args:
|
||||
loss_weights:
|
||||
loss_mask_bce: 100.0
|
||||
loss_kl: 0.0
|
||||
loss_rgb_mse: 1.0
|
||||
loss_eikonal: 0.1
|
||||
chunk_size_grid: 65536
|
||||
num_passes: 1
|
||||
view_pooler_enabled: false
|
||||
implicit_function_IdrFeatureField_args:
|
||||
n_harmonic_functions_xyz: 6
|
||||
bias: 0.6
|
||||
d_in: 3
|
||||
d_out: 1
|
||||
dims:
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
geometric_init: true
|
||||
pooled_feature_dim: 0
|
||||
skip_in:
|
||||
- 6
|
||||
weight_norm: true
|
||||
renderer_SignedDistanceFunctionRenderer_args:
|
||||
ray_tracer_args:
|
||||
line_search_step: 0.5
|
||||
line_step_iters: 3
|
||||
n_secant_steps: 8
|
||||
n_steps: 100
|
||||
object_bounding_sphere: 8.0
|
||||
sdf_threshold: 5.0e-05
|
||||
ray_normal_coloring_network_args:
|
||||
d_in: 9
|
||||
d_out: 3
|
||||
dims:
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
geometric_init: true
|
||||
mode: idr
|
||||
n_harmonic_functions_dir: 4
|
||||
pooled_feature_dim: 0
|
||||
skip_in:
|
||||
- 6
|
||||
weight_norm: true
|
||||
renderer_SignedDistanceFunctionRenderer_args:
|
||||
ray_tracer_args:
|
||||
line_search_step: 0.5
|
||||
line_step_iters: 3
|
||||
n_secant_steps: 8
|
||||
n_steps: 100
|
||||
sdf_threshold: 5.0e-05
|
||||
ray_normal_coloring_network_args:
|
||||
d_in: 9
|
||||
d_out: 3
|
||||
dims:
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
mode: idr
|
||||
n_harmonic_functions_dir: 4
|
||||
pooled_feature_dim: 0
|
||||
weight_norm: true
|
||||
raysampler_AdaptiveRaySampler_args:
|
||||
n_rays_per_image_sampled_from_mask: 1024
|
||||
n_pts_per_ray_training: 0
|
||||
n_pts_per_ray_evaluation: 0
|
||||
renderer_class_type: SignedDistanceFunctionRenderer
|
||||
implicit_function_class_type: IdrFeatureField
|
||||
raysampler_AdaptiveRaySampler_args:
|
||||
n_rays_per_image_sampled_from_mask: 1024
|
||||
n_pts_per_ray_training: 0
|
||||
n_pts_per_ray_evaluation: 0
|
||||
renderer_class_type: SignedDistanceFunctionRenderer
|
||||
implicit_function_class_type: IdrFeatureField
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
defaults:
|
||||
- repro_singleseq_base
|
||||
- _self_
|
||||
exp_dir: "./data/nerf_blender_repro/${oc.env:BLENDER_SINGLESEQ_CLASS}"
|
||||
data_source_ImplicitronDataSource_args:
|
||||
data_loader_map_provider_SequenceDataLoaderMapProvider_args:
|
||||
dataset_length_train: 100
|
||||
dataset_map_provider_class_type: BlenderDatasetMapProvider
|
||||
dataset_map_provider_BlenderDatasetMapProvider_args:
|
||||
base_dir: ${oc.env:BLENDER_DATASET_ROOT}
|
||||
n_known_frames_for_test: null
|
||||
object_name: ${oc.env:BLENDER_SINGLESEQ_CLASS}
|
||||
path_manager_factory_class_type: PathManagerFactory
|
||||
path_manager_factory_PathManagerFactory_args:
|
||||
silence_logs: true
|
||||
|
||||
model_factory_ImplicitronModelFactory_args:
|
||||
model_GenericModel_args:
|
||||
mask_images: false
|
||||
raysampler_class_type: NearFarRaySampler
|
||||
raysampler_NearFarRaySampler_args:
|
||||
n_rays_per_image_sampled_from_mask: 4096
|
||||
min_depth: 2
|
||||
max_depth: 6
|
||||
renderer_MultiPassEmissionAbsorptionRenderer_args:
|
||||
density_noise_std_train: 1.0
|
||||
n_pts_per_ray_fine_training: 128
|
||||
n_pts_per_ray_fine_evaluation: 128
|
||||
raymarcher_EmissionAbsorptionRaymarcher_args:
|
||||
blend_output: false
|
||||
loss_weights:
|
||||
loss_rgb_mse: 1.0
|
||||
loss_prev_stage_rgb_mse: 1.0
|
||||
loss_mask_bce: 0.0
|
||||
loss_prev_stage_mask_bce: 0.0
|
||||
loss_autodecoder_norm: 0.00
|
||||
|
||||
optimizer_factory_ImplicitronOptimizerFactory_args:
|
||||
exponential_lr_step_size: 3001
|
||||
lr_policy: LinearExponential
|
||||
linear_exponential_lr_milestone: 200
|
||||
|
||||
training_loop_ImplicitronTrainingLoop_args:
|
||||
max_epochs: 3201
|
||||
metric_print_interval: 10
|
||||
store_checkpoints_purge: 3
|
||||
test_when_finished: true
|
||||
validation_interval: 100
|
||||
@@ -2,9 +2,8 @@ defaults:
|
||||
- repro_singleseq_wce_base.yaml
|
||||
- repro_feat_extractor_unnormed.yaml
|
||||
- _self_
|
||||
model_factory_ImplicitronModelFactory_args:
|
||||
model_GenericModel_args:
|
||||
chunk_size_grid: 16000
|
||||
view_pooler_enabled: true
|
||||
raysampler_AdaptiveRaySampler_args:
|
||||
n_rays_per_image_sampled_from_mask: 850
|
||||
generic_model_args:
|
||||
chunk_size_grid: 16000
|
||||
view_pooler_enabled: true
|
||||
raysampler_AdaptiveRaySampler_args:
|
||||
n_rays_per_image_sampled_from_mask: 850
|
||||
|
||||
@@ -2,17 +2,16 @@ defaults:
|
||||
- repro_singleseq_wce_base.yaml
|
||||
- repro_feat_extractor_transformer.yaml
|
||||
- _self_
|
||||
model_factory_ImplicitronModelFactory_args:
|
||||
model_GenericModel_args:
|
||||
chunk_size_grid: 16000
|
||||
view_pooler_enabled: true
|
||||
implicit_function_class_type: NeRFormerImplicitFunction
|
||||
raysampler_AdaptiveRaySampler_args:
|
||||
n_rays_per_image_sampled_from_mask: 800
|
||||
n_pts_per_ray_training: 32
|
||||
n_pts_per_ray_evaluation: 32
|
||||
renderer_MultiPassEmissionAbsorptionRenderer_args:
|
||||
n_pts_per_ray_fine_training: 16
|
||||
n_pts_per_ray_fine_evaluation: 16
|
||||
view_pooler_args:
|
||||
feature_aggregator_class_type: IdentityFeatureAggregator
|
||||
generic_model_args:
|
||||
chunk_size_grid: 16000
|
||||
view_pooler_enabled: true
|
||||
implicit_function_class_type: NeRFormerImplicitFunction
|
||||
raysampler_AdaptiveRaySampler_args:
|
||||
n_rays_per_image_sampled_from_mask: 800
|
||||
n_pts_per_ray_training: 32
|
||||
n_pts_per_ray_evaluation: 32
|
||||
renderer_MultiPassEmissionAbsorptionRenderer_args:
|
||||
n_pts_per_ray_fine_training: 16
|
||||
n_pts_per_ray_fine_evaluation: 16
|
||||
view_pooler_args:
|
||||
feature_aggregator_class_type: IdentityFeatureAggregator
|
||||
|
||||
@@ -1,29 +1,28 @@
|
||||
defaults:
|
||||
- repro_singleseq_base.yaml
|
||||
- _self_
|
||||
model_factory_ImplicitronModelFactory_args:
|
||||
model_GenericModel_args:
|
||||
num_passes: 1
|
||||
chunk_size_grid: 32000
|
||||
view_pooler_enabled: false
|
||||
loss_weights:
|
||||
loss_rgb_mse: 200.0
|
||||
loss_prev_stage_rgb_mse: 0.0
|
||||
loss_mask_bce: 1.0
|
||||
loss_prev_stage_mask_bce: 0.0
|
||||
loss_autodecoder_norm: 0.0
|
||||
depth_neg_penalty: 10000.0
|
||||
raysampler_class_type: NearFarRaySampler
|
||||
raysampler_NearFarRaySampler_args:
|
||||
n_rays_per_image_sampled_from_mask: 2048
|
||||
min_depth: 0.05
|
||||
max_depth: 0.05
|
||||
n_pts_per_ray_training: 1
|
||||
n_pts_per_ray_evaluation: 1
|
||||
stratified_point_sampling_training: false
|
||||
stratified_point_sampling_evaluation: false
|
||||
renderer_class_type: LSTMRenderer
|
||||
implicit_function_class_type: SRNImplicitFunction
|
||||
optimizer_factory_ImplicitronOptimizerFactory_args:
|
||||
breed: Adam
|
||||
generic_model_args:
|
||||
num_passes: 1
|
||||
chunk_size_grid: 32000
|
||||
view_pooler_enabled: false
|
||||
loss_weights:
|
||||
loss_rgb_mse: 200.0
|
||||
loss_prev_stage_rgb_mse: 0.0
|
||||
loss_mask_bce: 1.0
|
||||
loss_prev_stage_mask_bce: 0.0
|
||||
loss_autodecoder_norm: 0.0
|
||||
depth_neg_penalty: 10000.0
|
||||
raysampler_class_type: NearFarRaySampler
|
||||
raysampler_NearFarRaySampler_args:
|
||||
n_rays_per_image_sampled_from_mask: 2048
|
||||
min_depth: 0.05
|
||||
max_depth: 0.05
|
||||
n_pts_per_ray_training: 1
|
||||
n_pts_per_ray_evaluation: 1
|
||||
stratified_point_sampling_training: false
|
||||
stratified_point_sampling_evaluation: false
|
||||
renderer_class_type: LSTMRenderer
|
||||
implicit_function_class_type: SRNImplicitFunction
|
||||
solver_args:
|
||||
breed: adam
|
||||
lr: 5.0e-05
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
defaults:
|
||||
- repro_singleseq_srn.yaml
|
||||
- _self_
|
||||
model_factory_ImplicitronModelFactory_args:
|
||||
model_GenericModel_args:
|
||||
num_passes: 1
|
||||
implicit_function_SRNImplicitFunction_args:
|
||||
pixel_generator_args:
|
||||
n_harmonic_functions: 0
|
||||
raymarch_function_args:
|
||||
n_harmonic_functions: 0
|
||||
generic_model_args:
|
||||
num_passes: 1
|
||||
implicit_function_SRNImplicitFunction_args:
|
||||
pixel_generator_args:
|
||||
n_harmonic_functions: 0
|
||||
raymarch_function_args:
|
||||
n_harmonic_functions: 0
|
||||
|
||||
@@ -2,29 +2,28 @@ defaults:
|
||||
- repro_singleseq_wce_base
|
||||
- repro_feat_extractor_normed.yaml
|
||||
- _self_
|
||||
model_factory_ImplicitronModelFactory_args:
|
||||
model_GenericModel_args:
|
||||
num_passes: 1
|
||||
chunk_size_grid: 32000
|
||||
view_pooler_enabled: true
|
||||
loss_weights:
|
||||
loss_rgb_mse: 200.0
|
||||
loss_prev_stage_rgb_mse: 0.0
|
||||
loss_mask_bce: 1.0
|
||||
loss_prev_stage_mask_bce: 0.0
|
||||
loss_autodecoder_norm: 0.0
|
||||
depth_neg_penalty: 10000.0
|
||||
raysampler_class_type: NearFarRaySampler
|
||||
raysampler_NearFarRaySampler_args:
|
||||
n_rays_per_image_sampled_from_mask: 2048
|
||||
min_depth: 0.05
|
||||
max_depth: 0.05
|
||||
n_pts_per_ray_training: 1
|
||||
n_pts_per_ray_evaluation: 1
|
||||
stratified_point_sampling_training: false
|
||||
stratified_point_sampling_evaluation: false
|
||||
renderer_class_type: LSTMRenderer
|
||||
implicit_function_class_type: SRNImplicitFunction
|
||||
optimizer_factory_ImplicitronOptimizerFactory_args:
|
||||
breed: Adam
|
||||
generic_model_args:
|
||||
num_passes: 1
|
||||
chunk_size_grid: 32000
|
||||
view_pooler_enabled: true
|
||||
loss_weights:
|
||||
loss_rgb_mse: 200.0
|
||||
loss_prev_stage_rgb_mse: 0.0
|
||||
loss_mask_bce: 1.0
|
||||
loss_prev_stage_mask_bce: 0.0
|
||||
loss_autodecoder_norm: 0.0
|
||||
depth_neg_penalty: 10000.0
|
||||
raysampler_class_type: NearFarRaySampler
|
||||
raysampler_NearFarRaySampler_args:
|
||||
n_rays_per_image_sampled_from_mask: 2048
|
||||
min_depth: 0.05
|
||||
max_depth: 0.05
|
||||
n_pts_per_ray_training: 1
|
||||
n_pts_per_ray_evaluation: 1
|
||||
stratified_point_sampling_training: false
|
||||
stratified_point_sampling_evaluation: false
|
||||
renderer_class_type: LSTMRenderer
|
||||
implicit_function_class_type: SRNImplicitFunction
|
||||
solver_args:
|
||||
breed: adam
|
||||
lr: 5.0e-05
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
defaults:
|
||||
- repro_singleseq_srn_wce.yaml
|
||||
- _self_
|
||||
model_factory_ImplicitronModelFactory_args:
|
||||
model_GenericModel_args:
|
||||
num_passes: 1
|
||||
implicit_function_SRNImplicitFunction_args:
|
||||
pixel_generator_args:
|
||||
n_harmonic_functions: 0
|
||||
raymarch_function_args:
|
||||
n_harmonic_functions: 0
|
||||
generic_model_args:
|
||||
num_passes: 1
|
||||
implicit_function_SRNImplicitFunction_args:
|
||||
pixel_generator_args:
|
||||
n_harmonic_functions: 0
|
||||
raymarch_function_args:
|
||||
n_harmonic_functions: 0
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
defaults:
|
||||
- repro_singleseq_base
|
||||
- _self_
|
||||
data_source_ImplicitronDataSource_args:
|
||||
data_source_args:
|
||||
data_loader_map_provider_SequenceDataLoaderMapProvider_args:
|
||||
batch_size: 10
|
||||
dataset_length_train: 1000
|
||||
|
||||
@@ -8,28 +8,27 @@
|
||||
""""
|
||||
This file is the entry point for launching experiments with Implicitron.
|
||||
|
||||
Main functions
|
||||
---------------
|
||||
- `run_training` is the wrapper for the train, val, test loops
|
||||
and checkpointing
|
||||
- `trainvalidate` is the inner loop which runs the model forward/backward
|
||||
pass, visualizations and metric printing
|
||||
|
||||
Launch Training
|
||||
---------------
|
||||
Experiment config .yaml files are located in the
|
||||
`projects/implicitron_trainer/configs` folder. To launch an experiment,
|
||||
specify the name of the file. Specific config values can also be overridden
|
||||
from the command line, for example:
|
||||
`projects/implicitron_trainer/configs` folder. To launch
|
||||
an experiment, specify the name of the file. Specific config values can
|
||||
also be overridden from the command line, for example:
|
||||
|
||||
```
|
||||
./experiment.py --config-name base_config.yaml override.param.one=42 override.param.two=84
|
||||
```
|
||||
|
||||
To run an experiment on a specific GPU, specify the `gpu_idx` key in the
|
||||
config file / CLI. To run on a different device, specify the device in
|
||||
`run_training`.
|
||||
|
||||
Main functions
|
||||
---------------
|
||||
- The Experiment class defines `run` which creates the model, optimizer, and other
|
||||
objects used in training, then starts TrainingLoop's `run` function.
|
||||
- TrainingLoop takes care of the actual training logic: forward and backward passes,
|
||||
evaluation and testing, as well as model checkpointing, visualization, and metric
|
||||
printing.
|
||||
To run an experiment on a specific GPU, specify the `gpu_idx` key
|
||||
in the config file / CLI. To run on a different device, specify the
|
||||
device in `run_training`.
|
||||
|
||||
Outputs
|
||||
--------
|
||||
@@ -46,40 +45,43 @@ The outputs of the experiment are saved and logged in multiple ways:
|
||||
config file.
|
||||
|
||||
"""
|
||||
import copy
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
import time
|
||||
import warnings
|
||||
|
||||
from dataclasses import field
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
|
||||
import hydra
|
||||
|
||||
import lpips
|
||||
import numpy as np
|
||||
import torch
|
||||
import tqdm
|
||||
from accelerate import Accelerator
|
||||
from omegaconf import DictConfig, OmegaConf
|
||||
from packaging import version
|
||||
|
||||
from pytorch3d.implicitron.dataset.data_source import (
|
||||
DataSourceBase,
|
||||
ImplicitronDataSource,
|
||||
)
|
||||
from pytorch3d.implicitron.models.generic_model import ImplicitronModelBase
|
||||
|
||||
from pytorch3d.implicitron.dataset import utils as ds_utils
|
||||
from pytorch3d.implicitron.dataset.data_loader_map_provider import DataLoaderMap
|
||||
from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource, Task
|
||||
from pytorch3d.implicitron.dataset.dataset_map_provider import DatasetMap
|
||||
from pytorch3d.implicitron.evaluation import evaluate_new_view_synthesis as evaluate
|
||||
from pytorch3d.implicitron.models.generic_model import EvaluationMode, GenericModel
|
||||
from pytorch3d.implicitron.models.renderer.multipass_ea import (
|
||||
MultiPassEmissionAbsorptionRenderer,
|
||||
)
|
||||
from pytorch3d.implicitron.models.renderer.ray_sampler import AdaptiveRaySampler
|
||||
from pytorch3d.implicitron.tools import model_io, vis_utils
|
||||
from pytorch3d.implicitron.tools.config import (
|
||||
Configurable,
|
||||
expand_args_fields,
|
||||
remove_unused_components,
|
||||
run_auto_creation,
|
||||
)
|
||||
from pytorch3d.implicitron.tools.stats import Stats
|
||||
from pytorch3d.renderer.cameras import CamerasBase
|
||||
|
||||
from .impl.model_factory import ModelFactoryBase
|
||||
from .impl.optimizer_factory import OptimizerFactoryBase
|
||||
from .impl.training_loop import TrainingLoopBase
|
||||
from .impl.utils import seed_all_random_engines
|
||||
from .impl.experiment_config import ExperimentConfig
|
||||
from .impl.optimization import init_optimizer
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -98,132 +100,551 @@ except ModuleNotFoundError:
|
||||
no_accelerate = os.environ.get("PYTORCH3D_NO_ACCELERATE") is not None
|
||||
|
||||
|
||||
class Experiment(Configurable): # pyre-ignore: 13
|
||||
def init_model(
|
||||
*,
|
||||
cfg: DictConfig,
|
||||
accelerator: Optional[Accelerator] = None,
|
||||
force_load: bool = False,
|
||||
clear_stats: bool = False,
|
||||
load_model_only: bool = False,
|
||||
) -> Tuple[GenericModel, Stats, Optional[Dict[str, Any]]]:
|
||||
"""
|
||||
This class is at the top level of Implicitron's config hierarchy. Its
|
||||
members are high-level components necessary for training an implicit rende-
|
||||
ring network.
|
||||
Returns an instance of `GenericModel`.
|
||||
|
||||
Members:
|
||||
data_source: An object that produces datasets and dataloaders.
|
||||
model_factory: An object that produces an implicit rendering model as
|
||||
well as its corresponding Stats object.
|
||||
optimizer_factory: An object that produces the optimizer and lr
|
||||
scheduler.
|
||||
training_loop: An object that runs training given the outputs produced
|
||||
by the data_source, model_factory and optimizer_factory.
|
||||
seed: A random seed to ensure reproducibility.
|
||||
detect_anomaly: Whether torch.autograd should detect anomalies. Useful
|
||||
for debugging, but might slow down the training.
|
||||
exp_dir: Root experimentation directory. Checkpoints and training stats
|
||||
will be saved here.
|
||||
If `cfg.resume` is set or `force_load` is true,
|
||||
attempts to load the last checkpoint from `cfg.exp_dir`. Failure to do so
|
||||
will return the model with initial weights, unless `force_load` is passed,
|
||||
in which case a FileNotFoundError is raised.
|
||||
|
||||
Args:
|
||||
force_load: If true, force load model from checkpoint even if
|
||||
cfg.resume is false.
|
||||
clear_stats: If true, clear the stats object loaded from checkpoint
|
||||
load_model_only: If true, load only the model weights from checkpoint
|
||||
and do not load the state of the optimizer and stats.
|
||||
|
||||
Returns:
|
||||
model: The model with optionally loaded weights from checkpoint
|
||||
stats: The stats structure (optionally loaded from checkpoint)
|
||||
optimizer_state: The optimizer state dict containing
|
||||
`state` and `param_groups` keys (optionally loaded from checkpoint)
|
||||
|
||||
Raise:
|
||||
FileNotFoundError if `force_load` is passed but checkpoint is not found.
|
||||
"""
|
||||
|
||||
data_source: DataSourceBase
|
||||
data_source_class_type: str = "ImplicitronDataSource"
|
||||
model_factory: ModelFactoryBase
|
||||
model_factory_class_type: str = "ImplicitronModelFactory"
|
||||
optimizer_factory: OptimizerFactoryBase
|
||||
optimizer_factory_class_type: str = "ImplicitronOptimizerFactory"
|
||||
training_loop: TrainingLoopBase
|
||||
training_loop_class_type: str = "ImplicitronTrainingLoop"
|
||||
# Initialize the model
|
||||
if cfg.architecture == "generic":
|
||||
model = GenericModel(**cfg.generic_model_args)
|
||||
else:
|
||||
raise ValueError(f"No such arch {cfg.architecture}.")
|
||||
|
||||
seed: int = 42
|
||||
detect_anomaly: bool = False
|
||||
exp_dir: str = "./data/default_experiment/"
|
||||
# Determine the network outputs that should be logged
|
||||
if hasattr(model, "log_vars"):
|
||||
log_vars = copy.deepcopy(list(model.log_vars))
|
||||
else:
|
||||
log_vars = ["objective"]
|
||||
|
||||
hydra: dict = field(
|
||||
default_factory=lambda: {
|
||||
"run": {"dir": "."}, # Make hydra not change the working dir.
|
||||
"output_subdir": None, # disable storing the .hydra logs
|
||||
}
|
||||
visdom_env_charts = vis_utils.get_visdom_env(cfg) + "_charts"
|
||||
|
||||
# Init the stats struct
|
||||
stats = Stats(
|
||||
log_vars,
|
||||
visdom_env=visdom_env_charts,
|
||||
verbose=False,
|
||||
visdom_server=cfg.visdom_server,
|
||||
visdom_port=cfg.visdom_port,
|
||||
)
|
||||
|
||||
def __post_init__(self):
|
||||
seed_all_random_engines(
|
||||
self.seed
|
||||
) # Set all random engine seeds for reproducibility
|
||||
# Retrieve the last checkpoint
|
||||
if cfg.resume_epoch > 0:
|
||||
model_path = model_io.get_checkpoint(cfg.exp_dir, cfg.resume_epoch)
|
||||
else:
|
||||
model_path = model_io.find_last_checkpoint(cfg.exp_dir)
|
||||
|
||||
run_auto_creation(self)
|
||||
optimizer_state = None
|
||||
if model_path is not None:
|
||||
logger.info("found previous model %s" % model_path)
|
||||
if force_load or cfg.resume:
|
||||
logger.info(" -> resuming")
|
||||
|
||||
def run(self) -> None:
|
||||
# Initialize the accelerator if desired.
|
||||
if no_accelerate:
|
||||
accelerator = None
|
||||
device = torch.device("cuda:0")
|
||||
map_location = None
|
||||
if accelerator is not None and not accelerator.is_local_main_process:
|
||||
map_location = {
|
||||
"cuda:%d" % 0: "cuda:%d" % accelerator.local_process_index
|
||||
}
|
||||
if load_model_only:
|
||||
model_state_dict = torch.load(
|
||||
model_io.get_model_path(model_path), map_location=map_location
|
||||
)
|
||||
stats_load, optimizer_state = None, None
|
||||
else:
|
||||
model_state_dict, stats_load, optimizer_state = model_io.load_model(
|
||||
model_path, map_location=map_location
|
||||
)
|
||||
|
||||
# Determine if stats should be reset
|
||||
if not clear_stats:
|
||||
if stats_load is None:
|
||||
logger.info("\n\n\n\nCORRUPT STATS -> clearing stats\n\n\n\n")
|
||||
last_epoch = model_io.parse_epoch_from_model_path(model_path)
|
||||
logger.info(f"Estimated resume epoch = {last_epoch}")
|
||||
|
||||
# Reset the stats struct
|
||||
for _ in range(last_epoch + 1):
|
||||
stats.new_epoch()
|
||||
assert last_epoch == stats.epoch
|
||||
else:
|
||||
stats = stats_load
|
||||
|
||||
# Update stats properties incase it was reset on load
|
||||
stats.visdom_env = visdom_env_charts
|
||||
stats.visdom_server = cfg.visdom_server
|
||||
stats.visdom_port = cfg.visdom_port
|
||||
stats.plot_file = os.path.join(cfg.exp_dir, "train_stats.pdf")
|
||||
stats.synchronize_logged_vars(log_vars)
|
||||
else:
|
||||
logger.info(" -> clearing stats")
|
||||
|
||||
try:
|
||||
# TODO: fix on creation of the buffers
|
||||
# after the hack above, this will not pass in most cases
|
||||
# ... but this is fine for now
|
||||
model.load_state_dict(model_state_dict, strict=True)
|
||||
except RuntimeError as e:
|
||||
logger.error(e)
|
||||
logger.info("Cant load state dict in strict mode! -> trying non-strict")
|
||||
model.load_state_dict(model_state_dict, strict=False)
|
||||
model.log_vars = log_vars
|
||||
else:
|
||||
accelerator = Accelerator(device_placement=False)
|
||||
logger.info(accelerator.state)
|
||||
device = accelerator.device
|
||||
logger.info(" -> but not resuming -> starting from scratch")
|
||||
elif force_load:
|
||||
raise FileNotFoundError(f"Cannot find a checkpoint in {cfg.exp_dir}!")
|
||||
|
||||
logger.info(f"Running experiment on device: {device}")
|
||||
os.makedirs(self.exp_dir, exist_ok=True)
|
||||
return model, stats, optimizer_state
|
||||
|
||||
# set the debug mode
|
||||
if self.detect_anomaly:
|
||||
logger.info("Anomaly detection!")
|
||||
torch.autograd.set_detect_anomaly(self.detect_anomaly)
|
||||
|
||||
# Initialize the datasets and dataloaders.
|
||||
datasets, dataloaders = self.data_source.get_datasets_and_dataloaders()
|
||||
def trainvalidate(
|
||||
model,
|
||||
stats,
|
||||
epoch,
|
||||
loader,
|
||||
optimizer,
|
||||
validation: bool,
|
||||
*,
|
||||
accelerator: Optional[Accelerator],
|
||||
device: torch.device,
|
||||
bp_var: str = "objective",
|
||||
metric_print_interval: int = 5,
|
||||
visualize_interval: int = 100,
|
||||
visdom_env_root: str = "trainvalidate",
|
||||
clip_grad: float = 0.0,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
"""
|
||||
This is the main loop for training and evaluation including:
|
||||
model forward pass, loss computation, backward pass and visualization.
|
||||
|
||||
# Init the model and the corresponding Stats object.
|
||||
model = self.model_factory(
|
||||
accelerator=accelerator,
|
||||
exp_dir=self.exp_dir,
|
||||
)
|
||||
Args:
|
||||
model: The model module optionally loaded from checkpoint
|
||||
stats: The stats struct, also optionally loaded from checkpoint
|
||||
epoch: The index of the current epoch
|
||||
loader: The dataloader to use for the loop
|
||||
optimizer: The optimizer module optionally loaded from checkpoint
|
||||
validation: If true, run the loop with the model in eval mode
|
||||
and skip the backward pass
|
||||
bp_var: The name of the key in the model output `preds` dict which
|
||||
should be used as the loss for the backward pass.
|
||||
metric_print_interval: The batch interval at which the stats should be
|
||||
logged.
|
||||
visualize_interval: The batch interval at which the visualizations
|
||||
should be plotted
|
||||
visdom_env_root: The name of the visdom environment to use for plotting
|
||||
clip_grad: Optionally clip the gradient norms.
|
||||
If set to a value <=0.0, no clipping
|
||||
device: The device on which to run the model.
|
||||
|
||||
stats = self.training_loop.load_stats(
|
||||
log_vars=model.log_vars,
|
||||
exp_dir=self.exp_dir,
|
||||
resume=self.model_factory.resume,
|
||||
resume_epoch=self.model_factory.resume_epoch, # pyre-ignore [16]
|
||||
)
|
||||
start_epoch = stats.epoch + 1
|
||||
Returns:
|
||||
None
|
||||
"""
|
||||
|
||||
model.to(device)
|
||||
if validation:
|
||||
model.eval()
|
||||
trainmode = "val"
|
||||
else:
|
||||
model.train()
|
||||
trainmode = "train"
|
||||
|
||||
# Init the optimizer and LR scheduler.
|
||||
optimizer, scheduler = self.optimizer_factory(
|
||||
accelerator=accelerator,
|
||||
exp_dir=self.exp_dir,
|
||||
last_epoch=start_epoch,
|
||||
model=model,
|
||||
resume=self.model_factory.resume,
|
||||
resume_epoch=self.model_factory.resume_epoch,
|
||||
)
|
||||
t_start = time.time()
|
||||
|
||||
# Wrap all modules in the distributed library
|
||||
# Note: we don't pass the scheduler to prepare as it
|
||||
# doesn't need to be stepped at each optimizer step
|
||||
train_loader = dataloaders.train
|
||||
val_loader = dataloaders.val
|
||||
test_loader = dataloaders.test
|
||||
if accelerator is not None:
|
||||
(
|
||||
model,
|
||||
optimizer,
|
||||
train_loader,
|
||||
val_loader,
|
||||
) = accelerator.prepare(model, optimizer, train_loader, val_loader)
|
||||
# get the visdom env name
|
||||
visdom_env_imgs = visdom_env_root + "_images_" + trainmode
|
||||
viz = vis_utils.get_visdom_connection(
|
||||
server=stats.visdom_server,
|
||||
port=stats.visdom_port,
|
||||
)
|
||||
|
||||
all_train_cameras = self.data_source.all_train_cameras
|
||||
# Iterate through the batches
|
||||
n_batches = len(loader)
|
||||
for it, net_input in enumerate(loader):
|
||||
last_iter = it == n_batches - 1
|
||||
|
||||
# Enter the main training loop.
|
||||
self.training_loop.run(
|
||||
train_loader=train_loader,
|
||||
val_loader=val_loader,
|
||||
test_loader=test_loader,
|
||||
model=model,
|
||||
optimizer=optimizer,
|
||||
scheduler=scheduler,
|
||||
all_train_cameras=all_train_cameras,
|
||||
accelerator=accelerator,
|
||||
# move to gpu where possible (in place)
|
||||
net_input = net_input.to(device)
|
||||
|
||||
# run the forward pass
|
||||
if not validation:
|
||||
optimizer.zero_grad()
|
||||
preds = model(**{**net_input, "evaluation_mode": EvaluationMode.TRAINING})
|
||||
else:
|
||||
with torch.no_grad():
|
||||
preds = model(
|
||||
**{**net_input, "evaluation_mode": EvaluationMode.EVALUATION}
|
||||
)
|
||||
|
||||
# make sure we dont overwrite something
|
||||
assert all(k not in preds for k in net_input.keys())
|
||||
# merge everything into one big dict
|
||||
preds.update(net_input)
|
||||
|
||||
# update the stats logger
|
||||
stats.update(preds, time_start=t_start, stat_set=trainmode)
|
||||
assert stats.it[trainmode] == it, "inconsistent stat iteration number!"
|
||||
|
||||
# print textual status update
|
||||
if it % metric_print_interval == 0 or last_iter:
|
||||
stats.print(stat_set=trainmode, max_it=n_batches)
|
||||
|
||||
# visualize results
|
||||
if (
|
||||
(accelerator is None or accelerator.is_local_main_process)
|
||||
and visualize_interval > 0
|
||||
and it % visualize_interval == 0
|
||||
):
|
||||
prefix = f"e{stats.epoch}_it{stats.it[trainmode]}"
|
||||
|
||||
model.visualize(
|
||||
viz,
|
||||
visdom_env_imgs,
|
||||
preds,
|
||||
prefix,
|
||||
)
|
||||
|
||||
# optimizer step
|
||||
if not validation:
|
||||
loss = preds[bp_var]
|
||||
assert torch.isfinite(loss).all(), "Non-finite loss!"
|
||||
# backprop
|
||||
if accelerator is None:
|
||||
loss.backward()
|
||||
else:
|
||||
accelerator.backward(loss)
|
||||
if clip_grad > 0.0:
|
||||
# Optionally clip the gradient norms.
|
||||
total_norm = torch.nn.utils.clip_grad_norm(
|
||||
model.parameters(), clip_grad
|
||||
)
|
||||
if total_norm > clip_grad:
|
||||
logger.info(
|
||||
f"Clipping gradient: {total_norm}"
|
||||
+ f" with coef {clip_grad / float(total_norm)}."
|
||||
)
|
||||
|
||||
optimizer.step()
|
||||
|
||||
|
||||
def run_training(cfg: DictConfig) -> None:
|
||||
"""
|
||||
Entry point to run the training and validation loops
|
||||
based on the specified config file.
|
||||
"""
|
||||
|
||||
# Initialize the accelerator
|
||||
if no_accelerate:
|
||||
accelerator = None
|
||||
device = torch.device("cuda:0")
|
||||
else:
|
||||
accelerator = Accelerator(device_placement=False)
|
||||
logger.info(accelerator.state)
|
||||
device = accelerator.device
|
||||
|
||||
logger.info(f"Running experiment on device: {device}")
|
||||
|
||||
# set the debug mode
|
||||
if cfg.detect_anomaly:
|
||||
logger.info("Anomaly detection!")
|
||||
torch.autograd.set_detect_anomaly(cfg.detect_anomaly)
|
||||
|
||||
# create the output folder
|
||||
os.makedirs(cfg.exp_dir, exist_ok=True)
|
||||
_seed_all_random_engines(cfg.seed)
|
||||
remove_unused_components(cfg)
|
||||
|
||||
# dump the exp config to the exp dir
|
||||
try:
|
||||
cfg_filename = os.path.join(cfg.exp_dir, "expconfig.yaml")
|
||||
OmegaConf.save(config=cfg, f=cfg_filename)
|
||||
except PermissionError:
|
||||
warnings.warn("Cant dump config due to insufficient permissions!")
|
||||
|
||||
# setup datasets
|
||||
datasource = ImplicitronDataSource(**cfg.data_source_args)
|
||||
datasets, dataloaders = datasource.get_datasets_and_dataloaders()
|
||||
task = datasource.get_task()
|
||||
|
||||
# init the model
|
||||
model, stats, optimizer_state = init_model(cfg=cfg, accelerator=accelerator)
|
||||
start_epoch = stats.epoch + 1
|
||||
|
||||
# move model to gpu
|
||||
model.to(device)
|
||||
|
||||
# only run evaluation on the test dataloader
|
||||
if cfg.eval_only:
|
||||
_eval_and_dump(
|
||||
cfg,
|
||||
task,
|
||||
datasource.all_train_cameras,
|
||||
datasets,
|
||||
dataloaders,
|
||||
model,
|
||||
stats,
|
||||
device=device,
|
||||
exp_dir=self.exp_dir,
|
||||
stats=stats,
|
||||
seed=self.seed,
|
||||
)
|
||||
return
|
||||
|
||||
# init the optimizer
|
||||
optimizer, scheduler = init_optimizer(
|
||||
model,
|
||||
optimizer_state=optimizer_state,
|
||||
last_epoch=start_epoch,
|
||||
**cfg.solver_args,
|
||||
)
|
||||
|
||||
# check the scheduler and stats have been initialized correctly
|
||||
assert scheduler.last_epoch == stats.epoch + 1
|
||||
assert scheduler.last_epoch == start_epoch
|
||||
|
||||
# Wrap all modules in the distributed library
|
||||
# Note: we don't pass the scheduler to prepare as it
|
||||
# doesn't need to be stepped at each optimizer step
|
||||
train_loader = dataloaders.train
|
||||
val_loader = dataloaders.val
|
||||
if accelerator is not None:
|
||||
(
|
||||
model,
|
||||
optimizer,
|
||||
train_loader,
|
||||
val_loader,
|
||||
) = accelerator.prepare(model, optimizer, train_loader, val_loader)
|
||||
|
||||
past_scheduler_lrs = []
|
||||
# loop through epochs
|
||||
for epoch in range(start_epoch, cfg.solver_args.max_epochs):
|
||||
# automatic new_epoch and plotting of stats at every epoch start
|
||||
with stats:
|
||||
|
||||
# Make sure to re-seed random generators to ensure reproducibility
|
||||
# even after restart.
|
||||
_seed_all_random_engines(cfg.seed + epoch)
|
||||
|
||||
cur_lr = float(scheduler.get_last_lr()[-1])
|
||||
logger.info(f"scheduler lr = {cur_lr:1.2e}")
|
||||
past_scheduler_lrs.append(cur_lr)
|
||||
|
||||
# train loop
|
||||
trainvalidate(
|
||||
model,
|
||||
stats,
|
||||
epoch,
|
||||
train_loader,
|
||||
optimizer,
|
||||
False,
|
||||
visdom_env_root=vis_utils.get_visdom_env(cfg),
|
||||
device=device,
|
||||
accelerator=accelerator,
|
||||
**cfg,
|
||||
)
|
||||
|
||||
# val loop (optional)
|
||||
if val_loader is not None and epoch % cfg.validation_interval == 0:
|
||||
trainvalidate(
|
||||
model,
|
||||
stats,
|
||||
epoch,
|
||||
val_loader,
|
||||
optimizer,
|
||||
True,
|
||||
visdom_env_root=vis_utils.get_visdom_env(cfg),
|
||||
device=device,
|
||||
accelerator=accelerator,
|
||||
**cfg,
|
||||
)
|
||||
|
||||
# eval loop (optional)
|
||||
if (
|
||||
dataloaders.test is not None
|
||||
and cfg.test_interval > 0
|
||||
and epoch % cfg.test_interval == 0
|
||||
):
|
||||
_run_eval(
|
||||
model,
|
||||
datasource.all_train_cameras,
|
||||
dataloaders.test,
|
||||
task,
|
||||
camera_difficulty_bin_breaks=cfg.camera_difficulty_bin_breaks,
|
||||
device=device,
|
||||
)
|
||||
|
||||
assert stats.epoch == epoch, "inconsistent stats!"
|
||||
|
||||
# delete previous models if required
|
||||
# save model only on the main process
|
||||
if cfg.store_checkpoints and (
|
||||
accelerator is None or accelerator.is_local_main_process
|
||||
):
|
||||
if cfg.store_checkpoints_purge > 0:
|
||||
for prev_epoch in range(epoch - cfg.store_checkpoints_purge):
|
||||
model_io.purge_epoch(cfg.exp_dir, prev_epoch)
|
||||
outfile = model_io.get_checkpoint(cfg.exp_dir, epoch)
|
||||
unwrapped_model = (
|
||||
model if accelerator is None else accelerator.unwrap_model(model)
|
||||
)
|
||||
model_io.safe_save_model(
|
||||
unwrapped_model, stats, outfile, optimizer=optimizer
|
||||
)
|
||||
|
||||
scheduler.step()
|
||||
|
||||
new_lr = float(scheduler.get_last_lr()[-1])
|
||||
if new_lr != cur_lr:
|
||||
logger.info(f"LR change! {cur_lr} -> {new_lr}")
|
||||
|
||||
if cfg.test_when_finished:
|
||||
_eval_and_dump(
|
||||
cfg,
|
||||
task,
|
||||
datasource.all_train_cameras,
|
||||
datasets,
|
||||
dataloaders,
|
||||
model,
|
||||
stats,
|
||||
device=device,
|
||||
)
|
||||
|
||||
|
||||
def _eval_and_dump(
|
||||
cfg,
|
||||
task: Task,
|
||||
all_train_cameras: Optional[CamerasBase],
|
||||
datasets: DatasetMap,
|
||||
dataloaders: DataLoaderMap,
|
||||
model,
|
||||
stats,
|
||||
device,
|
||||
) -> None:
|
||||
"""
|
||||
Run the evaluation loop with the test data loader and
|
||||
save the predictions to the `exp_dir`.
|
||||
"""
|
||||
|
||||
dataloader = dataloaders.test
|
||||
|
||||
if dataloader is None:
|
||||
raise ValueError('DataLoaderMap have to contain the "test" entry for eval!')
|
||||
|
||||
results = _run_eval(
|
||||
model,
|
||||
all_train_cameras,
|
||||
dataloader,
|
||||
task,
|
||||
camera_difficulty_bin_breaks=cfg.camera_difficulty_bin_breaks,
|
||||
device=device,
|
||||
)
|
||||
|
||||
# add the evaluation epoch to the results
|
||||
for r in results:
|
||||
r["eval_epoch"] = int(stats.epoch)
|
||||
|
||||
logger.info("Evaluation results")
|
||||
evaluate.pretty_print_nvs_metrics(results)
|
||||
|
||||
with open(os.path.join(cfg.exp_dir, "results_test.json"), "w") as f:
|
||||
json.dump(results, f)
|
||||
|
||||
|
||||
def _get_eval_frame_data(frame_data):
|
||||
"""
|
||||
Masks the unknown image data to make sure we cannot use it at model evaluation time.
|
||||
"""
|
||||
frame_data_for_eval = copy.deepcopy(frame_data)
|
||||
is_known = ds_utils.is_known_frame(frame_data.frame_type).type_as(
|
||||
frame_data.image_rgb
|
||||
)[:, None, None, None]
|
||||
for k in ("image_rgb", "depth_map", "fg_probability", "mask_crop"):
|
||||
value_masked = getattr(frame_data_for_eval, k).clone() * is_known
|
||||
setattr(frame_data_for_eval, k, value_masked)
|
||||
return frame_data_for_eval
|
||||
|
||||
|
||||
def _run_eval(
|
||||
model,
|
||||
all_train_cameras,
|
||||
loader,
|
||||
task: Task,
|
||||
camera_difficulty_bin_breaks: Tuple[float, float],
|
||||
device,
|
||||
):
|
||||
"""
|
||||
Run the evaluation loop on the test dataloader
|
||||
"""
|
||||
lpips_model = lpips.LPIPS(net="vgg")
|
||||
lpips_model = lpips_model.to(device)
|
||||
|
||||
model.eval()
|
||||
|
||||
per_batch_eval_results = []
|
||||
logger.info("Evaluating model ...")
|
||||
for frame_data in tqdm.tqdm(loader):
|
||||
frame_data = frame_data.to(device)
|
||||
|
||||
# mask out the unknown images so that the model does not see them
|
||||
frame_data_for_eval = _get_eval_frame_data(frame_data)
|
||||
|
||||
with torch.no_grad():
|
||||
preds = model(
|
||||
**{**frame_data_for_eval, "evaluation_mode": EvaluationMode.EVALUATION}
|
||||
)
|
||||
|
||||
# TODO: Cannot use accelerate gather for two reasons:.
|
||||
# (1) TypeError: Can't apply _gpu_gather_one on object of type
|
||||
# <class 'pytorch3d.implicitron.models.base_model.ImplicitronRender'>,
|
||||
# only of nested list/tuple/dicts of objects that satisfy is_torch_tensor.
|
||||
# (2) Same error above but for frame_data which contains Cameras.
|
||||
|
||||
implicitron_render = copy.deepcopy(preds["implicitron_render"])
|
||||
|
||||
per_batch_eval_results.append(
|
||||
evaluate.eval_batch(
|
||||
frame_data,
|
||||
implicitron_render,
|
||||
bg_color="black",
|
||||
lpips_model=lpips_model,
|
||||
source_cameras=all_train_cameras,
|
||||
)
|
||||
)
|
||||
|
||||
_, category_result = evaluate.summarize_nvs_eval_results(
|
||||
per_batch_eval_results, task, camera_difficulty_bin_breaks
|
||||
)
|
||||
|
||||
return category_result["results"]
|
||||
|
||||
|
||||
def _seed_all_random_engines(seed: int) -> None:
|
||||
np.random.seed(seed)
|
||||
torch.manual_seed(seed)
|
||||
random.seed(seed)
|
||||
|
||||
|
||||
def _setup_envvars_for_cluster() -> bool:
|
||||
@@ -257,20 +678,9 @@ def _setup_envvars_for_cluster() -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def dump_cfg(cfg: DictConfig) -> None:
|
||||
remove_unused_components(cfg)
|
||||
# dump the exp config to the exp dir
|
||||
os.makedirs(cfg.exp_dir, exist_ok=True)
|
||||
try:
|
||||
cfg_filename = os.path.join(cfg.exp_dir, "expconfig.yaml")
|
||||
OmegaConf.save(config=cfg, f=cfg_filename)
|
||||
except PermissionError:
|
||||
warnings.warn("Can't dump config due to insufficient permissions!")
|
||||
|
||||
|
||||
expand_args_fields(Experiment)
|
||||
expand_args_fields(ExperimentConfig)
|
||||
cs = hydra.core.config_store.ConfigStore.instance()
|
||||
cs.store(name="default_config", node=Experiment)
|
||||
cs.store(name="default_config", node=ExperimentConfig)
|
||||
|
||||
|
||||
@hydra.main(config_path="./configs/", config_name="default_config")
|
||||
@@ -284,14 +694,12 @@ def experiment(cfg: DictConfig) -> None:
|
||||
logger.info("Running locally")
|
||||
|
||||
# TODO: The following may be needed for hydra/submitit it to work
|
||||
expand_args_fields(ImplicitronModelBase)
|
||||
expand_args_fields(GenericModel)
|
||||
expand_args_fields(AdaptiveRaySampler)
|
||||
expand_args_fields(MultiPassEmissionAbsorptionRenderer)
|
||||
expand_args_fields(ImplicitronDataSource)
|
||||
|
||||
experiment = Experiment(**cfg)
|
||||
dump_cfg(cfg)
|
||||
experiment.run()
|
||||
run_training(cfg)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
49
projects/implicitron_trainer/impl/experiment_config.py
Normal file
49
projects/implicitron_trainer/impl/experiment_config.py
Normal file
@@ -0,0 +1,49 @@
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the BSD-style license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
from dataclasses import field
|
||||
from typing import Any, Dict, Tuple
|
||||
|
||||
from omegaconf import DictConfig
|
||||
from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource
|
||||
from pytorch3d.implicitron.models.generic_model import GenericModel
|
||||
from pytorch3d.implicitron.tools.config import Configurable, get_default_args_field
|
||||
|
||||
from .optimization import init_optimizer
|
||||
|
||||
|
||||
class ExperimentConfig(Configurable):
|
||||
generic_model_args: DictConfig = get_default_args_field(GenericModel)
|
||||
solver_args: DictConfig = get_default_args_field(init_optimizer)
|
||||
data_source_args: DictConfig = get_default_args_field(ImplicitronDataSource)
|
||||
architecture: str = "generic"
|
||||
detect_anomaly: bool = False
|
||||
eval_only: bool = False
|
||||
exp_dir: str = "./data/default_experiment/"
|
||||
exp_idx: int = 0
|
||||
gpu_idx: int = 0
|
||||
metric_print_interval: int = 5
|
||||
resume: bool = True
|
||||
resume_epoch: int = -1
|
||||
seed: int = 0
|
||||
store_checkpoints: bool = True
|
||||
store_checkpoints_purge: int = 1
|
||||
test_interval: int = -1
|
||||
test_when_finished: bool = False
|
||||
validation_interval: int = 1
|
||||
visdom_env: str = ""
|
||||
visdom_port: int = 8097
|
||||
visdom_server: str = "http://127.0.0.1"
|
||||
visualize_interval: int = 1000
|
||||
clip_grad: float = 0.0
|
||||
camera_difficulty_bin_breaks: Tuple[float, ...] = 0.97, 0.98
|
||||
|
||||
hydra: Dict[str, Any] = field(
|
||||
default_factory=lambda: {
|
||||
"run": {"dir": "."}, # Make hydra not change the working dir.
|
||||
"output_subdir": None, # disable storing the .hydra logs
|
||||
}
|
||||
)
|
||||
@@ -1,133 +0,0 @@
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the BSD-style license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import Optional
|
||||
|
||||
import torch.optim
|
||||
|
||||
from accelerate import Accelerator
|
||||
from pytorch3d.implicitron.models.base_model import ImplicitronModelBase
|
||||
from pytorch3d.implicitron.tools import model_io
|
||||
from pytorch3d.implicitron.tools.config import (
|
||||
registry,
|
||||
ReplaceableBase,
|
||||
run_auto_creation,
|
||||
)
|
||||
from pytorch3d.implicitron.tools.stats import Stats
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ModelFactoryBase(ReplaceableBase):
|
||||
|
||||
resume: bool = True # resume from the last checkpoint
|
||||
|
||||
def __call__(self, **kwargs) -> ImplicitronModelBase:
|
||||
"""
|
||||
Initialize the model (possibly from a previously saved state).
|
||||
|
||||
Returns: An instance of ImplicitronModelBase.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def load_stats(self, **kwargs) -> Stats:
|
||||
"""
|
||||
Initialize or load a Stats object.
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
@registry.register
|
||||
class ImplicitronModelFactory(ModelFactoryBase): # pyre-ignore [13]
|
||||
"""
|
||||
A factory class that initializes an implicit rendering model.
|
||||
|
||||
Members:
|
||||
model: An ImplicitronModelBase object.
|
||||
resume: If True, attempt to load the last checkpoint from `exp_dir`
|
||||
passed to __call__. Failure to do so will return a model with ini-
|
||||
tial weights unless `force_resume` is True.
|
||||
resume_epoch: If `resume` is True: Resume a model at this epoch, or if
|
||||
`resume_epoch` <= 0, then resume from the latest checkpoint.
|
||||
force_resume: If True, throw a FileNotFoundError if `resume` is True but
|
||||
a model checkpoint cannot be found.
|
||||
|
||||
"""
|
||||
|
||||
model: ImplicitronModelBase
|
||||
model_class_type: str = "GenericModel"
|
||||
resume: bool = True
|
||||
resume_epoch: int = -1
|
||||
force_resume: bool = False
|
||||
|
||||
def __post_init__(self):
|
||||
run_auto_creation(self)
|
||||
|
||||
def __call__(
|
||||
self,
|
||||
exp_dir: str,
|
||||
accelerator: Optional[Accelerator] = None,
|
||||
) -> ImplicitronModelBase:
|
||||
"""
|
||||
Returns an instance of `ImplicitronModelBase`, possibly loaded from a
|
||||
checkpoint (if self.resume, self.resume_epoch specify so).
|
||||
|
||||
Args:
|
||||
exp_dir: Root experiment directory.
|
||||
accelerator: An Accelerator object.
|
||||
|
||||
Returns:
|
||||
model: The model with optionally loaded weights from checkpoint
|
||||
|
||||
Raise:
|
||||
FileNotFoundError if `force_resume` is True but checkpoint not found.
|
||||
"""
|
||||
# Determine the network outputs that should be logged
|
||||
if hasattr(self.model, "log_vars"):
|
||||
log_vars = list(self.model.log_vars)
|
||||
else:
|
||||
log_vars = ["objective"]
|
||||
|
||||
if self.resume_epoch > 0:
|
||||
# Resume from a certain epoch
|
||||
model_path = model_io.get_checkpoint(exp_dir, self.resume_epoch)
|
||||
if not os.path.isfile(model_path):
|
||||
raise ValueError(f"Cannot find model from epoch {self.resume_epoch}.")
|
||||
else:
|
||||
# Retrieve the last checkpoint
|
||||
model_path = model_io.find_last_checkpoint(exp_dir)
|
||||
|
||||
if model_path is not None:
|
||||
logger.info(f"Found previous model {model_path}")
|
||||
if self.force_resume or self.resume:
|
||||
logger.info("Resuming.")
|
||||
|
||||
map_location = None
|
||||
if accelerator is not None and not accelerator.is_local_main_process:
|
||||
map_location = {
|
||||
"cuda:%d" % 0: "cuda:%d" % accelerator.local_process_index
|
||||
}
|
||||
model_state_dict = torch.load(
|
||||
model_io.get_model_path(model_path), map_location=map_location
|
||||
)
|
||||
|
||||
try:
|
||||
self.model.load_state_dict(model_state_dict, strict=True)
|
||||
except RuntimeError as e:
|
||||
logger.error(e)
|
||||
logger.info(
|
||||
"Cannot load state dict in strict mode! -> trying non-strict"
|
||||
)
|
||||
self.model.load_state_dict(model_state_dict, strict=False)
|
||||
self.model.log_vars = log_vars
|
||||
else:
|
||||
logger.info("Not resuming -> starting from scratch.")
|
||||
elif self.force_resume:
|
||||
raise FileNotFoundError(f"Cannot find a checkpoint in {exp_dir}!")
|
||||
|
||||
return self.model
|
||||
109
projects/implicitron_trainer/impl/optimization.py
Normal file
109
projects/implicitron_trainer/impl/optimization.py
Normal file
@@ -0,0 +1,109 @@
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the BSD-style license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
import logging
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
|
||||
import torch
|
||||
from pytorch3d.implicitron.models.generic_model import GenericModel
|
||||
from pytorch3d.implicitron.tools.config import enable_get_default_args
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def init_optimizer(
|
||||
model: GenericModel,
|
||||
optimizer_state: Optional[Dict[str, Any]],
|
||||
last_epoch: int,
|
||||
breed: str = "adam",
|
||||
weight_decay: float = 0.0,
|
||||
lr_policy: str = "multistep",
|
||||
lr: float = 0.0005,
|
||||
gamma: float = 0.1,
|
||||
momentum: float = 0.9,
|
||||
betas: Tuple[float, ...] = (0.9, 0.999),
|
||||
milestones: Tuple[int, ...] = (),
|
||||
max_epochs: int = 1000,
|
||||
):
|
||||
"""
|
||||
Initialize the optimizer (optionally from checkpoint state)
|
||||
and the learning rate scheduler.
|
||||
|
||||
Args:
|
||||
model: The model with optionally loaded weights
|
||||
optimizer_state: The state dict for the optimizer. If None
|
||||
it has not been loaded from checkpoint
|
||||
last_epoch: If the model was loaded from checkpoint this will be the
|
||||
number of the last epoch that was saved
|
||||
breed: The type of optimizer to use e.g. adam
|
||||
weight_decay: The optimizer weight_decay (L2 penalty on model weights)
|
||||
lr_policy: The policy to use for learning rate. Currently, only "multistep:
|
||||
is supported.
|
||||
lr: The value for the initial learning rate
|
||||
gamma: Multiplicative factor of learning rate decay
|
||||
momentum: Momentum factor for SGD optimizer
|
||||
betas: Coefficients used for computing running averages of gradient and its square
|
||||
in the Adam optimizer
|
||||
milestones: List of increasing epoch indices at which the learning rate is
|
||||
modified
|
||||
max_epochs: The maximum number of epochs to run the optimizer for
|
||||
|
||||
Returns:
|
||||
optimizer: Optimizer module, optionally loaded from checkpoint
|
||||
scheduler: Learning rate scheduler module
|
||||
|
||||
Raise:
|
||||
ValueError if `breed` or `lr_policy` are not supported.
|
||||
"""
|
||||
|
||||
# Get the parameters to optimize
|
||||
if hasattr(model, "_get_param_groups"): # use the model function
|
||||
# pyre-ignore[29]
|
||||
p_groups = model._get_param_groups(lr, wd=weight_decay)
|
||||
else:
|
||||
allprm = [prm for prm in model.parameters() if prm.requires_grad]
|
||||
p_groups = [{"params": allprm, "lr": lr}]
|
||||
|
||||
# Intialize the optimizer
|
||||
if breed == "sgd":
|
||||
optimizer = torch.optim.SGD(
|
||||
p_groups, lr=lr, momentum=momentum, weight_decay=weight_decay
|
||||
)
|
||||
elif breed == "adagrad":
|
||||
optimizer = torch.optim.Adagrad(p_groups, lr=lr, weight_decay=weight_decay)
|
||||
elif breed == "adam":
|
||||
optimizer = torch.optim.Adam(
|
||||
p_groups, lr=lr, betas=betas, weight_decay=weight_decay
|
||||
)
|
||||
else:
|
||||
raise ValueError("no such solver type %s" % breed)
|
||||
logger.info(" -> solver type = %s" % breed)
|
||||
|
||||
# Load state from checkpoint
|
||||
if optimizer_state is not None:
|
||||
logger.info(" -> setting loaded optimizer state")
|
||||
optimizer.load_state_dict(optimizer_state)
|
||||
|
||||
# Initialize the learning rate scheduler
|
||||
if lr_policy == "multistep":
|
||||
scheduler = torch.optim.lr_scheduler.MultiStepLR(
|
||||
optimizer,
|
||||
milestones=milestones,
|
||||
gamma=gamma,
|
||||
)
|
||||
else:
|
||||
raise ValueError("no such lr policy %s" % lr_policy)
|
||||
|
||||
# When loading from checkpoint, this will make sure that the
|
||||
# lr is correctly set even after returning
|
||||
for _ in range(last_epoch):
|
||||
scheduler.step()
|
||||
|
||||
optimizer.zero_grad()
|
||||
return optimizer, scheduler
|
||||
|
||||
|
||||
enable_get_default_args(init_optimizer)
|
||||
@@ -1,230 +0,0 @@
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the BSD-style license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
|
||||
import torch.optim
|
||||
|
||||
from accelerate import Accelerator
|
||||
|
||||
from pytorch3d.implicitron.models.base_model import ImplicitronModelBase
|
||||
from pytorch3d.implicitron.tools import model_io
|
||||
from pytorch3d.implicitron.tools.config import (
|
||||
registry,
|
||||
ReplaceableBase,
|
||||
run_auto_creation,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class OptimizerFactoryBase(ReplaceableBase):
|
||||
def __call__(
|
||||
self, model: ImplicitronModelBase, **kwargs
|
||||
) -> Tuple[torch.optim.Optimizer, Any]:
|
||||
"""
|
||||
Initialize the optimizer and lr scheduler.
|
||||
|
||||
Args:
|
||||
model: The model with optionally loaded weights.
|
||||
|
||||
Returns:
|
||||
An optimizer module (optionally loaded from a checkpoint) and
|
||||
a learning rate scheduler module (should be a subclass of torch.optim's
|
||||
lr_scheduler._LRScheduler).
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
@registry.register
|
||||
class ImplicitronOptimizerFactory(OptimizerFactoryBase):
|
||||
"""
|
||||
A factory that initializes the optimizer and lr scheduler.
|
||||
|
||||
Members:
|
||||
betas: Beta parameters for the Adam optimizer.
|
||||
breed: The type of optimizer to use. We currently support SGD, Adagrad
|
||||
and Adam.
|
||||
exponential_lr_step_size: With Exponential policy only,
|
||||
lr = lr * gamma ** (epoch/step_size)
|
||||
gamma: Multiplicative factor of learning rate decay.
|
||||
lr: The value for the initial learning rate.
|
||||
lr_policy: The policy to use for learning rate. We currently support
|
||||
MultiStepLR and Exponential policies.
|
||||
momentum: A momentum value (for SGD only).
|
||||
multistep_lr_milestones: With MultiStepLR policy only: list of
|
||||
increasing epoch indices at which the learning rate is modified.
|
||||
momentum: Momentum factor for SGD optimizer.
|
||||
weight_decay: The optimizer weight_decay (L2 penalty on model weights).
|
||||
"""
|
||||
|
||||
betas: Tuple[float, ...] = (0.9, 0.999)
|
||||
breed: str = "Adam"
|
||||
exponential_lr_step_size: int = 250
|
||||
gamma: float = 0.1
|
||||
lr: float = 0.0005
|
||||
lr_policy: str = "MultiStepLR"
|
||||
momentum: float = 0.9
|
||||
multistep_lr_milestones: tuple = ()
|
||||
weight_decay: float = 0.0
|
||||
linear_exponential_lr_milestone: int = 200
|
||||
linear_exponential_start_gamma: float = 0.1
|
||||
|
||||
def __post_init__(self):
|
||||
run_auto_creation(self)
|
||||
|
||||
def __call__(
|
||||
self,
|
||||
last_epoch: int,
|
||||
model: ImplicitronModelBase,
|
||||
accelerator: Optional[Accelerator] = None,
|
||||
exp_dir: Optional[str] = None,
|
||||
resume: bool = True,
|
||||
resume_epoch: int = -1,
|
||||
**kwargs,
|
||||
) -> Tuple[torch.optim.Optimizer, Any]:
|
||||
"""
|
||||
Initialize the optimizer (optionally from a checkpoint) and the lr scheduluer.
|
||||
|
||||
Args:
|
||||
last_epoch: If the model was loaded from checkpoint this will be the
|
||||
number of the last epoch that was saved.
|
||||
model: The model with optionally loaded weights.
|
||||
accelerator: An optional Accelerator instance.
|
||||
exp_dir: Root experiment directory.
|
||||
resume: If True, attempt to load optimizer checkpoint from exp_dir.
|
||||
Failure to do so will return a newly initialized optimizer.
|
||||
resume_epoch: If `resume` is True: Resume optimizer at this epoch. If
|
||||
`resume_epoch` <= 0, then resume from the latest checkpoint.
|
||||
Returns:
|
||||
An optimizer module (optionally loaded from a checkpoint) and
|
||||
a learning rate scheduler module (should be a subclass of torch.optim's
|
||||
lr_scheduler._LRScheduler).
|
||||
"""
|
||||
# Get the parameters to optimize
|
||||
if hasattr(model, "_get_param_groups"): # use the model function
|
||||
# pyre-ignore[29]
|
||||
p_groups = model._get_param_groups(self.lr, wd=self.weight_decay)
|
||||
else:
|
||||
allprm = [prm for prm in model.parameters() if prm.requires_grad]
|
||||
p_groups = [{"params": allprm, "lr": self.lr}]
|
||||
|
||||
# Intialize the optimizer
|
||||
if self.breed == "SGD":
|
||||
optimizer = torch.optim.SGD(
|
||||
p_groups,
|
||||
lr=self.lr,
|
||||
momentum=self.momentum,
|
||||
weight_decay=self.weight_decay,
|
||||
)
|
||||
elif self.breed == "Adagrad":
|
||||
optimizer = torch.optim.Adagrad(
|
||||
p_groups, lr=self.lr, weight_decay=self.weight_decay
|
||||
)
|
||||
elif self.breed == "Adam":
|
||||
optimizer = torch.optim.Adam(
|
||||
p_groups, lr=self.lr, betas=self.betas, weight_decay=self.weight_decay
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"No such solver type {self.breed}")
|
||||
logger.info(f"Solver type = {self.breed}")
|
||||
|
||||
# Load state from checkpoint
|
||||
optimizer_state = self._get_optimizer_state(
|
||||
exp_dir,
|
||||
accelerator,
|
||||
resume_epoch=resume_epoch,
|
||||
resume=resume,
|
||||
)
|
||||
if optimizer_state is not None:
|
||||
logger.info("Setting loaded optimizer state.")
|
||||
optimizer.load_state_dict(optimizer_state)
|
||||
|
||||
# Initialize the learning rate scheduler
|
||||
if self.lr_policy.casefold() == "MultiStepLR".casefold():
|
||||
scheduler = torch.optim.lr_scheduler.MultiStepLR(
|
||||
optimizer,
|
||||
milestones=self.multistep_lr_milestones,
|
||||
gamma=self.gamma,
|
||||
)
|
||||
elif self.lr_policy.casefold() == "Exponential".casefold():
|
||||
scheduler = torch.optim.lr_scheduler.LambdaLR(
|
||||
optimizer,
|
||||
lambda epoch: self.gamma ** (epoch / self.exponential_lr_step_size),
|
||||
verbose=False,
|
||||
)
|
||||
elif self.lr_policy.casefold() == "LinearExponential".casefold():
|
||||
# linear learning rate progression between epochs 0 to
|
||||
# self.linear_exponential_lr_milestone, followed by exponential
|
||||
# lr decay for the rest of the epochs
|
||||
def _get_lr(epoch: int):
|
||||
m = self.linear_exponential_lr_milestone
|
||||
if epoch < m:
|
||||
w = (m - epoch) / m
|
||||
gamma = w * self.linear_exponential_start_gamma + (1 - w)
|
||||
else:
|
||||
epoch_rest = epoch - m
|
||||
gamma = self.gamma ** (epoch_rest / self.exponential_lr_step_size)
|
||||
return gamma
|
||||
|
||||
scheduler = torch.optim.lr_scheduler.LambdaLR(
|
||||
optimizer, _get_lr, verbose=False
|
||||
)
|
||||
else:
|
||||
raise ValueError("no such lr policy %s" % self.lr_policy)
|
||||
|
||||
# When loading from checkpoint, this will make sure that the
|
||||
# lr is correctly set even after returning.
|
||||
for _ in range(last_epoch):
|
||||
scheduler.step()
|
||||
|
||||
optimizer.zero_grad()
|
||||
|
||||
return optimizer, scheduler
|
||||
|
||||
def _get_optimizer_state(
|
||||
self,
|
||||
exp_dir: Optional[str],
|
||||
accelerator: Optional[Accelerator] = None,
|
||||
resume: bool = True,
|
||||
resume_epoch: int = -1,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Load an optimizer state from a checkpoint.
|
||||
|
||||
resume: If True, attempt to load the last checkpoint from `exp_dir`
|
||||
passed to __call__. Failure to do so will return a newly initialized
|
||||
optimizer.
|
||||
resume_epoch: If `resume` is True: Resume optimizer at this epoch. If
|
||||
`resume_epoch` <= 0, then resume from the latest checkpoint.
|
||||
"""
|
||||
if exp_dir is None or not resume:
|
||||
return None
|
||||
if resume_epoch > 0:
|
||||
save_path = model_io.get_checkpoint(exp_dir, resume_epoch)
|
||||
if not os.path.isfile(save_path):
|
||||
raise FileNotFoundError(
|
||||
f"Cannot find optimizer from epoch {resume_epoch}."
|
||||
)
|
||||
else:
|
||||
save_path = model_io.find_last_checkpoint(exp_dir)
|
||||
optimizer_state = None
|
||||
if save_path is not None:
|
||||
logger.info(f"Found previous optimizer state {save_path} -> resuming.")
|
||||
opt_path = model_io.get_optimizer_path(save_path)
|
||||
|
||||
if os.path.isfile(opt_path):
|
||||
map_location = None
|
||||
if accelerator is not None and not accelerator.is_local_main_process:
|
||||
map_location = {
|
||||
"cuda:%d" % 0: "cuda:%d" % accelerator.local_process_index
|
||||
}
|
||||
optimizer_state = torch.load(opt_path, map_location)
|
||||
else:
|
||||
raise FileNotFoundError(f"Optimizer state {opt_path} does not exist.")
|
||||
return optimizer_state
|
||||
@@ -1,447 +0,0 @@
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the BSD-style license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
import logging
|
||||
import os
|
||||
import time
|
||||
from typing import Any, List, Optional
|
||||
|
||||
import torch
|
||||
from accelerate import Accelerator
|
||||
from pytorch3d.implicitron.evaluation.evaluator import EvaluatorBase
|
||||
from pytorch3d.implicitron.models.base_model import ImplicitronModelBase
|
||||
from pytorch3d.implicitron.models.generic_model import EvaluationMode
|
||||
from pytorch3d.implicitron.tools import model_io, vis_utils
|
||||
from pytorch3d.implicitron.tools.config import (
|
||||
registry,
|
||||
ReplaceableBase,
|
||||
run_auto_creation,
|
||||
)
|
||||
from pytorch3d.implicitron.tools.stats import Stats
|
||||
from pytorch3d.renderer.cameras import CamerasBase
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
from .utils import seed_all_random_engines
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class TrainingLoopBase(ReplaceableBase):
|
||||
def run(
|
||||
self,
|
||||
train_loader: DataLoader,
|
||||
val_loader: Optional[DataLoader],
|
||||
test_loader: Optional[DataLoader],
|
||||
model: ImplicitronModelBase,
|
||||
optimizer: torch.optim.Optimizer,
|
||||
scheduler: Any,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
raise NotImplementedError()
|
||||
|
||||
def load_stats(
|
||||
self,
|
||||
log_vars: List[str],
|
||||
exp_dir: str,
|
||||
resume: bool = True,
|
||||
resume_epoch: int = -1,
|
||||
**kwargs,
|
||||
) -> Stats:
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
@registry.register
|
||||
class ImplicitronTrainingLoop(TrainingLoopBase): # pyre-ignore [13]
|
||||
"""
|
||||
Members:
|
||||
eval_only: If True, only run evaluation using the test dataloader.
|
||||
evaluator: An EvaluatorBase instance, used to evaluate training results.
|
||||
max_epochs: Train for this many epochs. Note that if the model was
|
||||
loaded from a checkpoint, we will restart training at the appropriate
|
||||
epoch and run for (max_epochs - checkpoint_epoch) epochs.
|
||||
store_checkpoints: If True, store model and optimizer state checkpoints.
|
||||
store_checkpoints_purge: If >= 0, remove any checkpoints older or equal
|
||||
to this many epochs.
|
||||
test_interval: Evaluate on a test dataloader each `test_interval` epochs.
|
||||
test_when_finished: If True, evaluate on a test dataloader when training
|
||||
completes.
|
||||
validation_interval: Validate each `validation_interval` epochs.
|
||||
clip_grad: Optionally clip the gradient norms.
|
||||
If set to a value <=0.0, no clipping
|
||||
metric_print_interval: The batch interval at which the stats should be
|
||||
logged.
|
||||
visualize_interval: The batch interval at which the visualizations
|
||||
should be plotted
|
||||
visdom_env: The name of the Visdom environment to use for plotting.
|
||||
visdom_port: The Visdom port.
|
||||
visdom_server: Address of the Visdom server.
|
||||
"""
|
||||
|
||||
# Parameters of the outer training loop.
|
||||
eval_only: bool = False
|
||||
evaluator: EvaluatorBase
|
||||
evaluator_class_type: str = "ImplicitronEvaluator"
|
||||
max_epochs: int = 1000
|
||||
store_checkpoints: bool = True
|
||||
store_checkpoints_purge: int = 1
|
||||
test_interval: int = -1
|
||||
test_when_finished: bool = False
|
||||
validation_interval: int = 1
|
||||
|
||||
# Gradient clipping.
|
||||
clip_grad: float = 0.0
|
||||
|
||||
# Visualization/logging parameters.
|
||||
metric_print_interval: int = 5
|
||||
visualize_interval: int = 1000
|
||||
visdom_env: str = ""
|
||||
visdom_port: int = int(os.environ.get("VISDOM_PORT", 8097))
|
||||
visdom_server: str = "http://127.0.0.1"
|
||||
|
||||
def __post_init__(self):
|
||||
run_auto_creation(self)
|
||||
|
||||
def run(
|
||||
self,
|
||||
*,
|
||||
train_loader: DataLoader,
|
||||
val_loader: Optional[DataLoader],
|
||||
test_loader: Optional[DataLoader],
|
||||
model: ImplicitronModelBase,
|
||||
optimizer: torch.optim.Optimizer,
|
||||
scheduler: Any,
|
||||
accelerator: Optional[Accelerator],
|
||||
all_train_cameras: Optional[CamerasBase],
|
||||
device: torch.device,
|
||||
exp_dir: str,
|
||||
stats: Stats,
|
||||
seed: int,
|
||||
**kwargs,
|
||||
):
|
||||
"""
|
||||
Entry point to run the training and validation loops
|
||||
based on the specified config file.
|
||||
"""
|
||||
start_epoch = stats.epoch + 1
|
||||
assert scheduler.last_epoch == stats.epoch + 1
|
||||
assert scheduler.last_epoch == start_epoch
|
||||
|
||||
# only run evaluation on the test dataloader
|
||||
if self.eval_only:
|
||||
if test_loader is not None:
|
||||
self.evaluator.run(
|
||||
all_train_cameras=all_train_cameras,
|
||||
dataloader=test_loader,
|
||||
device=device,
|
||||
dump_to_json=True,
|
||||
epoch=stats.epoch,
|
||||
exp_dir=exp_dir,
|
||||
model=model,
|
||||
)
|
||||
return
|
||||
else:
|
||||
raise ValueError(
|
||||
"Cannot evaluate and dump results to json, no test data provided."
|
||||
)
|
||||
|
||||
# loop through epochs
|
||||
for epoch in range(start_epoch, self.max_epochs):
|
||||
# automatic new_epoch and plotting of stats at every epoch start
|
||||
with stats:
|
||||
|
||||
# Make sure to re-seed random generators to ensure reproducibility
|
||||
# even after restart.
|
||||
seed_all_random_engines(seed + epoch)
|
||||
|
||||
cur_lr = float(scheduler.get_last_lr()[-1])
|
||||
logger.debug(f"scheduler lr = {cur_lr:1.2e}")
|
||||
|
||||
# train loop
|
||||
self._training_or_validation_epoch(
|
||||
accelerator=accelerator,
|
||||
device=device,
|
||||
epoch=epoch,
|
||||
loader=train_loader,
|
||||
model=model,
|
||||
optimizer=optimizer,
|
||||
stats=stats,
|
||||
validation=False,
|
||||
)
|
||||
|
||||
# val loop (optional)
|
||||
if val_loader is not None and epoch % self.validation_interval == 0:
|
||||
self._training_or_validation_epoch(
|
||||
accelerator=accelerator,
|
||||
device=device,
|
||||
epoch=epoch,
|
||||
loader=val_loader,
|
||||
model=model,
|
||||
optimizer=optimizer,
|
||||
stats=stats,
|
||||
validation=True,
|
||||
)
|
||||
|
||||
# eval loop (optional)
|
||||
if (
|
||||
test_loader is not None
|
||||
and self.test_interval > 0
|
||||
and epoch % self.test_interval == 0
|
||||
):
|
||||
self.evaluator.run(
|
||||
all_train_cameras=all_train_cameras,
|
||||
device=device,
|
||||
dataloader=test_loader,
|
||||
model=model,
|
||||
)
|
||||
|
||||
assert stats.epoch == epoch, "inconsistent stats!"
|
||||
self._checkpoint(accelerator, epoch, exp_dir, model, optimizer, stats)
|
||||
|
||||
scheduler.step()
|
||||
new_lr = float(scheduler.get_last_lr()[-1])
|
||||
if new_lr != cur_lr:
|
||||
logger.info(f"LR change! {cur_lr} -> {new_lr}")
|
||||
|
||||
if self.test_when_finished:
|
||||
if test_loader is not None:
|
||||
self.evaluator.run(
|
||||
all_train_cameras=all_train_cameras,
|
||||
device=device,
|
||||
dump_to_json=True,
|
||||
epoch=stats.epoch,
|
||||
exp_dir=exp_dir,
|
||||
dataloader=test_loader,
|
||||
model=model,
|
||||
)
|
||||
else:
|
||||
raise ValueError(
|
||||
"Cannot evaluate and dump results to json, no test data provided."
|
||||
)
|
||||
|
||||
def load_stats(
|
||||
self,
|
||||
log_vars: List[str],
|
||||
exp_dir: str,
|
||||
resume: bool = True,
|
||||
resume_epoch: int = -1,
|
||||
**kwargs,
|
||||
) -> Stats:
|
||||
"""
|
||||
Load Stats that correspond to the model's log_vars and resume_epoch.
|
||||
|
||||
Args:
|
||||
log_vars: A list of variable names to log. Should be a subset of the
|
||||
`preds` returned by the forward function of the corresponding
|
||||
ImplicitronModelBase instance.
|
||||
exp_dir: Root experiment directory.
|
||||
resume: If False, do not load stats from the checkpoint speci-
|
||||
fied by resume and resume_epoch; instead, create a fresh stats object.
|
||||
|
||||
stats: The stats structure (optionally loaded from checkpoint)
|
||||
"""
|
||||
# Init the stats struct
|
||||
visdom_env_charts = (
|
||||
vis_utils.get_visdom_env(self.visdom_env, exp_dir) + "_charts"
|
||||
)
|
||||
stats = Stats(
|
||||
# log_vars should be a list, but OmegaConf might load them as ListConfig
|
||||
list(log_vars),
|
||||
plot_file=os.path.join(exp_dir, "train_stats.pdf"),
|
||||
visdom_env=visdom_env_charts,
|
||||
verbose=False,
|
||||
visdom_server=self.visdom_server,
|
||||
visdom_port=self.visdom_port,
|
||||
)
|
||||
|
||||
model_path = None
|
||||
if resume:
|
||||
if resume_epoch > 0:
|
||||
model_path = model_io.get_checkpoint(exp_dir, resume_epoch)
|
||||
if not os.path.isfile(model_path):
|
||||
raise FileNotFoundError(
|
||||
f"Cannot find stats from epoch {resume_epoch}."
|
||||
)
|
||||
else:
|
||||
model_path = model_io.find_last_checkpoint(exp_dir)
|
||||
|
||||
if model_path is not None:
|
||||
stats_path = model_io.get_stats_path(model_path)
|
||||
stats_load = model_io.load_stats(stats_path)
|
||||
|
||||
# Determine if stats should be reset
|
||||
if resume:
|
||||
if stats_load is None:
|
||||
logger.warning("\n\n\n\nCORRUPT STATS -> clearing stats\n\n\n\n")
|
||||
last_epoch = model_io.parse_epoch_from_model_path(model_path)
|
||||
logger.info(f"Estimated resume epoch = {last_epoch}")
|
||||
|
||||
# Reset the stats struct
|
||||
for _ in range(last_epoch + 1):
|
||||
stats.new_epoch()
|
||||
assert last_epoch == stats.epoch
|
||||
else:
|
||||
logger.info(f"Found previous stats in {stats_path} -> resuming.")
|
||||
stats = stats_load
|
||||
|
||||
# Update stats properties incase it was reset on load
|
||||
stats.visdom_env = visdom_env_charts
|
||||
stats.visdom_server = self.visdom_server
|
||||
stats.visdom_port = self.visdom_port
|
||||
stats.plot_file = os.path.join(exp_dir, "train_stats.pdf")
|
||||
stats.synchronize_logged_vars(log_vars)
|
||||
else:
|
||||
logger.info("Clearing stats")
|
||||
|
||||
return stats
|
||||
|
||||
def _training_or_validation_epoch(
|
||||
self,
|
||||
epoch: int,
|
||||
loader: DataLoader,
|
||||
model: ImplicitronModelBase,
|
||||
optimizer: torch.optim.Optimizer,
|
||||
stats: Stats,
|
||||
validation: bool,
|
||||
*,
|
||||
accelerator: Optional[Accelerator],
|
||||
bp_var: str = "objective",
|
||||
device: torch.device,
|
||||
**kwargs,
|
||||
) -> None:
|
||||
"""
|
||||
This is the main loop for training and evaluation including:
|
||||
model forward pass, loss computation, backward pass and visualization.
|
||||
|
||||
Args:
|
||||
epoch: The index of the current epoch
|
||||
loader: The dataloader to use for the loop
|
||||
model: The model module optionally loaded from checkpoint
|
||||
optimizer: The optimizer module optionally loaded from checkpoint
|
||||
stats: The stats struct, also optionally loaded from checkpoint
|
||||
validation: If true, run the loop with the model in eval mode
|
||||
and skip the backward pass
|
||||
accelerator: An optional Accelerator instance.
|
||||
bp_var: The name of the key in the model output `preds` dict which
|
||||
should be used as the loss for the backward pass.
|
||||
device: The device on which to run the model.
|
||||
"""
|
||||
|
||||
if validation:
|
||||
model.eval()
|
||||
trainmode = "val"
|
||||
else:
|
||||
model.train()
|
||||
trainmode = "train"
|
||||
|
||||
t_start = time.time()
|
||||
|
||||
# get the visdom env name
|
||||
visdom_env_imgs = stats.visdom_env + "_images_" + trainmode
|
||||
viz = vis_utils.get_visdom_connection(
|
||||
server=stats.visdom_server,
|
||||
port=stats.visdom_port,
|
||||
)
|
||||
|
||||
# Iterate through the batches
|
||||
n_batches = len(loader)
|
||||
for it, net_input in enumerate(loader):
|
||||
last_iter = it == n_batches - 1
|
||||
|
||||
# move to gpu where possible (in place)
|
||||
net_input = net_input.to(device)
|
||||
|
||||
# run the forward pass
|
||||
if not validation:
|
||||
optimizer.zero_grad()
|
||||
preds = model(
|
||||
**{**net_input, "evaluation_mode": EvaluationMode.TRAINING}
|
||||
)
|
||||
else:
|
||||
with torch.no_grad():
|
||||
preds = model(
|
||||
**{**net_input, "evaluation_mode": EvaluationMode.EVALUATION}
|
||||
)
|
||||
|
||||
# make sure we dont overwrite something
|
||||
assert all(k not in preds for k in net_input.keys())
|
||||
# merge everything into one big dict
|
||||
preds.update(net_input)
|
||||
|
||||
# update the stats logger
|
||||
stats.update(preds, time_start=t_start, stat_set=trainmode)
|
||||
# pyre-ignore [16]
|
||||
assert stats.it[trainmode] == it, "inconsistent stat iteration number!"
|
||||
|
||||
# print textual status update
|
||||
if it % self.metric_print_interval == 0 or last_iter:
|
||||
stats.print(stat_set=trainmode, max_it=n_batches)
|
||||
|
||||
# visualize results
|
||||
if (
|
||||
(accelerator is None or accelerator.is_local_main_process)
|
||||
and self.visualize_interval > 0
|
||||
and it % self.visualize_interval == 0
|
||||
):
|
||||
prefix = f"e{stats.epoch}_it{stats.it[trainmode]}"
|
||||
if hasattr(model, "visualize"):
|
||||
# pyre-ignore [29]
|
||||
model.visualize(
|
||||
viz,
|
||||
visdom_env_imgs,
|
||||
preds,
|
||||
prefix,
|
||||
)
|
||||
|
||||
# optimizer step
|
||||
if not validation:
|
||||
loss = preds[bp_var]
|
||||
assert torch.isfinite(loss).all(), "Non-finite loss!"
|
||||
# backprop
|
||||
if accelerator is None:
|
||||
loss.backward()
|
||||
else:
|
||||
accelerator.backward(loss)
|
||||
if self.clip_grad > 0.0:
|
||||
# Optionally clip the gradient norms.
|
||||
total_norm = torch.nn.utils.clip_grad_norm(
|
||||
model.parameters(), self.clip_grad
|
||||
)
|
||||
if total_norm > self.clip_grad:
|
||||
logger.debug(
|
||||
f"Clipping gradient: {total_norm}"
|
||||
+ f" with coef {self.clip_grad / float(total_norm)}."
|
||||
)
|
||||
|
||||
optimizer.step()
|
||||
|
||||
def _checkpoint(
|
||||
self,
|
||||
accelerator: Optional[Accelerator],
|
||||
epoch: int,
|
||||
exp_dir: str,
|
||||
model: ImplicitronModelBase,
|
||||
optimizer: torch.optim.Optimizer,
|
||||
stats: Stats,
|
||||
):
|
||||
"""
|
||||
Save a model and its corresponding Stats object to a file, if
|
||||
`self.store_checkpoints` is True. In addition, if
|
||||
`self.store_checkpoints_purge` is True, remove any checkpoints older
|
||||
than `self.store_checkpoints_purge` epochs old.
|
||||
"""
|
||||
if self.store_checkpoints and (
|
||||
accelerator is None or accelerator.is_local_main_process
|
||||
):
|
||||
if self.store_checkpoints_purge > 0:
|
||||
for prev_epoch in range(epoch - self.store_checkpoints_purge):
|
||||
model_io.purge_epoch(exp_dir, prev_epoch)
|
||||
outfile = model_io.get_checkpoint(exp_dir, epoch)
|
||||
unwrapped_model = (
|
||||
model if accelerator is None else accelerator.unwrap_model(model)
|
||||
)
|
||||
model_io.safe_save_model(
|
||||
unwrapped_model, stats, outfile, optimizer=optimizer
|
||||
)
|
||||
@@ -1,17 +0,0 @@
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the BSD-style license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
|
||||
import random
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
|
||||
def seed_all_random_engines(seed: int) -> None:
|
||||
np.random.seed(seed)
|
||||
torch.manual_seed(seed)
|
||||
random.seed(seed)
|
||||
@@ -1,15 +1,296 @@
|
||||
data_source_class_type: ImplicitronDataSource
|
||||
model_factory_class_type: ImplicitronModelFactory
|
||||
optimizer_factory_class_type: ImplicitronOptimizerFactory
|
||||
training_loop_class_type: ImplicitronTrainingLoop
|
||||
seed: 42
|
||||
detect_anomaly: false
|
||||
exp_dir: ./data/default_experiment/
|
||||
hydra:
|
||||
run:
|
||||
dir: .
|
||||
output_subdir: null
|
||||
data_source_ImplicitronDataSource_args:
|
||||
generic_model_args:
|
||||
mask_images: true
|
||||
mask_depths: true
|
||||
render_image_width: 400
|
||||
render_image_height: 400
|
||||
mask_threshold: 0.5
|
||||
output_rasterized_mc: false
|
||||
bg_color:
|
||||
- 0.0
|
||||
- 0.0
|
||||
- 0.0
|
||||
num_passes: 1
|
||||
chunk_size_grid: 4096
|
||||
render_features_dimensions: 3
|
||||
tqdm_trigger_threshold: 16
|
||||
n_train_target_views: 1
|
||||
sampling_mode_training: mask_sample
|
||||
sampling_mode_evaluation: full_grid
|
||||
global_encoder_class_type: null
|
||||
raysampler_class_type: AdaptiveRaySampler
|
||||
renderer_class_type: MultiPassEmissionAbsorptionRenderer
|
||||
image_feature_extractor_class_type: null
|
||||
view_pooler_enabled: false
|
||||
implicit_function_class_type: NeuralRadianceFieldImplicitFunction
|
||||
view_metrics_class_type: ViewMetrics
|
||||
regularization_metrics_class_type: RegularizationMetrics
|
||||
loss_weights:
|
||||
loss_rgb_mse: 1.0
|
||||
loss_prev_stage_rgb_mse: 1.0
|
||||
loss_mask_bce: 0.0
|
||||
loss_prev_stage_mask_bce: 0.0
|
||||
log_vars:
|
||||
- loss_rgb_psnr_fg
|
||||
- loss_rgb_psnr
|
||||
- loss_rgb_mse
|
||||
- loss_rgb_huber
|
||||
- loss_depth_abs
|
||||
- loss_depth_abs_fg
|
||||
- loss_mask_neg_iou
|
||||
- loss_mask_bce
|
||||
- loss_mask_beta_prior
|
||||
- loss_eikonal
|
||||
- loss_density_tv
|
||||
- loss_depth_neg_penalty
|
||||
- loss_autodecoder_norm
|
||||
- loss_prev_stage_rgb_mse
|
||||
- loss_prev_stage_rgb_psnr_fg
|
||||
- loss_prev_stage_rgb_psnr
|
||||
- loss_prev_stage_mask_bce
|
||||
- objective
|
||||
- epoch
|
||||
- sec/it
|
||||
global_encoder_HarmonicTimeEncoder_args:
|
||||
n_harmonic_functions: 10
|
||||
append_input: true
|
||||
time_divisor: 1.0
|
||||
global_encoder_SequenceAutodecoder_args:
|
||||
autodecoder_args:
|
||||
encoding_dim: 0
|
||||
n_instances: 0
|
||||
init_scale: 1.0
|
||||
ignore_input: false
|
||||
raysampler_AdaptiveRaySampler_args:
|
||||
image_width: 400
|
||||
image_height: 400
|
||||
sampling_mode_training: mask_sample
|
||||
sampling_mode_evaluation: full_grid
|
||||
n_pts_per_ray_training: 64
|
||||
n_pts_per_ray_evaluation: 64
|
||||
n_rays_per_image_sampled_from_mask: 1024
|
||||
stratified_point_sampling_training: true
|
||||
stratified_point_sampling_evaluation: false
|
||||
scene_extent: 8.0
|
||||
scene_center:
|
||||
- 0.0
|
||||
- 0.0
|
||||
- 0.0
|
||||
raysampler_NearFarRaySampler_args:
|
||||
image_width: 400
|
||||
image_height: 400
|
||||
sampling_mode_training: mask_sample
|
||||
sampling_mode_evaluation: full_grid
|
||||
n_pts_per_ray_training: 64
|
||||
n_pts_per_ray_evaluation: 64
|
||||
n_rays_per_image_sampled_from_mask: 1024
|
||||
stratified_point_sampling_training: true
|
||||
stratified_point_sampling_evaluation: false
|
||||
min_depth: 0.1
|
||||
max_depth: 8.0
|
||||
renderer_LSTMRenderer_args:
|
||||
num_raymarch_steps: 10
|
||||
init_depth: 17.0
|
||||
init_depth_noise_std: 0.0005
|
||||
hidden_size: 16
|
||||
n_feature_channels: 256
|
||||
bg_color: null
|
||||
verbose: false
|
||||
renderer_MultiPassEmissionAbsorptionRenderer_args:
|
||||
raymarcher_class_type: EmissionAbsorptionRaymarcher
|
||||
n_pts_per_ray_fine_training: 64
|
||||
n_pts_per_ray_fine_evaluation: 64
|
||||
stratified_sampling_coarse_training: true
|
||||
stratified_sampling_coarse_evaluation: false
|
||||
append_coarse_samples_to_fine: true
|
||||
density_noise_std_train: 0.0
|
||||
return_weights: false
|
||||
raymarcher_CumsumRaymarcher_args:
|
||||
surface_thickness: 1
|
||||
bg_color:
|
||||
- 0.0
|
||||
background_opacity: 0.0
|
||||
density_relu: true
|
||||
blend_output: false
|
||||
raymarcher_EmissionAbsorptionRaymarcher_args:
|
||||
surface_thickness: 1
|
||||
bg_color:
|
||||
- 0.0
|
||||
background_opacity: 10000000000.0
|
||||
density_relu: true
|
||||
blend_output: false
|
||||
renderer_SignedDistanceFunctionRenderer_args:
|
||||
render_features_dimensions: 3
|
||||
ray_tracer_args:
|
||||
object_bounding_sphere: 1.0
|
||||
sdf_threshold: 5.0e-05
|
||||
line_search_step: 0.5
|
||||
line_step_iters: 1
|
||||
sphere_tracing_iters: 10
|
||||
n_steps: 100
|
||||
n_secant_steps: 8
|
||||
ray_normal_coloring_network_args:
|
||||
feature_vector_size: 3
|
||||
mode: idr
|
||||
d_in: 9
|
||||
d_out: 3
|
||||
dims:
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
weight_norm: true
|
||||
n_harmonic_functions_dir: 0
|
||||
pooled_feature_dim: 0
|
||||
bg_color:
|
||||
- 0.0
|
||||
soft_mask_alpha: 50.0
|
||||
image_feature_extractor_ResNetFeatureExtractor_args:
|
||||
name: resnet34
|
||||
pretrained: true
|
||||
stages:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
normalize_image: true
|
||||
image_rescale: 0.16
|
||||
first_max_pool: true
|
||||
proj_dim: 32
|
||||
l2_norm: true
|
||||
add_masks: true
|
||||
add_images: true
|
||||
global_average_pool: false
|
||||
feature_rescale: 1.0
|
||||
view_pooler_args:
|
||||
feature_aggregator_class_type: AngleWeightedReductionFeatureAggregator
|
||||
view_sampler_args:
|
||||
masked_sampling: false
|
||||
sampling_mode: bilinear
|
||||
feature_aggregator_AngleWeightedIdentityFeatureAggregator_args:
|
||||
exclude_target_view: true
|
||||
exclude_target_view_mask_features: true
|
||||
concatenate_output: true
|
||||
weight_by_ray_angle_gamma: 1.0
|
||||
min_ray_angle_weight: 0.1
|
||||
feature_aggregator_AngleWeightedReductionFeatureAggregator_args:
|
||||
exclude_target_view: true
|
||||
exclude_target_view_mask_features: true
|
||||
concatenate_output: true
|
||||
reduction_functions:
|
||||
- AVG
|
||||
- STD
|
||||
weight_by_ray_angle_gamma: 1.0
|
||||
min_ray_angle_weight: 0.1
|
||||
feature_aggregator_IdentityFeatureAggregator_args:
|
||||
exclude_target_view: true
|
||||
exclude_target_view_mask_features: true
|
||||
concatenate_output: true
|
||||
feature_aggregator_ReductionFeatureAggregator_args:
|
||||
exclude_target_view: true
|
||||
exclude_target_view_mask_features: true
|
||||
concatenate_output: true
|
||||
reduction_functions:
|
||||
- AVG
|
||||
- STD
|
||||
implicit_function_IdrFeatureField_args:
|
||||
feature_vector_size: 3
|
||||
d_in: 3
|
||||
d_out: 1
|
||||
dims:
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
geometric_init: true
|
||||
bias: 1.0
|
||||
skip_in: []
|
||||
weight_norm: true
|
||||
n_harmonic_functions_xyz: 0
|
||||
pooled_feature_dim: 0
|
||||
encoding_dim: 0
|
||||
implicit_function_NeRFormerImplicitFunction_args:
|
||||
n_harmonic_functions_xyz: 10
|
||||
n_harmonic_functions_dir: 4
|
||||
n_hidden_neurons_dir: 128
|
||||
latent_dim: 0
|
||||
input_xyz: true
|
||||
xyz_ray_dir_in_camera_coords: false
|
||||
color_dim: 3
|
||||
transformer_dim_down_factor: 2.0
|
||||
n_hidden_neurons_xyz: 80
|
||||
n_layers_xyz: 2
|
||||
append_xyz:
|
||||
- 1
|
||||
implicit_function_NeuralRadianceFieldImplicitFunction_args:
|
||||
n_harmonic_functions_xyz: 10
|
||||
n_harmonic_functions_dir: 4
|
||||
n_hidden_neurons_dir: 128
|
||||
latent_dim: 0
|
||||
input_xyz: true
|
||||
xyz_ray_dir_in_camera_coords: false
|
||||
color_dim: 3
|
||||
transformer_dim_down_factor: 1.0
|
||||
n_hidden_neurons_xyz: 256
|
||||
n_layers_xyz: 8
|
||||
append_xyz:
|
||||
- 5
|
||||
implicit_function_SRNHyperNetImplicitFunction_args:
|
||||
hypernet_args:
|
||||
n_harmonic_functions: 3
|
||||
n_hidden_units: 256
|
||||
n_layers: 2
|
||||
n_hidden_units_hypernet: 256
|
||||
n_layers_hypernet: 1
|
||||
in_features: 3
|
||||
out_features: 256
|
||||
latent_dim_hypernet: 0
|
||||
latent_dim: 0
|
||||
xyz_in_camera_coords: false
|
||||
pixel_generator_args:
|
||||
n_harmonic_functions: 4
|
||||
n_hidden_units: 256
|
||||
n_hidden_units_color: 128
|
||||
n_layers: 2
|
||||
in_features: 256
|
||||
out_features: 3
|
||||
ray_dir_in_camera_coords: false
|
||||
implicit_function_SRNImplicitFunction_args:
|
||||
raymarch_function_args:
|
||||
n_harmonic_functions: 3
|
||||
n_hidden_units: 256
|
||||
n_layers: 2
|
||||
in_features: 3
|
||||
out_features: 256
|
||||
latent_dim: 0
|
||||
xyz_in_camera_coords: false
|
||||
raymarch_function: null
|
||||
pixel_generator_args:
|
||||
n_harmonic_functions: 4
|
||||
n_hidden_units: 256
|
||||
n_hidden_units_color: 128
|
||||
n_layers: 2
|
||||
in_features: 256
|
||||
out_features: 3
|
||||
ray_dir_in_camera_coords: false
|
||||
view_metrics_ViewMetrics_args: {}
|
||||
regularization_metrics_RegularizationMetrics_args: {}
|
||||
solver_args:
|
||||
breed: adam
|
||||
weight_decay: 0.0
|
||||
lr_policy: multistep
|
||||
lr: 0.0005
|
||||
gamma: 0.1
|
||||
momentum: 0.9
|
||||
betas:
|
||||
- 0.9
|
||||
- 0.999
|
||||
milestones: []
|
||||
max_epochs: 1000
|
||||
data_source_args:
|
||||
dataset_map_provider_class_type: ???
|
||||
data_loader_map_provider_class_type: SequenceDataLoaderMapProvider
|
||||
dataset_map_provider_BlenderDatasetMapProvider_args:
|
||||
@@ -64,11 +345,17 @@ data_source_ImplicitronDataSource_args:
|
||||
dataset_class_type: JsonIndexDataset
|
||||
path_manager_factory_class_type: PathManagerFactory
|
||||
dataset_JsonIndexDataset_args:
|
||||
path_manager: null
|
||||
frame_annotations_file: ''
|
||||
sequence_annotations_file: ''
|
||||
subset_lists_file: ''
|
||||
subsets: null
|
||||
limit_to: 0
|
||||
limit_sequences_to: 0
|
||||
pick_sequence: []
|
||||
exclude_sequence: []
|
||||
limit_category_to: []
|
||||
dataset_root: ''
|
||||
load_images: true
|
||||
load_depths: true
|
||||
load_depth_masks: true
|
||||
@@ -86,6 +373,7 @@ data_source_ImplicitronDataSource_args:
|
||||
n_frames_per_sequence: -1
|
||||
seed: 0
|
||||
sort_frames: false
|
||||
eval_batches: null
|
||||
path_manager_factory_PathManagerFactory_args:
|
||||
silence_logs: true
|
||||
dataset_map_provider_LlffDatasetMapProvider_args:
|
||||
@@ -95,16 +383,6 @@ data_source_ImplicitronDataSource_args:
|
||||
n_known_frames_for_test: null
|
||||
path_manager_factory_PathManagerFactory_args:
|
||||
silence_logs: true
|
||||
downscale_factor: 4
|
||||
dataset_map_provider_RenderedMeshDatasetMapProvider_args:
|
||||
num_views: 40
|
||||
data_file: null
|
||||
azimuth_range: 180.0
|
||||
resolution: 128
|
||||
use_point_light: true
|
||||
path_manager_factory_class_type: PathManagerFactory
|
||||
path_manager_factory_PathManagerFactory_args:
|
||||
silence_logs: true
|
||||
data_loader_map_provider_SequenceDataLoaderMapProvider_args:
|
||||
batch_size: 1
|
||||
num_workers: 0
|
||||
@@ -118,309 +396,30 @@ data_source_ImplicitronDataSource_args:
|
||||
sample_consecutive_frames: false
|
||||
consecutive_frames_max_gap: 0
|
||||
consecutive_frames_max_gap_seconds: 0.1
|
||||
data_loader_map_provider_SimpleDataLoaderMapProvider_args:
|
||||
batch_size: 1
|
||||
num_workers: 0
|
||||
dataset_length_train: 0
|
||||
dataset_length_val: 0
|
||||
dataset_length_test: 0
|
||||
model_factory_ImplicitronModelFactory_args:
|
||||
resume: true
|
||||
model_class_type: GenericModel
|
||||
resume_epoch: -1
|
||||
force_resume: false
|
||||
model_GenericModel_args:
|
||||
log_vars:
|
||||
- loss_rgb_psnr_fg
|
||||
- loss_rgb_psnr
|
||||
- loss_rgb_mse
|
||||
- loss_rgb_huber
|
||||
- loss_depth_abs
|
||||
- loss_depth_abs_fg
|
||||
- loss_mask_neg_iou
|
||||
- loss_mask_bce
|
||||
- loss_mask_beta_prior
|
||||
- loss_eikonal
|
||||
- loss_density_tv
|
||||
- loss_depth_neg_penalty
|
||||
- loss_autodecoder_norm
|
||||
- loss_prev_stage_rgb_mse
|
||||
- loss_prev_stage_rgb_psnr_fg
|
||||
- loss_prev_stage_rgb_psnr
|
||||
- loss_prev_stage_mask_bce
|
||||
- objective
|
||||
- epoch
|
||||
- sec/it
|
||||
mask_images: true
|
||||
mask_depths: true
|
||||
render_image_width: 400
|
||||
render_image_height: 400
|
||||
mask_threshold: 0.5
|
||||
output_rasterized_mc: false
|
||||
bg_color:
|
||||
- 0.0
|
||||
- 0.0
|
||||
- 0.0
|
||||
num_passes: 1
|
||||
chunk_size_grid: 4096
|
||||
render_features_dimensions: 3
|
||||
tqdm_trigger_threshold: 16
|
||||
n_train_target_views: 1
|
||||
sampling_mode_training: mask_sample
|
||||
sampling_mode_evaluation: full_grid
|
||||
global_encoder_class_type: null
|
||||
raysampler_class_type: AdaptiveRaySampler
|
||||
renderer_class_type: MultiPassEmissionAbsorptionRenderer
|
||||
image_feature_extractor_class_type: null
|
||||
view_pooler_enabled: false
|
||||
implicit_function_class_type: NeuralRadianceFieldImplicitFunction
|
||||
view_metrics_class_type: ViewMetrics
|
||||
regularization_metrics_class_type: RegularizationMetrics
|
||||
loss_weights:
|
||||
loss_rgb_mse: 1.0
|
||||
loss_prev_stage_rgb_mse: 1.0
|
||||
loss_mask_bce: 0.0
|
||||
loss_prev_stage_mask_bce: 0.0
|
||||
global_encoder_HarmonicTimeEncoder_args:
|
||||
n_harmonic_functions: 10
|
||||
append_input: true
|
||||
time_divisor: 1.0
|
||||
global_encoder_SequenceAutodecoder_args:
|
||||
autodecoder_args:
|
||||
encoding_dim: 0
|
||||
n_instances: 1
|
||||
init_scale: 1.0
|
||||
ignore_input: false
|
||||
raysampler_AdaptiveRaySampler_args:
|
||||
n_pts_per_ray_training: 64
|
||||
n_pts_per_ray_evaluation: 64
|
||||
n_rays_per_image_sampled_from_mask: 1024
|
||||
stratified_point_sampling_training: true
|
||||
stratified_point_sampling_evaluation: false
|
||||
scene_extent: 8.0
|
||||
scene_center:
|
||||
- 0.0
|
||||
- 0.0
|
||||
- 0.0
|
||||
raysampler_NearFarRaySampler_args:
|
||||
n_pts_per_ray_training: 64
|
||||
n_pts_per_ray_evaluation: 64
|
||||
n_rays_per_image_sampled_from_mask: 1024
|
||||
stratified_point_sampling_training: true
|
||||
stratified_point_sampling_evaluation: false
|
||||
min_depth: 0.1
|
||||
max_depth: 8.0
|
||||
renderer_LSTMRenderer_args:
|
||||
num_raymarch_steps: 10
|
||||
init_depth: 17.0
|
||||
init_depth_noise_std: 0.0005
|
||||
hidden_size: 16
|
||||
n_feature_channels: 256
|
||||
bg_color: null
|
||||
verbose: false
|
||||
renderer_MultiPassEmissionAbsorptionRenderer_args:
|
||||
raymarcher_class_type: EmissionAbsorptionRaymarcher
|
||||
n_pts_per_ray_fine_training: 64
|
||||
n_pts_per_ray_fine_evaluation: 64
|
||||
stratified_sampling_coarse_training: true
|
||||
stratified_sampling_coarse_evaluation: false
|
||||
append_coarse_samples_to_fine: true
|
||||
density_noise_std_train: 0.0
|
||||
return_weights: false
|
||||
raymarcher_CumsumRaymarcher_args:
|
||||
surface_thickness: 1
|
||||
bg_color:
|
||||
- 0.0
|
||||
background_opacity: 0.0
|
||||
density_relu: true
|
||||
blend_output: false
|
||||
raymarcher_EmissionAbsorptionRaymarcher_args:
|
||||
surface_thickness: 1
|
||||
bg_color:
|
||||
- 0.0
|
||||
background_opacity: 10000000000.0
|
||||
density_relu: true
|
||||
blend_output: false
|
||||
renderer_SignedDistanceFunctionRenderer_args:
|
||||
ray_normal_coloring_network_args:
|
||||
feature_vector_size: 3
|
||||
mode: idr
|
||||
d_in: 9
|
||||
d_out: 3
|
||||
dims:
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
weight_norm: true
|
||||
n_harmonic_functions_dir: 0
|
||||
pooled_feature_dim: 0
|
||||
bg_color:
|
||||
- 0.0
|
||||
soft_mask_alpha: 50.0
|
||||
ray_tracer_args:
|
||||
sdf_threshold: 5.0e-05
|
||||
line_search_step: 0.5
|
||||
line_step_iters: 1
|
||||
sphere_tracing_iters: 10
|
||||
n_steps: 100
|
||||
n_secant_steps: 8
|
||||
image_feature_extractor_ResNetFeatureExtractor_args:
|
||||
name: resnet34
|
||||
pretrained: true
|
||||
stages:
|
||||
- 1
|
||||
- 2
|
||||
- 3
|
||||
- 4
|
||||
normalize_image: true
|
||||
image_rescale: 0.16
|
||||
first_max_pool: true
|
||||
proj_dim: 32
|
||||
l2_norm: true
|
||||
add_masks: true
|
||||
add_images: true
|
||||
global_average_pool: false
|
||||
feature_rescale: 1.0
|
||||
view_pooler_args:
|
||||
feature_aggregator_class_type: AngleWeightedReductionFeatureAggregator
|
||||
view_sampler_args:
|
||||
masked_sampling: false
|
||||
sampling_mode: bilinear
|
||||
feature_aggregator_AngleWeightedIdentityFeatureAggregator_args:
|
||||
exclude_target_view: true
|
||||
exclude_target_view_mask_features: true
|
||||
concatenate_output: true
|
||||
weight_by_ray_angle_gamma: 1.0
|
||||
min_ray_angle_weight: 0.1
|
||||
feature_aggregator_AngleWeightedReductionFeatureAggregator_args:
|
||||
exclude_target_view: true
|
||||
exclude_target_view_mask_features: true
|
||||
concatenate_output: true
|
||||
reduction_functions:
|
||||
- AVG
|
||||
- STD
|
||||
weight_by_ray_angle_gamma: 1.0
|
||||
min_ray_angle_weight: 0.1
|
||||
feature_aggregator_IdentityFeatureAggregator_args:
|
||||
exclude_target_view: true
|
||||
exclude_target_view_mask_features: true
|
||||
concatenate_output: true
|
||||
feature_aggregator_ReductionFeatureAggregator_args:
|
||||
exclude_target_view: true
|
||||
exclude_target_view_mask_features: true
|
||||
concatenate_output: true
|
||||
reduction_functions:
|
||||
- AVG
|
||||
- STD
|
||||
implicit_function_IdrFeatureField_args:
|
||||
d_in: 3
|
||||
d_out: 1
|
||||
dims:
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
- 512
|
||||
geometric_init: true
|
||||
bias: 1.0
|
||||
skip_in: []
|
||||
weight_norm: true
|
||||
n_harmonic_functions_xyz: 0
|
||||
pooled_feature_dim: 0
|
||||
implicit_function_NeRFormerImplicitFunction_args:
|
||||
n_harmonic_functions_xyz: 10
|
||||
n_harmonic_functions_dir: 4
|
||||
n_hidden_neurons_dir: 128
|
||||
input_xyz: true
|
||||
xyz_ray_dir_in_camera_coords: false
|
||||
transformer_dim_down_factor: 2.0
|
||||
n_hidden_neurons_xyz: 80
|
||||
n_layers_xyz: 2
|
||||
append_xyz:
|
||||
- 1
|
||||
implicit_function_NeuralRadianceFieldImplicitFunction_args:
|
||||
n_harmonic_functions_xyz: 10
|
||||
n_harmonic_functions_dir: 4
|
||||
n_hidden_neurons_dir: 128
|
||||
input_xyz: true
|
||||
xyz_ray_dir_in_camera_coords: false
|
||||
transformer_dim_down_factor: 1.0
|
||||
n_hidden_neurons_xyz: 256
|
||||
n_layers_xyz: 8
|
||||
append_xyz:
|
||||
- 5
|
||||
implicit_function_SRNHyperNetImplicitFunction_args:
|
||||
hypernet_args:
|
||||
n_harmonic_functions: 3
|
||||
n_hidden_units: 256
|
||||
n_layers: 2
|
||||
n_hidden_units_hypernet: 256
|
||||
n_layers_hypernet: 1
|
||||
in_features: 3
|
||||
out_features: 256
|
||||
xyz_in_camera_coords: false
|
||||
pixel_generator_args:
|
||||
n_harmonic_functions: 4
|
||||
n_hidden_units: 256
|
||||
n_hidden_units_color: 128
|
||||
n_layers: 2
|
||||
in_features: 256
|
||||
out_features: 3
|
||||
ray_dir_in_camera_coords: false
|
||||
implicit_function_SRNImplicitFunction_args:
|
||||
raymarch_function_args:
|
||||
n_harmonic_functions: 3
|
||||
n_hidden_units: 256
|
||||
n_layers: 2
|
||||
in_features: 3
|
||||
out_features: 256
|
||||
xyz_in_camera_coords: false
|
||||
raymarch_function: null
|
||||
pixel_generator_args:
|
||||
n_harmonic_functions: 4
|
||||
n_hidden_units: 256
|
||||
n_hidden_units_color: 128
|
||||
n_layers: 2
|
||||
in_features: 256
|
||||
out_features: 3
|
||||
ray_dir_in_camera_coords: false
|
||||
view_metrics_ViewMetrics_args: {}
|
||||
regularization_metrics_RegularizationMetrics_args: {}
|
||||
optimizer_factory_ImplicitronOptimizerFactory_args:
|
||||
betas:
|
||||
- 0.9
|
||||
- 0.999
|
||||
breed: Adam
|
||||
exponential_lr_step_size: 250
|
||||
gamma: 0.1
|
||||
lr: 0.0005
|
||||
lr_policy: MultiStepLR
|
||||
momentum: 0.9
|
||||
multistep_lr_milestones: []
|
||||
weight_decay: 0.0
|
||||
linear_exponential_lr_milestone: 200
|
||||
linear_exponential_start_gamma: 0.1
|
||||
training_loop_ImplicitronTrainingLoop_args:
|
||||
eval_only: false
|
||||
evaluator_class_type: ImplicitronEvaluator
|
||||
max_epochs: 1000
|
||||
store_checkpoints: true
|
||||
store_checkpoints_purge: 1
|
||||
test_interval: -1
|
||||
test_when_finished: false
|
||||
validation_interval: 1
|
||||
clip_grad: 0.0
|
||||
metric_print_interval: 5
|
||||
visualize_interval: 1000
|
||||
visdom_env: ''
|
||||
visdom_port: 8097
|
||||
visdom_server: http://127.0.0.1
|
||||
evaluator_ImplicitronEvaluator_args:
|
||||
camera_difficulty_bin_breaks:
|
||||
- 0.97
|
||||
- 0.98
|
||||
is_multisequence: false
|
||||
architecture: generic
|
||||
detect_anomaly: false
|
||||
eval_only: false
|
||||
exp_dir: ./data/default_experiment/
|
||||
exp_idx: 0
|
||||
gpu_idx: 0
|
||||
metric_print_interval: 5
|
||||
resume: true
|
||||
resume_epoch: -1
|
||||
seed: 0
|
||||
store_checkpoints: true
|
||||
store_checkpoints_purge: 1
|
||||
test_interval: -1
|
||||
test_when_finished: false
|
||||
validation_interval: 1
|
||||
visdom_env: ''
|
||||
visdom_port: 8097
|
||||
visdom_server: http://127.0.0.1
|
||||
visualize_interval: 1000
|
||||
clip_grad: 0.0
|
||||
camera_difficulty_bin_breaks:
|
||||
- 0.97
|
||||
- 0.98
|
||||
hydra:
|
||||
run:
|
||||
dir: .
|
||||
output_subdir: null
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
@@ -13,7 +12,6 @@ from hydra import compose, initialize_config_dir
|
||||
from omegaconf import OmegaConf
|
||||
|
||||
from .. import experiment
|
||||
from .utils import intercept_logs
|
||||
|
||||
|
||||
def interactive_testing_requested() -> bool:
|
||||
@@ -35,10 +33,7 @@ DEBUG: bool = False
|
||||
# TODO:
|
||||
# - add enough files to skateboard_first_5 that this works on RE.
|
||||
# - share common code with PyTorch3D tests?
|
||||
|
||||
|
||||
def _parse_float_from_log(line):
|
||||
return float(line.split()[-1])
|
||||
# - deal with the temporary output files this test creates
|
||||
|
||||
|
||||
class TestExperiment(unittest.TestCase):
|
||||
@@ -49,18 +44,15 @@ class TestExperiment(unittest.TestCase):
|
||||
# Test making minimal changes to the dataclass defaults.
|
||||
if not interactive_testing_requested() or not internal:
|
||||
return
|
||||
|
||||
# Manually override config values. Note that this is not necessary out-
|
||||
# side of the tests!
|
||||
cfg = OmegaConf.structured(experiment.Experiment)
|
||||
cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_class_type = (
|
||||
cfg = OmegaConf.structured(experiment.ExperimentConfig)
|
||||
cfg.data_source_args.dataset_map_provider_class_type = (
|
||||
"JsonIndexDatasetMapProvider"
|
||||
)
|
||||
dataset_args = (
|
||||
cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
|
||||
cfg.data_source_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
|
||||
)
|
||||
dataloader_args = (
|
||||
cfg.data_source_ImplicitronDataSource_args.data_loader_map_provider_SequenceDataLoaderMapProvider_args
|
||||
cfg.data_source_args.data_loader_map_provider_SequenceDataLoaderMapProvider_args
|
||||
)
|
||||
dataset_args.category = "skateboard"
|
||||
dataset_args.test_restrict_sequence_id = 0
|
||||
@@ -70,80 +62,18 @@ class TestExperiment(unittest.TestCase):
|
||||
dataset_args.dataset_JsonIndexDataset_args.image_width = 80
|
||||
dataloader_args.dataset_length_train = 1
|
||||
dataloader_args.dataset_length_val = 1
|
||||
cfg.training_loop_ImplicitronTrainingLoop_args.max_epochs = 2
|
||||
cfg.training_loop_ImplicitronTrainingLoop_args.store_checkpoints = False
|
||||
cfg.optimizer_factory_ImplicitronOptimizerFactory_args.multistep_lr_milestones = [
|
||||
0,
|
||||
1,
|
||||
]
|
||||
cfg.solver_args.max_epochs = 2
|
||||
|
||||
if DEBUG:
|
||||
experiment.dump_cfg(cfg)
|
||||
with intercept_logs(
|
||||
logger_name="projects.implicitron_trainer.impl.training_loop",
|
||||
regexp="LR change!",
|
||||
) as intercepted_logs:
|
||||
experiment_runner = experiment.Experiment(**cfg)
|
||||
experiment_runner.run()
|
||||
|
||||
# Make sure LR decreased on 0th and 1st epoch 10fold.
|
||||
self.assertEqual(intercepted_logs[0].split()[-1], "5e-06")
|
||||
|
||||
def test_exponential_lr(self):
|
||||
# Test making minimal changes to the dataclass defaults.
|
||||
if not interactive_testing_requested():
|
||||
return
|
||||
cfg = OmegaConf.structured(experiment.Experiment)
|
||||
cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_class_type = (
|
||||
"JsonIndexDatasetMapProvider"
|
||||
)
|
||||
dataset_args = (
|
||||
cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
|
||||
)
|
||||
dataloader_args = (
|
||||
cfg.data_source_ImplicitronDataSource_args.data_loader_map_provider_SequenceDataLoaderMapProvider_args
|
||||
)
|
||||
dataset_args.category = "skateboard"
|
||||
dataset_args.test_restrict_sequence_id = 0
|
||||
dataset_args.dataset_root = "manifold://co3d/tree/extracted"
|
||||
dataset_args.dataset_JsonIndexDataset_args.limit_sequences_to = 5
|
||||
dataset_args.dataset_JsonIndexDataset_args.image_height = 80
|
||||
dataset_args.dataset_JsonIndexDataset_args.image_width = 80
|
||||
dataloader_args.dataset_length_train = 1
|
||||
dataloader_args.dataset_length_val = 1
|
||||
cfg.training_loop_ImplicitronTrainingLoop_args.max_epochs = 2
|
||||
cfg.training_loop_ImplicitronTrainingLoop_args.store_checkpoints = False
|
||||
cfg.optimizer_factory_ImplicitronOptimizerFactory_args.lr_policy = "Exponential"
|
||||
cfg.optimizer_factory_ImplicitronOptimizerFactory_args.exponential_lr_step_size = (
|
||||
2
|
||||
)
|
||||
|
||||
if DEBUG:
|
||||
experiment.dump_cfg(cfg)
|
||||
with intercept_logs(
|
||||
logger_name="projects.implicitron_trainer.impl.training_loop",
|
||||
regexp="LR change!",
|
||||
) as intercepted_logs:
|
||||
experiment_runner = experiment.Experiment(**cfg)
|
||||
experiment_runner.run()
|
||||
|
||||
# Make sure we followed the exponential lr schedule with gamma=0.1,
|
||||
# exponential_lr_step_size=2 -- so after two epochs, should
|
||||
# decrease lr 10x to 5e-5.
|
||||
self.assertEqual(intercepted_logs[0].split()[-1], "0.00015811388300841897")
|
||||
self.assertEqual(intercepted_logs[1].split()[-1], "5e-05")
|
||||
experiment.run_training(cfg)
|
||||
|
||||
def test_yaml_contents(self):
|
||||
# Check that the default config values, defined by Experiment and its
|
||||
# members, is what we expect it to be.
|
||||
cfg = OmegaConf.structured(experiment.Experiment)
|
||||
cfg = OmegaConf.structured(experiment.ExperimentConfig)
|
||||
yaml = OmegaConf.to_yaml(cfg, sort_keys=False)
|
||||
if DEBUG:
|
||||
(DATA_DIR / "experiment.yaml").write_text(yaml)
|
||||
self.assertEqual(yaml, (DATA_DIR / "experiment.yaml").read_text())
|
||||
|
||||
def test_load_configs(self):
|
||||
# Check that all the pre-prepared configs are valid.
|
||||
config_files = []
|
||||
|
||||
for pattern in ("repro_singleseq*.yaml", "repro_multiseq*.yaml"):
|
||||
@@ -159,78 +89,3 @@ class TestExperiment(unittest.TestCase):
|
||||
with self.subTest(file.name):
|
||||
with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)):
|
||||
compose(file.name)
|
||||
|
||||
|
||||
class TestNerfRepro(unittest.TestCase):
|
||||
@unittest.skip("This test runs full blender training.")
|
||||
def test_nerf_blender(self):
|
||||
# Train vanilla NERF.
|
||||
# Set env vars BLENDER_DATASET_ROOT and BLENDER_SINGLESEQ_CLASS first!
|
||||
if not interactive_testing_requested():
|
||||
return
|
||||
with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)):
|
||||
cfg = compose(config_name="repro_singleseq_nerf_blender", overrides=[])
|
||||
experiment_runner = experiment.Experiment(**cfg)
|
||||
experiment.dump_cfg(cfg)
|
||||
experiment_runner.run()
|
||||
|
||||
@unittest.skip("This test runs full llff training.")
|
||||
def test_nerf_llff(self):
|
||||
# Train vanilla NERF.
|
||||
# Set env vars LLFF_DATASET_ROOT and LLFF_SINGLESEQ_CLASS first!
|
||||
LLFF_SINGLESEQ_CLASS = os.environ["LLFF_SINGLESEQ_CLASS"]
|
||||
if not interactive_testing_requested():
|
||||
return
|
||||
with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)):
|
||||
cfg = compose(
|
||||
config_name=f"repro_singleseq_nerf_llff_{LLFF_SINGLESEQ_CLASS}",
|
||||
overrides=[],
|
||||
)
|
||||
experiment_runner = experiment.Experiment(**cfg)
|
||||
experiment.dump_cfg(cfg)
|
||||
experiment_runner.run()
|
||||
|
||||
@unittest.skip("This test checks resuming of the NeRF training.")
|
||||
def test_nerf_blender_resume(self):
|
||||
# Train one train batch of NeRF, then resume for one more batch.
|
||||
# Set env vars BLENDER_DATASET_ROOT and BLENDER_SINGLESEQ_CLASS first!
|
||||
if not interactive_testing_requested():
|
||||
return
|
||||
with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)):
|
||||
with tempfile.TemporaryDirectory() as exp_dir:
|
||||
cfg = compose(config_name="repro_singleseq_nerf_blender", overrides=[])
|
||||
cfg.exp_dir = exp_dir
|
||||
|
||||
# set dataset len to 1
|
||||
|
||||
# fmt: off
|
||||
(
|
||||
cfg
|
||||
.data_source_ImplicitronDataSource_args
|
||||
.data_loader_map_provider_SequenceDataLoaderMapProvider_args
|
||||
.dataset_length_train
|
||||
) = 1
|
||||
# fmt: on
|
||||
|
||||
# run for one epoch
|
||||
cfg.training_loop_ImplicitronTrainingLoop_args.max_epochs = 1
|
||||
experiment_runner = experiment.Experiment(**cfg)
|
||||
experiment.dump_cfg(cfg)
|
||||
experiment_runner.run()
|
||||
|
||||
# update num epochs + 2, let the optimizer resume
|
||||
cfg.training_loop_ImplicitronTrainingLoop_args.max_epochs = 3
|
||||
experiment_runner = experiment.Experiment(**cfg)
|
||||
experiment_runner.run()
|
||||
|
||||
# start from scratch
|
||||
cfg.model_factory_ImplicitronModelFactory_args.resume = False
|
||||
experiment_runner = experiment.Experiment(**cfg)
|
||||
experiment_runner.run()
|
||||
|
||||
# force resume from epoch 1
|
||||
cfg.model_factory_ImplicitronModelFactory_args.resume = True
|
||||
cfg.model_factory_ImplicitronModelFactory_args.force_resume = True
|
||||
cfg.model_factory_ImplicitronModelFactory_args.resume_epoch = 1
|
||||
experiment_runner = experiment.Experiment(**cfg)
|
||||
experiment_runner.run()
|
||||
|
||||
@@ -1,30 +0,0 @@
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the BSD-style license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
import contextlib
|
||||
import logging
|
||||
import re
|
||||
|
||||
|
||||
@contextlib.contextmanager
|
||||
def intercept_logs(logger_name: str, regexp: str):
|
||||
# Intercept logs that match a regexp, from a given logger.
|
||||
intercepted_messages = []
|
||||
logger = logging.getLogger(logger_name)
|
||||
|
||||
class LoggerInterceptor(logging.Filter):
|
||||
def filter(self, record):
|
||||
message = record.getMessage()
|
||||
if re.search(regexp, message):
|
||||
intercepted_messages.append(message)
|
||||
return True
|
||||
|
||||
interceptor = LoggerInterceptor()
|
||||
logger.addFilter(interceptor)
|
||||
try:
|
||||
yield intercepted_messages
|
||||
finally:
|
||||
logger.removeFilter(interceptor)
|
||||
@@ -22,6 +22,7 @@ import numpy as np
|
||||
import torch
|
||||
import torch.nn.functional as Fu
|
||||
from omegaconf import OmegaConf
|
||||
from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource
|
||||
from pytorch3d.implicitron.dataset.dataset_base import DatasetBase, FrameData
|
||||
from pytorch3d.implicitron.dataset.utils import is_train_frame
|
||||
from pytorch3d.implicitron.models.base_model import EvaluationMode
|
||||
@@ -36,7 +37,7 @@ from pytorch3d.implicitron.tools.vis_utils import (
|
||||
)
|
||||
from tqdm import tqdm
|
||||
|
||||
from .experiment import Experiment
|
||||
from .experiment import init_model
|
||||
|
||||
|
||||
def render_sequence(
|
||||
@@ -343,14 +344,13 @@ def export_scenes(
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = str(config.gpu_idx)
|
||||
|
||||
# Load the previously trained model
|
||||
experiment = Experiment(config)
|
||||
model = experiment.model_factory(force_resume=True)
|
||||
model, _, _ = init_model(cfg=config, force_load=True, load_model_only=True)
|
||||
model.cuda()
|
||||
model.eval()
|
||||
|
||||
# Setup the dataset
|
||||
data_source = experiment.data_source
|
||||
dataset_map, _ = data_source.get_datasets_and_dataloaders()
|
||||
datasource = ImplicitronDataSource(**config.data_source_args)
|
||||
dataset_map = datasource.dataset_map_provider.get_dataset_map()
|
||||
dataset = dataset_map[split]
|
||||
if dataset is None:
|
||||
raise ValueError(f"{split} dataset not provided")
|
||||
|
||||
@@ -4,4 +4,4 @@
|
||||
# This source code is licensed under the BSD-style license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
__version__ = "0.7.0"
|
||||
__version__ = "0.6.2"
|
||||
|
||||
@@ -10,7 +10,7 @@ import torch
|
||||
|
||||
|
||||
"""
|
||||
Some functions which depend on PyTorch or Python versions.
|
||||
Some functions which depend on PyTorch versions.
|
||||
"""
|
||||
|
||||
|
||||
@@ -79,12 +79,3 @@ def meshgrid_ij(
|
||||
# pyre-fixme[6]: For 1st param expected `Union[List[Tensor], Tensor]` but got
|
||||
# `Union[Sequence[Tensor], Tensor]`.
|
||||
return torch.meshgrid(*A)
|
||||
|
||||
|
||||
def prod(iterable, *, start=1):
|
||||
"""
|
||||
Like math.prod in Python 3.8 and later.
|
||||
"""
|
||||
for i in iterable:
|
||||
start *= i
|
||||
return start
|
||||
|
||||
@@ -61,84 +61,6 @@ class DataLoaderMapProviderBase(ReplaceableBase):
|
||||
raise NotImplementedError()
|
||||
|
||||
|
||||
@registry.register
|
||||
class SimpleDataLoaderMapProvider(DataLoaderMapProviderBase):
|
||||
"""
|
||||
Trivial implementation of DataLoaderMapProviderBase.
|
||||
|
||||
If a dataset returns batches from get_eval_batches(), then
|
||||
they will be what the corresponding dataloader returns,
|
||||
independently of any of the fields on this class.
|
||||
|
||||
Otherwise, returns shuffled batches.
|
||||
"""
|
||||
|
||||
batch_size: int = 1
|
||||
num_workers: int = 0
|
||||
dataset_length_train: int = 0
|
||||
dataset_length_val: int = 0
|
||||
dataset_length_test: int = 0
|
||||
|
||||
def get_data_loader_map(self, datasets: DatasetMap) -> DataLoaderMap:
|
||||
"""
|
||||
Returns a collection of data loaders for a given collection of datasets.
|
||||
"""
|
||||
return DataLoaderMap(
|
||||
train=self._make_data_loader(
|
||||
datasets.train,
|
||||
self.dataset_length_train,
|
||||
),
|
||||
val=self._make_data_loader(
|
||||
datasets.val,
|
||||
self.dataset_length_val,
|
||||
),
|
||||
test=self._make_data_loader(
|
||||
datasets.test,
|
||||
self.dataset_length_test,
|
||||
),
|
||||
)
|
||||
|
||||
def _make_data_loader(
|
||||
self,
|
||||
dataset: Optional[DatasetBase],
|
||||
num_batches: int,
|
||||
) -> Optional[DataLoader[FrameData]]:
|
||||
"""
|
||||
Returns the dataloader for a dataset.
|
||||
|
||||
Args:
|
||||
dataset: the dataset
|
||||
num_batches: possible ceiling on number of batches per epoch
|
||||
"""
|
||||
if dataset is None:
|
||||
return None
|
||||
|
||||
data_loader_kwargs = {
|
||||
"num_workers": self.num_workers,
|
||||
"collate_fn": dataset.frame_data_type.collate,
|
||||
}
|
||||
|
||||
eval_batches = dataset.get_eval_batches()
|
||||
if eval_batches is not None:
|
||||
return DataLoader(
|
||||
dataset,
|
||||
batch_sampler=eval_batches,
|
||||
**data_loader_kwargs,
|
||||
)
|
||||
|
||||
if num_batches > 0:
|
||||
num_samples = self.batch_size * num_batches
|
||||
else:
|
||||
num_samples = None
|
||||
sampler = RandomSampler(dataset, replacement=True, num_samples=num_samples)
|
||||
batch_sampler = BatchSampler(sampler, self.batch_size, drop_last=True)
|
||||
return DataLoader(
|
||||
dataset,
|
||||
batch_sampler=batch_sampler,
|
||||
**data_loader_kwargs,
|
||||
)
|
||||
|
||||
|
||||
class DoublePoolBatchSampler(Sampler[List[int]]):
|
||||
"""
|
||||
Batch sampler for making random batches of a single frame
|
||||
@@ -199,7 +121,7 @@ class DoublePoolBatchSampler(Sampler[List[int]]):
|
||||
torch.randperm(len(self.first_indices), generator=self.generator)
|
||||
for _ in range(n_copies)
|
||||
]
|
||||
i_first = torch.cat(raw_indices)[:num_batches]
|
||||
i_first = torch.concat(raw_indices)[:num_batches]
|
||||
else:
|
||||
i_first = torch.randperm(len(self.first_indices), generator=self.generator)
|
||||
first_indices = [self.first_indices[i] for i in i_first]
|
||||
|
||||
@@ -15,11 +15,10 @@ from pytorch3d.renderer.cameras import CamerasBase
|
||||
|
||||
from .blender_dataset_map_provider import BlenderDatasetMapProvider # noqa
|
||||
from .data_loader_map_provider import DataLoaderMap, DataLoaderMapProviderBase
|
||||
from .dataset_map_provider import DatasetMap, DatasetMapProviderBase
|
||||
from .dataset_map_provider import DatasetMap, DatasetMapProviderBase, Task
|
||||
from .json_index_dataset_map_provider import JsonIndexDatasetMapProvider # noqa
|
||||
from .json_index_dataset_map_provider_v2 import JsonIndexDatasetMapProviderV2 # noqa
|
||||
from .llff_dataset_map_provider import LlffDatasetMapProvider # noqa
|
||||
from .rendered_mesh_dataset_map_provider import RenderedMeshDatasetMapProvider # noqa
|
||||
|
||||
|
||||
class DataSourceBase(ReplaceableBase):
|
||||
@@ -68,6 +67,9 @@ class ImplicitronDataSource(DataSourceBase): # pyre-ignore[13]
|
||||
dataloaders = self.data_loader_map_provider.get_data_loader_map(datasets)
|
||||
return datasets, dataloaders
|
||||
|
||||
def get_task(self) -> Task:
|
||||
return self.dataset_map_provider.get_task()
|
||||
|
||||
@property
|
||||
def all_train_cameras(self) -> Optional[CamerasBase]:
|
||||
if self._all_train_cameras_cache is None: # pyre-ignore[16]
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
import logging
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from typing import Iterator, Optional
|
||||
|
||||
from iopath.common.file_io import PathManager
|
||||
@@ -52,6 +53,11 @@ class DatasetMap:
|
||||
yield self.test
|
||||
|
||||
|
||||
class Task(Enum):
|
||||
SINGLE_SEQUENCE = "singlesequence"
|
||||
MULTI_SEQUENCE = "multisequence"
|
||||
|
||||
|
||||
class DatasetMapProviderBase(ReplaceableBase):
|
||||
"""
|
||||
Base class for a provider of training / validation and testing
|
||||
@@ -65,6 +71,9 @@ class DatasetMapProviderBase(ReplaceableBase):
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_task(self) -> Task:
|
||||
raise NotImplementedError()
|
||||
|
||||
def get_all_train_cameras(self) -> Optional[CamerasBase]:
|
||||
"""
|
||||
If the data is all for a single scene, returns a list
|
||||
|
||||
@@ -24,7 +24,7 @@ from typing import (
|
||||
Sequence,
|
||||
Tuple,
|
||||
Type,
|
||||
TYPE_CHECKING,
|
||||
TypedDict,
|
||||
Union,
|
||||
)
|
||||
|
||||
@@ -45,15 +45,9 @@ from .utils import is_known_frame_scalar
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from typing import TypedDict
|
||||
|
||||
class FrameAnnotsEntry(TypedDict):
|
||||
subset: Optional[str]
|
||||
frame_annotation: types.FrameAnnotation
|
||||
|
||||
else:
|
||||
FrameAnnotsEntry = dict
|
||||
class FrameAnnotsEntry(TypedDict):
|
||||
subset: Optional[str]
|
||||
frame_annotation: types.FrameAnnotation
|
||||
|
||||
|
||||
@registry.register
|
||||
@@ -118,11 +112,6 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase):
|
||||
eval_batches: A list of batches that form the evaluation set;
|
||||
list of batch-sized lists of indices corresponding to __getitem__
|
||||
of this class, thus it can be used directly as a batch sampler.
|
||||
eval_batch_index:
|
||||
( Optional[List[List[Union[Tuple[str, int, str], Tuple[str, int]]]] )
|
||||
A list of batches of frames described as (sequence_name, frame_idx)
|
||||
that can form the evaluation set, `eval_batches` will be set from this.
|
||||
|
||||
"""
|
||||
|
||||
frame_annotations_type: ClassVar[
|
||||
@@ -158,7 +147,6 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase):
|
||||
seed: int = 0
|
||||
sort_frames: bool = False
|
||||
eval_batches: Any = None
|
||||
eval_batch_index: Any = None
|
||||
# frame_annots: List[FrameAnnotsEntry] = field(init=False)
|
||||
# seq_annots: Dict[str, types.SequenceAnnotation] = field(init=False)
|
||||
|
||||
@@ -171,22 +159,8 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase):
|
||||
self._sort_frames()
|
||||
self._load_subset_lists()
|
||||
self._filter_db() # also computes sequence indices
|
||||
self._extract_and_set_eval_batches()
|
||||
logger.info(str(self))
|
||||
|
||||
def _extract_and_set_eval_batches(self):
|
||||
"""
|
||||
Sets eval_batches based on input eval_batch_index.
|
||||
"""
|
||||
if self.eval_batch_index is not None:
|
||||
if self.eval_batches is not None:
|
||||
raise ValueError(
|
||||
"Cannot define both eval_batch_index and eval_batches."
|
||||
)
|
||||
self.eval_batches = self.seq_frame_index_to_dataset_index(
|
||||
self.eval_batch_index
|
||||
)
|
||||
|
||||
def is_filtered(self):
|
||||
"""
|
||||
Returns `True` in case the dataset has been filtered and thus some frame annotations
|
||||
|
||||
@@ -9,7 +9,7 @@ import json
|
||||
import os
|
||||
from typing import Dict, List, Optional, Tuple, Type
|
||||
|
||||
from omegaconf import DictConfig
|
||||
from omegaconf import DictConfig, open_dict
|
||||
from pytorch3d.implicitron.tools.config import (
|
||||
expand_args_fields,
|
||||
registry,
|
||||
@@ -17,7 +17,12 @@ from pytorch3d.implicitron.tools.config import (
|
||||
)
|
||||
from pytorch3d.renderer.cameras import CamerasBase
|
||||
|
||||
from .dataset_map_provider import DatasetMap, DatasetMapProviderBase, PathManagerFactory
|
||||
from .dataset_map_provider import (
|
||||
DatasetMap,
|
||||
DatasetMapProviderBase,
|
||||
PathManagerFactory,
|
||||
Task,
|
||||
)
|
||||
from .json_index_dataset import JsonIndexDataset
|
||||
|
||||
from .utils import (
|
||||
@@ -52,7 +57,6 @@ _CO3D_DATASET_ROOT: str = os.getenv("CO3D_DATASET_ROOT", "")
|
||||
_NEED_CONTROL: Tuple[str, ...] = (
|
||||
"dataset_root",
|
||||
"eval_batches",
|
||||
"eval_batch_index",
|
||||
"n_frames_per_sequence",
|
||||
"path_manager",
|
||||
"pick_sequence",
|
||||
@@ -113,8 +117,9 @@ class JsonIndexDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13]
|
||||
Called by get_default_args(JsonIndexDatasetMapProvider) to
|
||||
not expose certain fields of each dataset class.
|
||||
"""
|
||||
for key in _NEED_CONTROL:
|
||||
del args[key]
|
||||
with open_dict(args):
|
||||
for key in _NEED_CONTROL:
|
||||
del args[key]
|
||||
|
||||
def create_dataset(self):
|
||||
"""
|
||||
@@ -154,7 +159,7 @@ class JsonIndexDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13]
|
||||
# This maps the common names of the dataset subsets ("train"/"val"/"test")
|
||||
# to the names of the subsets in the CO3D dataset.
|
||||
set_names_mapping = _get_co3d_set_names_mapping(
|
||||
self.task_str,
|
||||
self.get_task(),
|
||||
self.test_on_train,
|
||||
self.only_test_set,
|
||||
)
|
||||
@@ -179,7 +184,7 @@ class JsonIndexDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13]
|
||||
eval_batch_index = json.load(f)
|
||||
restrict_sequence_name = self.restrict_sequence_name
|
||||
|
||||
if self.task_str == "singlesequence":
|
||||
if self.get_task() == Task.SINGLE_SEQUENCE:
|
||||
if (
|
||||
self.test_restrict_sequence_id is None
|
||||
or self.test_restrict_sequence_id < 0
|
||||
@@ -207,10 +212,6 @@ class JsonIndexDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13]
|
||||
]
|
||||
# overwrite the restrict_sequence_name
|
||||
restrict_sequence_name = [eval_sequence_name]
|
||||
if len(restrict_sequence_name) > 0:
|
||||
eval_batch_index = [
|
||||
b for b in eval_batch_index if b[0][0] in restrict_sequence_name
|
||||
]
|
||||
|
||||
dataset_type: Type[JsonIndexDataset] = registry.get(
|
||||
JsonIndexDataset, self.dataset_class_type
|
||||
@@ -238,9 +239,15 @@ class JsonIndexDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13]
|
||||
n_frames_per_sequence=-1,
|
||||
subsets=set_names_mapping["test"],
|
||||
pick_sequence=restrict_sequence_name,
|
||||
eval_batch_index=eval_batch_index,
|
||||
**common_kwargs,
|
||||
)
|
||||
if len(restrict_sequence_name) > 0:
|
||||
eval_batch_index = [
|
||||
b for b in eval_batch_index if b[0][0] in restrict_sequence_name
|
||||
]
|
||||
test_dataset.eval_batches = test_dataset.seq_frame_index_to_dataset_index(
|
||||
eval_batch_index
|
||||
)
|
||||
dataset_map = DatasetMap(
|
||||
train=train_dataset, val=val_dataset, test=test_dataset
|
||||
)
|
||||
@@ -261,11 +268,12 @@ class JsonIndexDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13]
|
||||
# pyre-ignore[16]
|
||||
return self.dataset_map
|
||||
|
||||
def get_all_train_cameras(self) -> Optional[CamerasBase]:
|
||||
if self.task_str == "multisequence":
|
||||
return None
|
||||
def get_task(self) -> Task:
|
||||
return Task(self.task_str)
|
||||
|
||||
assert self.task_str == "singlesequence"
|
||||
def get_all_train_cameras(self) -> Optional[CamerasBase]:
|
||||
if Task(self.task_str) == Task.MULTI_SEQUENCE:
|
||||
return None
|
||||
|
||||
# pyre-ignore[16]
|
||||
train_dataset = self.dataset_map.train
|
||||
@@ -274,7 +282,7 @@ class JsonIndexDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13]
|
||||
|
||||
|
||||
def _get_co3d_set_names_mapping(
|
||||
task_str: str,
|
||||
task: Task,
|
||||
test_on_train: bool,
|
||||
only_test: bool,
|
||||
) -> Dict[str, List[str]]:
|
||||
@@ -288,7 +296,7 @@ def _get_co3d_set_names_mapping(
|
||||
- val (if not test_on_train)
|
||||
- test (if not test_on_train)
|
||||
"""
|
||||
single_seq = task_str == "singlesequence"
|
||||
single_seq = task == Task.SINGLE_SEQUENCE
|
||||
|
||||
if only_test:
|
||||
set_names_mapping = {}
|
||||
|
||||
@@ -9,13 +9,13 @@ import json
|
||||
import logging
|
||||
import os
|
||||
import warnings
|
||||
from typing import Dict, List, Optional, Tuple, Type
|
||||
from typing import Dict, List, Optional, Type
|
||||
|
||||
from omegaconf import DictConfig
|
||||
from pytorch3d.implicitron.dataset.dataset_map_provider import (
|
||||
DatasetMap,
|
||||
DatasetMapProviderBase,
|
||||
PathManagerFactory,
|
||||
Task,
|
||||
)
|
||||
from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset
|
||||
from pytorch3d.implicitron.tools.config import (
|
||||
@@ -29,19 +29,6 @@ from pytorch3d.renderer.cameras import CamerasBase
|
||||
|
||||
_CO3DV2_DATASET_ROOT: str = os.getenv("CO3DV2_DATASET_ROOT", "")
|
||||
|
||||
# _NEED_CONTROL is a list of those elements of JsonIndexDataset which
|
||||
# are not directly specified for it in the config but come from the
|
||||
# DatasetMapProvider.
|
||||
_NEED_CONTROL: Tuple[str, ...] = (
|
||||
"dataset_root",
|
||||
"eval_batches",
|
||||
"eval_batch_index",
|
||||
"path_manager",
|
||||
"subsets",
|
||||
"frame_annotations_file",
|
||||
"sequence_annotations_file",
|
||||
"subset_lists_file",
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -189,20 +176,6 @@ class JsonIndexDatasetMapProviderV2(DatasetMapProviderBase): # pyre-ignore [13]
|
||||
|
||||
path_manager = self.path_manager_factory.get()
|
||||
|
||||
if path_manager is not None:
|
||||
path_managed_frame_file = path_manager.get_local_path(frame_file)
|
||||
else:
|
||||
path_managed_frame_file = frame_file
|
||||
if not os.path.isfile(path_managed_frame_file):
|
||||
# The frame_file does not exist.
|
||||
# Most probably the user has not specified the root folder.
|
||||
raise ValueError(
|
||||
f"Looking for frame annotations in {path_managed_frame_file}."
|
||||
+ " Please specify a correct dataset_root folder."
|
||||
+ " Note: By default the root folder is taken from the"
|
||||
+ " CO3DV2_DATASET_ROOT environment variable."
|
||||
)
|
||||
|
||||
# setup the common dataset arguments
|
||||
common_dataset_kwargs = getattr(self, f"dataset_{self.dataset_class_type}_args")
|
||||
common_dataset_kwargs = {
|
||||
@@ -296,15 +269,6 @@ class JsonIndexDatasetMapProviderV2(DatasetMapProviderBase): # pyre-ignore [13]
|
||||
train=train_dataset, val=val_dataset, test=test_dataset
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def dataset_tweak_args(cls, type, args: DictConfig) -> None:
|
||||
"""
|
||||
Called by get_default_args(JsonIndexDatasetMapProviderV2) to
|
||||
not expose certain fields of each dataset class.
|
||||
"""
|
||||
for key in _NEED_CONTROL:
|
||||
del args[key]
|
||||
|
||||
def create_dataset(self):
|
||||
# The dataset object is created inside `self.get_dataset_map`
|
||||
pass
|
||||
@@ -335,6 +299,12 @@ class JsonIndexDatasetMapProviderV2(DatasetMapProviderBase): # pyre-ignore [13]
|
||||
)
|
||||
return category_to_subset_name_list
|
||||
|
||||
def get_task(self) -> Task: # TODO: we plan to get rid of tasks
|
||||
return {
|
||||
"manyview": Task.SINGLE_SEQUENCE,
|
||||
"fewview": Task.MULTI_SEQUENCE,
|
||||
}[self.subset_name.split("_")[0]]
|
||||
|
||||
def get_all_train_cameras(self) -> Optional[CamerasBase]:
|
||||
# pyre-ignore[16]
|
||||
train_dataset = self.dataset_map.train
|
||||
|
||||
@@ -32,21 +32,17 @@ class LlffDatasetMapProvider(SingleSceneDatasetMapProviderBase):
|
||||
and test datasets, and this many random training frames are added to
|
||||
each test batch. If not set, test batches each contain just a single
|
||||
testing frame.
|
||||
downscale_factor: determines image sizes.
|
||||
"""
|
||||
|
||||
downscale_factor: int = 4
|
||||
|
||||
def _load_data(self) -> None:
|
||||
path_manager = self.path_manager_factory.get()
|
||||
images, poses, _ = load_llff_data(
|
||||
self.base_dir, factor=self.downscale_factor, path_manager=path_manager
|
||||
self.base_dir, factor=8, path_manager=path_manager
|
||||
)
|
||||
hwf = poses[0, :3, -1]
|
||||
poses = poses[:, :3, :4]
|
||||
|
||||
llffhold = 8
|
||||
i_test = np.arange(images.shape[0])[::llffhold]
|
||||
i_test = np.arange(images.shape[0])[::8]
|
||||
i_test_index = set(i_test.tolist())
|
||||
i_train = np.array(
|
||||
[i for i in np.arange(images.shape[0]) if i not in i_test_index]
|
||||
|
||||
@@ -294,7 +294,7 @@ def _local_path(path_manager, path):
|
||||
|
||||
def _ls(path_manager, path):
|
||||
if path_manager is None:
|
||||
return os.listdir(path)
|
||||
return os.path.listdir(path)
|
||||
return path_manager.ls(path)
|
||||
|
||||
|
||||
|
||||
@@ -1,211 +0,0 @@
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the BSD-style license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
from os.path import dirname, join, realpath
|
||||
from typing import Optional, Tuple
|
||||
|
||||
import torch
|
||||
from pytorch3d.implicitron.tools.config import (
|
||||
expand_args_fields,
|
||||
registry,
|
||||
run_auto_creation,
|
||||
)
|
||||
from pytorch3d.io import IO
|
||||
from pytorch3d.renderer import (
|
||||
AmbientLights,
|
||||
BlendParams,
|
||||
CamerasBase,
|
||||
FoVPerspectiveCameras,
|
||||
HardPhongShader,
|
||||
look_at_view_transform,
|
||||
MeshRasterizer,
|
||||
MeshRendererWithFragments,
|
||||
PointLights,
|
||||
RasterizationSettings,
|
||||
)
|
||||
from pytorch3d.structures.meshes import Meshes
|
||||
|
||||
from .dataset_map_provider import DatasetMap, DatasetMapProviderBase, PathManagerFactory
|
||||
from .single_sequence_dataset import SingleSceneDataset
|
||||
from .utils import DATASET_TYPE_KNOWN
|
||||
|
||||
|
||||
@registry.register
class RenderedMeshDatasetMapProvider(DatasetMapProviderBase):  # pyre-ignore [13]
    """
    Single-scene dataset map provider backed by on-the-fly PyTorch3D renders
    of a mesh (Keenan Crane's cow model by default).

    `num_views` renders are produced and exposed as the train split; the val
    and test splits are empty. Viewpoints are sampled at uniformly spaced
    azimuths with a fixed elevation of zero, i.e. the camera's vertical
    position coincides with the equator. The default camera placement is
    tuned for the cow mesh.

    Rendering runs on a GPU when one is available, but the data produced is
    on the CPU, just like the data returned by implicitron's other dataset
    map providers. This is because both datasets and models can be large, so
    implicitron's GenericModel.forward (etc) expects data on the CPU and only
    moves what it needs to the device.

    For a more detailed explanation of this code, please refer to the
    docs/tutorials/fit_textured_mesh.ipynb notebook.

    Members:
        num_views: The number of generated renders.
        data_file: The folder that contains the mesh file. By default, finds
            the cow mesh in the same repo as this code.
        azimuth_range: number of degrees on each side of the start position to
            take samples
        resolution: the common height and width of the output images.
        use_point_light: whether to use a particular point light as opposed
            to ambient white.
    """

    num_views: int = 40
    data_file: Optional[str] = None
    azimuth_range: float = 180
    resolution: int = 128
    use_point_light: bool = True
    path_manager_factory: PathManagerFactory
    path_manager_factory_class_type: str = "PathManagerFactory"

    def get_dataset_map(self) -> DatasetMap:
        """Return the renders as the train split; val and test are empty."""
        # pyre-ignore[16]
        return DatasetMap(train=self.train_dataset, val=None, test=None)

    def get_all_train_cameras(self) -> CamerasBase:
        """Return the (CPU) cameras of all the training renders."""
        # pyre-ignore[16]
        return self.poses

    def __post_init__(self) -> None:
        super().__init__()
        run_auto_creation(self)
        # Render on the GPU when possible; results are moved to CPU below.
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        if self.data_file is not None:
            mesh_path = self.data_file
        else:
            # Default to the cow mesh shipped in this repo's tutorials data.
            repo_root = dirname(dirname(dirname(dirname(realpath(__file__)))))
            mesh_path = join(
                repo_root, "docs", "tutorials", "data", "cow_mesh", "cow.obj"
            )
        loader = IO(path_manager=self.path_manager_factory.get())
        mesh = loader.load_mesh(mesh_path, device=device)
        cameras, images, masks = _generate_cow_renders(
            num_views=self.num_views,
            mesh=mesh,
            azimuth_range=self.azimuth_range,
            resolution=self.resolution,
            device=device,
            use_point_light=self.use_point_light,
        )
        # pyre-ignore[16]
        self.poses = cameras.cpu()
        expand_args_fields(SingleSceneDataset)
        n_frames = len(cameras)
        # pyre-ignore[16]
        self.train_dataset = SingleSceneDataset(  # pyre-ignore[28]
            object_name="cow",
            images=list(images.permute(0, 3, 1, 2).cpu()),
            fg_probabilities=list(masks[:, None].cpu()),
            poses=[self.poses[i] for i in range(n_frames)],
            frame_types=[DATASET_TYPE_KNOWN] * n_frames,
            eval_batches=None,
        )
|
||||
|
||||
|
||||
@torch.no_grad()
def _generate_cow_renders(
    *,
    num_views: int,
    mesh: Meshes,
    azimuth_range: float,
    resolution: int,
    device: torch.device,
    use_point_light: bool,
) -> Tuple[CamerasBase, torch.Tensor, torch.Tensor]:
    """
    Render `mesh` from `num_views` viewpoints spread over the azimuth range.

    Note: the mesh is normalized in place before rendering.

    Returns:
        cameras: A batch of `num_views` `FoVPerspectiveCameras` from which the
            images are rendered.
        images: A tensor of shape `(num_views, height, width, 3)` containing
            the rendered images.
        silhouettes: A tensor of shape `(num_views, height, width)` containing
            the rendered silhouettes.
    """
    # Normalize the mesh (in place) so it is centered at (0,0,0) and fits in
    # a sphere of radius 1. Normalizing the target mesh speeds up downstream
    # optimization but is not strictly necessary.
    vertices = mesh.verts_packed()
    num_verts = vertices.shape[0]
    centroid = vertices.mean(0)
    extent = max((vertices - centroid).abs().max(0)[0])
    mesh.offset_verts_(-(centroid.expand(num_verts, 3)))
    mesh.scale_verts_((1.0 / float(extent)))

    # Viewing angles: constant (zero) elevation, azimuths sampled uniformly
    # on both sides of the start position.
    elev = torch.linspace(0, 0, num_views)
    azim = torch.linspace(-azimuth_range, azimuth_range, num_views) + 180.0

    # Light the scene either with a point light placed in front of the object
    # (the cow faces the -z direction) or with plain ambient white light.
    lights = (
        PointLights(device=device, location=[[0.0, 0.0, -3.0]])
        if use_point_light
        else AmbientLights(device=device)
    )

    # One perspective camera per viewpoint, all at distance 2.7. The camera
    # helpers support mixed type inputs and broadcasting, so the scalar dist
    # is combined with the per-view elevation/azimuth tensors.
    R, T = look_at_view_transform(dist=2.7, elev=elev, azim=azim)
    cameras = FoVPerspectiveCameras(device=device, R=R, T=T)

    # Visualization-quality rasterization settings: one face per pixel and no
    # blur. bin_size and max_faces_per_bin keep their default (None) values,
    # which picks them heuristically and enables the faster coarse-to-fine
    # rasterization path (see docs/notes/renderer.md).
    raster_settings = RasterizationSettings(
        image_size=resolution, blur_radius=0.0, faces_per_pixel=1
    )

    # Textured Phong renderer: interpolates the per-vertex texture uvs,
    # samples the texture image and applies the Phong lighting model.
    blend_params = BlendParams(sigma=1e-4, gamma=1e-4, background_color=(0.0, 0.0, 0.0))
    renderer = MeshRendererWithFragments(
        rasterizer=MeshRasterizer(cameras=cameras, raster_settings=raster_settings),
        shader=HardPhongShader(
            device=device, cameras=cameras, lights=lights, blend_params=blend_params
        ),
    )

    # Replicate the mesh (and its textures) once per viewpoint via `extend`.
    batched_meshes = mesh.extend(num_views)

    # Render from every viewing angle; a pixel belongs to the silhouette iff
    # some face was rasterized into it.
    rendered, fragments = renderer(batched_meshes, cameras=cameras, lights=lights)
    silhouettes = (fragments.pix_to_face[..., 0] >= 0).float()

    return cameras, rendered[..., :3], silhouettes
|
||||
@@ -21,13 +21,17 @@ from pytorch3d.implicitron.tools.config import (
|
||||
from pytorch3d.renderer import CamerasBase, join_cameras_as_batch, PerspectiveCameras
|
||||
|
||||
from .dataset_base import DatasetBase, FrameData
|
||||
from .dataset_map_provider import DatasetMap, DatasetMapProviderBase, PathManagerFactory
|
||||
from .dataset_map_provider import (
|
||||
DatasetMap,
|
||||
DatasetMapProviderBase,
|
||||
PathManagerFactory,
|
||||
Task,
|
||||
)
|
||||
from .utils import DATASET_TYPE_KNOWN, DATASET_TYPE_UNKNOWN
|
||||
|
||||
_SINGLE_SEQUENCE_NAME: str = "one_sequence"
|
||||
|
||||
|
||||
@expand_args_fields
|
||||
class SingleSceneDataset(DatasetBase, Configurable):
|
||||
"""
|
||||
A dataset from images from a single scene.
|
||||
@@ -111,6 +115,7 @@ class SingleSceneDatasetMapProviderBase(DatasetMapProviderBase):
|
||||
def _get_dataset(
|
||||
self, split_idx: int, frame_type: str, set_eval_batches: bool = False
|
||||
) -> SingleSceneDataset:
|
||||
expand_args_fields(SingleSceneDataset)
|
||||
# pyre-ignore[16]
|
||||
split = self.i_split[split_idx]
|
||||
frame_types = [frame_type] * len(split)
|
||||
@@ -154,6 +159,9 @@ class SingleSceneDatasetMapProviderBase(DatasetMapProviderBase):
|
||||
test=self._get_dataset(2, DATASET_TYPE_UNKNOWN, True),
|
||||
)
|
||||
|
||||
def get_task(self) -> Task:
|
||||
return Task.SINGLE_SEQUENCE
|
||||
|
||||
def get_all_train_cameras(self) -> Optional[CamerasBase]:
|
||||
# pyre-ignore[16]
|
||||
cameras = [self.poses[i] for i in self.i_split[0]]
|
||||
|
||||
@@ -7,12 +7,11 @@
|
||||
|
||||
import dataclasses
|
||||
import os
|
||||
from enum import Enum
|
||||
from typing import Any, cast, Dict, List, Optional, Tuple
|
||||
|
||||
import lpips
|
||||
import torch
|
||||
from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource
|
||||
from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource, Task
|
||||
from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset
|
||||
from pytorch3d.implicitron.dataset.json_index_dataset_map_provider import (
|
||||
CO3D_CATEGORIES,
|
||||
@@ -28,11 +27,6 @@ from pytorch3d.implicitron.tools.utils import dataclass_to_cuda_
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
class Task(Enum):
|
||||
SINGLE_SEQUENCE = "singlesequence"
|
||||
MULTI_SEQUENCE = "multisequence"
|
||||
|
||||
|
||||
def main() -> None:
|
||||
"""
|
||||
Evaluates new view synthesis metrics of a simple depth-based image rendering
|
||||
@@ -159,15 +153,11 @@ def evaluate_dbir_for_category(
|
||||
|
||||
if task == Task.SINGLE_SEQUENCE:
|
||||
camera_difficulty_bin_breaks = 0.97, 0.98
|
||||
multisequence_evaluation = False
|
||||
else:
|
||||
camera_difficulty_bin_breaks = 2.0 / 3, 5.0 / 6
|
||||
multisequence_evaluation = True
|
||||
|
||||
category_result_flat, category_result = summarize_nvs_eval_results(
|
||||
per_batch_eval_results,
|
||||
camera_difficulty_bin_breaks=camera_difficulty_bin_breaks,
|
||||
is_multisequence=multisequence_evaluation,
|
||||
per_batch_eval_results, task, camera_difficulty_bin_breaks
|
||||
)
|
||||
|
||||
return category_result["results"]
|
||||
|
||||
@@ -14,6 +14,7 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from pytorch3d.implicitron.dataset.data_source import Task
|
||||
from pytorch3d.implicitron.dataset.dataset_base import FrameData
|
||||
from pytorch3d.implicitron.dataset.utils import is_known_frame, is_train_frame
|
||||
from pytorch3d.implicitron.models.base_model import ImplicitronRender
|
||||
@@ -242,26 +243,10 @@ def eval_batch(
|
||||
if frame_data.depth_map is None or frame_data.depth_map.sum() <= 0:
|
||||
warnings.warn("Empty or missing depth map in evaluation!")
|
||||
|
||||
if frame_data.mask_crop is None:
|
||||
warnings.warn("mask_crop is None, assuming the whole image is valid.")
|
||||
|
||||
if frame_data.fg_probability is None:
|
||||
warnings.warn("fg_probability is None, assuming the whole image is fg.")
|
||||
|
||||
# threshold the masks to make ground truth binary masks
|
||||
mask_fg = (
|
||||
frame_data.fg_probability >= mask_thr
|
||||
if frame_data.fg_probability is not None
|
||||
# pyre-ignore [16]
|
||||
else torch.ones_like(frame_data.image_rgb[:, :1, ...]).bool()
|
||||
)
|
||||
|
||||
mask_crop = (
|
||||
frame_data.mask_crop
|
||||
if frame_data.mask_crop is not None
|
||||
else torch.ones_like(mask_fg)
|
||||
)
|
||||
|
||||
mask_fg, mask_crop = [
|
||||
(getattr(frame_data, k) >= mask_thr) for k in ("fg_probability", "mask_crop")
|
||||
]
|
||||
image_rgb_masked = mask_background(
|
||||
# pyre-fixme[6]: Expected `Tensor` for 1st param but got
|
||||
# `Optional[torch.Tensor]`.
|
||||
@@ -281,6 +266,7 @@ def eval_batch(
|
||||
# pyre-fixme[6]: Expected `Tensor` for 4th param but got
|
||||
# `Optional[torch.Tensor]`.
|
||||
depth_map=frame_data.depth_map,
|
||||
# pyre-fixme[16]: `Optional` has no attribute `__getitem__`.
|
||||
depth_mask=frame_data.depth_mask[:1],
|
||||
visdom_env=visualize_visdom_env,
|
||||
)
|
||||
@@ -312,7 +298,7 @@ def eval_batch(
|
||||
results[metric_name].item(), metric_name, loss_mask_now
|
||||
)
|
||||
|
||||
if name_postfix == "_fg" and frame_data.depth_map is not None:
|
||||
if name_postfix == "_fg":
|
||||
# only record depth metrics for the foreground
|
||||
_, abs_ = eval_depth(
|
||||
cloned_render["depth_render"],
|
||||
@@ -328,7 +314,9 @@ def eval_batch(
|
||||
if visualize:
|
||||
visualizer.show_depth(abs_.mean().item(), name_postfix, loss_mask_now)
|
||||
if break_after_visualising:
|
||||
breakpoint() # noqa: B601
|
||||
import pdb # noqa: B602
|
||||
|
||||
pdb.set_trace()
|
||||
|
||||
if lpips_model is not None:
|
||||
im1, im2 = [
|
||||
@@ -432,16 +420,16 @@ def _get_camera_difficulty_bin_edges(camera_difficulty_bin_breaks: Tuple[float,
|
||||
|
||||
def summarize_nvs_eval_results(
|
||||
per_batch_eval_results: List[Dict[str, Any]],
|
||||
is_multisequence: bool,
|
||||
camera_difficulty_bin_breaks: Tuple[float, float],
|
||||
task: Task,
|
||||
camera_difficulty_bin_breaks: Tuple[float, float] = (0.97, 0.98),
|
||||
):
|
||||
"""
|
||||
Compile the per-batch evaluation results `per_batch_eval_results` into
|
||||
a set of aggregate metrics. The produced metrics depend on is_multisequence.
|
||||
a set of aggregate metrics. The produced metrics depend on the task.
|
||||
|
||||
Args:
|
||||
per_batch_eval_results: Metrics of each per-batch evaluation.
|
||||
is_multisequence: Whether to evaluate as a multisequence task
|
||||
task: The type of the new-view synthesis task.
|
||||
camera_difficulty_bin_breaks: edge hard-medium and medium-easy
|
||||
|
||||
|
||||
@@ -451,9 +439,14 @@ def summarize_nvs_eval_results(
|
||||
"""
|
||||
n_batches = len(per_batch_eval_results)
|
||||
eval_sets: List[Optional[str]] = []
|
||||
eval_sets = [None]
|
||||
if is_multisequence:
|
||||
if task == Task.SINGLE_SEQUENCE:
|
||||
eval_sets = [None]
|
||||
# assert n_batches==100
|
||||
elif task == Task.MULTI_SEQUENCE:
|
||||
eval_sets = ["train", "test"]
|
||||
# assert n_batches==1000
|
||||
else:
|
||||
raise ValueError(task)
|
||||
batch_sizes = torch.tensor(
|
||||
[r["meta"]["batch_size"] for r in per_batch_eval_results]
|
||||
).long()
|
||||
@@ -473,9 +466,11 @@ def summarize_nvs_eval_results(
|
||||
# add per set averages
|
||||
for SET in eval_sets:
|
||||
if SET is None:
|
||||
assert task == Task.SINGLE_SEQUENCE
|
||||
ok_set = torch.ones(n_batches, dtype=torch.bool)
|
||||
set_name = "test"
|
||||
else:
|
||||
assert task == Task.MULTI_SEQUENCE
|
||||
ok_set = is_train == int(SET == "train")
|
||||
set_name = SET
|
||||
|
||||
@@ -500,7 +495,7 @@ def summarize_nvs_eval_results(
|
||||
}
|
||||
)
|
||||
|
||||
if is_multisequence:
|
||||
if task == Task.MULTI_SEQUENCE:
|
||||
# split based on n_src_views
|
||||
n_src_views = batch_sizes - 1
|
||||
for n_src in EVAL_N_SRC_VIEWS:
|
||||
|
||||
@@ -1,164 +0,0 @@
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the BSD-style license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
import copy
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import lpips
|
||||
import torch
|
||||
|
||||
import tqdm
|
||||
from pytorch3d.implicitron.dataset import utils as ds_utils
|
||||
|
||||
from pytorch3d.implicitron.evaluation import evaluate_new_view_synthesis as evaluate
|
||||
from pytorch3d.implicitron.models.base_model import EvaluationMode, ImplicitronModelBase
|
||||
from pytorch3d.implicitron.tools.config import (
|
||||
registry,
|
||||
ReplaceableBase,
|
||||
run_auto_creation,
|
||||
)
|
||||
from pytorch3d.renderer.cameras import CamerasBase
|
||||
from torch.utils.data import DataLoader
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EvaluatorBase(ReplaceableBase):
    """
    Evaluate a trained model on given data. Returns a dict of loss/objective
    names and their values.
    """

    def run(
        self, model: ImplicitronModelBase, dataloader: DataLoader, **kwargs
    ) -> Dict[str, Any]:
        """
        Evaluate the results of Implicitron training.

        Args:
            model: The (trained) model to evaluate.
            dataloader: Yields the batches to evaluate on.

        Returns:
            A dict mapping metric/objective names to their values.

        Raises:
            NotImplementedError: always; concrete subclasses must override.
        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
@registry.register
class ImplicitronEvaluator(EvaluatorBase):
    """
    Evaluate the results of Implicitron training.

    Members:
        camera_difficulty_bin_breaks: low/medium vals to divide camera difficulties into
            [0-eps, low, medium, 1+eps].
        is_multisequence: whether evaluation treats the data as a
            multi-sequence task (also disables the use of `all_train_cameras`
            in favor of each batch's known cameras).
    """

    camera_difficulty_bin_breaks: Tuple[float, ...] = 0.97, 0.98
    is_multisequence: bool = False

    def __post_init__(self):
        run_auto_creation(self)

    def run(
        self,
        model: ImplicitronModelBase,
        dataloader: DataLoader,
        all_train_cameras: Optional[CamerasBase],
        device: torch.device,
        dump_to_json: bool = False,
        exp_dir: Optional[str] = None,
        epoch: Optional[int] = None,
        **kwargs,
    ) -> Dict[str, Any]:
        """
        Evaluate the results of Implicitron training. Optionally, dump results to
        exp_dir/results_test.json.

        Args:
            model: A (trained) model to evaluate.
            dataloader: A test dataloader.
            all_train_cameras: Camera instances we used for training.
            device: A torch device.
            dump_to_json: If True, will dump the results to a json file.
            exp_dir: Root experiment directory.
            epoch: Evaluation epoch (to be stored in the results dict).

        Returns:
            A dictionary of results.
        """
        # The LPIPS perceptual metric needs its own (VGG-backed) network.
        lpips_model = lpips.LPIPS(net="vgg")
        lpips_model = lpips_model.to(device)

        model.eval()

        per_batch_eval_results = []
        logger.info("Evaluating model ...")
        for frame_data in tqdm.tqdm(dataloader):
            frame_data = frame_data.to(device)

            # mask out the unknown images so that the model does not see them
            frame_data_for_eval = _get_eval_frame_data(frame_data)

            with torch.no_grad():
                preds = model(
                    **{
                        **frame_data_for_eval,
                        "evaluation_mode": EvaluationMode.EVALUATION,
                    }
                )
                # Deep-copy so later batches cannot mutate stored renders.
                implicitron_render = copy.deepcopy(preds["implicitron_render"])
                per_batch_eval_results.append(
                    evaluate.eval_batch(
                        frame_data,
                        implicitron_render,
                        bg_color="black",
                        lpips_model=lpips_model,
                        source_cameras=(  # None will make it use batch's known cameras
                            None if self.is_multisequence else all_train_cameras
                        ),
                    )
                )

        # Aggregate the per-batch metrics into per-category results.
        _, category_result = evaluate.summarize_nvs_eval_results(
            per_batch_eval_results,
            self.is_multisequence,
            self.camera_difficulty_bin_breaks,
        )

        results = category_result["results"]
        evaluate.pretty_print_nvs_metrics(results)
        if dump_to_json:
            _dump_to_json(epoch, exp_dir, results)

        return category_result["results"]
|
||||
|
||||
|
||||
def _dump_to_json(
    epoch: Optional[int], exp_dir: Optional[str], results: List[Dict[str, Any]]
) -> None:
    """
    Tag each result dict with the evaluation epoch (when given) and write the
    results to `<exp_dir>/results_test.json`.

    Raises:
        ValueError: if `exp_dir` is None.
    """
    if epoch is not None:
        for record in results:
            record["eval_epoch"] = int(epoch)
    logger.info("Evaluation results")

    if exp_dir is None:
        raise ValueError("Cannot save results to json without a specified save path.")
    out_path = os.path.join(exp_dir, "results_test.json")
    with open(out_path, "w") as json_file:
        json.dump(results, json_file)
|
||||
|
||||
|
||||
def _get_eval_frame_data(frame_data: Any) -> Any:
    """
    Return a deep copy of `frame_data` in which the image-like fields of
    unknown frames are zeroed out, ensuring the model cannot use them at
    evaluation time.
    """
    masked_frame_data = copy.deepcopy(frame_data)
    # 1.0 for known frames, 0.0 otherwise; broadcastable over (N, C, H, W).
    known_mask = ds_utils.is_known_frame(frame_data.frame_type).type_as(
        frame_data.image_rgb
    )[:, None, None, None]
    for field_name in ("image_rgb", "depth_map", "fg_probability", "mask_crop"):
        original_value = getattr(masked_frame_data, field_name)
        if original_value is None:
            setattr(masked_frame_data, field_name, None)
        else:
            setattr(masked_frame_data, field_name, original_value.clone() * known_mask)
    return masked_frame_data
|
||||
@@ -4,7 +4,7 @@
|
||||
# This source code is licensed under the BSD-style license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
import torch
|
||||
@@ -37,18 +37,12 @@ class ImplicitronRender:
|
||||
)
|
||||
|
||||
|
||||
class ImplicitronModelBase(ReplaceableBase, torch.nn.Module):
|
||||
class ImplicitronModelBase(ReplaceableBase):
|
||||
"""
|
||||
Replaceable abstract base for all image generation / rendering models.
|
||||
`forward()` method produces a render with a depth map. Derives from Module
|
||||
so we can rely on basic functionality provided to torch for model
|
||||
optimization.
|
||||
`forward()` method produces a render with a depth map.
|
||||
"""
|
||||
|
||||
# The keys from `preds` (output of ImplicitronModelBase.forward) to be logged in
|
||||
# the training loop.
|
||||
log_vars: List[str] = field(default_factory=lambda: ["objective"])
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
|
||||
|
||||
@@ -16,10 +16,10 @@ from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
import torch
|
||||
import tqdm
|
||||
from omegaconf import DictConfig
|
||||
from pytorch3d.common.compat import prod
|
||||
from pytorch3d.implicitron.models.metrics import (
|
||||
from pytorch3d.implicitron.models.metrics import ( # noqa
|
||||
RegularizationMetrics,
|
||||
RegularizationMetricsBase,
|
||||
ViewMetrics,
|
||||
ViewMetricsBase,
|
||||
)
|
||||
from pytorch3d.implicitron.tools import image_utils, vis_utils
|
||||
@@ -29,7 +29,7 @@ from pytorch3d.implicitron.tools.config import (
|
||||
run_auto_creation,
|
||||
)
|
||||
from pytorch3d.implicitron.tools.rasterize_mc import rasterize_mc_samples
|
||||
from pytorch3d.implicitron.tools.utils import cat_dataclass
|
||||
from pytorch3d.implicitron.tools.utils import cat_dataclass, setattr_if_hasattr
|
||||
from pytorch3d.renderer import RayBundle, utils as rend_utils
|
||||
from pytorch3d.renderer.cameras import CamerasBase
|
||||
from visdom import Visdom
|
||||
@@ -67,7 +67,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@registry.register
|
||||
class GenericModel(ImplicitronModelBase): # pyre-ignore: 13
|
||||
class GenericModel(ImplicitronModelBase, torch.nn.Module): # pyre-ignore: 13
|
||||
"""
|
||||
GenericModel is a wrapper for the neural implicit
|
||||
rendering and reconstruction pipeline which consists
|
||||
@@ -148,9 +148,7 @@ class GenericModel(ImplicitronModelBase): # pyre-ignore: 13
|
||||
thresholded by this value before being applied to the RGB/Depth images
|
||||
output_rasterized_mc: If True, visualize the Monte-Carlo pixel renders by
|
||||
splatting onto an image grid. Default: False.
|
||||
bg_color: RGB values for setting the background color of input image
|
||||
if mask_images=True. Defaults to (0.0, 0.0, 0.0). Each renderer has its own
|
||||
way to determine the background color of its output, unrelated to this.
|
||||
bg_color: RGB values for the background color. Default (0.0, 0.0, 0.0)
|
||||
num_passes: The specified implicit_function is initialized num_passes
|
||||
times and run sequentially.
|
||||
chunk_size_grid: The total number of points which can be rendered
|
||||
@@ -535,7 +533,6 @@ class GenericModel(ImplicitronModelBase): # pyre-ignore: 13
|
||||
return None
|
||||
loss = sum(losses_weighted)
|
||||
assert torch.is_tensor(loss)
|
||||
# pyre-fixme[7]: Expected `Optional[Tensor]` but got `int`.
|
||||
return loss
|
||||
|
||||
def visualize(
|
||||
@@ -620,57 +617,51 @@ class GenericModel(ImplicitronModelBase): # pyre-ignore: 13
|
||||
self.image_feature_extractor.get_feat_dims()
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def raysampler_tweak_args(cls, type, args: DictConfig) -> None:
|
||||
"""
|
||||
We don't expose certain fields of the raysampler because we want to set
|
||||
them from our own members.
|
||||
"""
|
||||
del args["sampling_mode_training"]
|
||||
del args["sampling_mode_evaluation"]
|
||||
del args["image_width"]
|
||||
del args["image_height"]
|
||||
|
||||
def create_raysampler(self):
|
||||
extra_args = {
|
||||
"sampling_mode_training": self.sampling_mode_training,
|
||||
"sampling_mode_evaluation": self.sampling_mode_evaluation,
|
||||
"image_width": self.render_image_width,
|
||||
"image_height": self.render_image_height,
|
||||
}
|
||||
raysampler_args = getattr(
|
||||
self, "raysampler_" + self.raysampler_class_type + "_args"
|
||||
)
|
||||
setattr_if_hasattr(
|
||||
raysampler_args, "sampling_mode_training", self.sampling_mode_training
|
||||
)
|
||||
setattr_if_hasattr(
|
||||
raysampler_args, "sampling_mode_evaluation", self.sampling_mode_evaluation
|
||||
)
|
||||
setattr_if_hasattr(raysampler_args, "image_width", self.render_image_width)
|
||||
setattr_if_hasattr(raysampler_args, "image_height", self.render_image_height)
|
||||
self.raysampler = registry.get(RaySamplerBase, self.raysampler_class_type)(
|
||||
**raysampler_args, **extra_args
|
||||
**raysampler_args
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def renderer_tweak_args(cls, type, args: DictConfig) -> None:
|
||||
"""
|
||||
We don't expose certain fields of the renderer because we want to set
|
||||
them based on other inputs.
|
||||
"""
|
||||
args.pop("render_features_dimensions", None)
|
||||
args.pop("object_bounding_sphere", None)
|
||||
|
||||
def create_renderer(self):
|
||||
extra_args = {}
|
||||
raysampler_args = getattr(
|
||||
self, "raysampler_" + self.raysampler_class_type + "_args"
|
||||
)
|
||||
self.renderer_MultiPassEmissionAbsorptionRenderer_args[
|
||||
"stratified_sampling_coarse_training"
|
||||
] = raysampler_args["stratified_point_sampling_training"]
|
||||
self.renderer_MultiPassEmissionAbsorptionRenderer_args[
|
||||
"stratified_sampling_coarse_evaluation"
|
||||
] = raysampler_args["stratified_point_sampling_evaluation"]
|
||||
self.renderer_SignedDistanceFunctionRenderer_args[
|
||||
"render_features_dimensions"
|
||||
] = self.render_features_dimensions
|
||||
|
||||
if self.renderer_class_type == "SignedDistanceFunctionRenderer":
|
||||
extra_args["render_features_dimensions"] = self.render_features_dimensions
|
||||
if not hasattr(self.raysampler, "scene_extent"):
|
||||
if "scene_extent" not in raysampler_args:
|
||||
raise ValueError(
|
||||
"SignedDistanceFunctionRenderer requires"
|
||||
+ " a raysampler that defines the 'scene_extent' field"
|
||||
+ " (this field is supported by, e.g., the adaptive raysampler - "
|
||||
+ " self.raysampler_class_type='AdaptiveRaySampler')."
|
||||
)
|
||||
extra_args["object_bounding_sphere"] = self.raysampler.scene_extent
|
||||
self.renderer_SignedDistanceFunctionRenderer_args.ray_tracer_args[
|
||||
"object_bounding_sphere"
|
||||
] = self.raysampler_AdaptiveRaySampler_args["scene_extent"]
|
||||
|
||||
renderer_args = getattr(self, "renderer_" + self.renderer_class_type + "_args")
|
||||
self.renderer = registry.get(BaseRenderer, self.renderer_class_type)(
|
||||
**renderer_args, **extra_args
|
||||
**renderer_args
|
||||
)
|
||||
|
||||
def create_implicit_function(self) -> None:
|
||||
@@ -681,18 +672,6 @@ class GenericModel(ImplicitronModelBase): # pyre-ignore: 13
|
||||
"""
|
||||
pass
|
||||
|
||||
@classmethod
|
||||
def implicit_function_tweak_args(cls, type, args: DictConfig) -> None:
|
||||
"""
|
||||
We don't expose certain implicit_function fields because we want to set
|
||||
them based on other inputs.
|
||||
"""
|
||||
args.pop("feature_vector_size", None)
|
||||
args.pop("encoding_dim", None)
|
||||
args.pop("latent_dim", None)
|
||||
args.pop("latent_dim_hypernet", None)
|
||||
args.pop("color_dim", None)
|
||||
|
||||
def _construct_implicit_functions(self):
|
||||
"""
|
||||
After run_auto_creation has been called, the arguments
|
||||
@@ -702,31 +681,32 @@ class GenericModel(ImplicitronModelBase): # pyre-ignore: 13
|
||||
implicit function method. Then the required implicit
|
||||
function(s) are initialized.
|
||||
"""
|
||||
extra_args = {}
|
||||
if self.implicit_function_class_type in (
|
||||
"NeuralRadianceFieldImplicitFunction",
|
||||
"NeRFormerImplicitFunction",
|
||||
):
|
||||
extra_args["latent_dim"] = (
|
||||
self._get_viewpooled_feature_dim()
|
||||
+ self._get_global_encoder_encoding_dim()
|
||||
)
|
||||
extra_args["color_dim"] = self.render_features_dimensions
|
||||
# nerf preprocessing
|
||||
nerf_args = self.implicit_function_NeuralRadianceFieldImplicitFunction_args
|
||||
nerformer_args = self.implicit_function_NeRFormerImplicitFunction_args
|
||||
nerf_args["latent_dim"] = nerformer_args["latent_dim"] = (
|
||||
self._get_viewpooled_feature_dim() + self._get_global_encoder_encoding_dim()
|
||||
)
|
||||
nerf_args["color_dim"] = nerformer_args[
|
||||
"color_dim"
|
||||
] = self.render_features_dimensions
|
||||
|
||||
if self.implicit_function_class_type == "IdrFeatureField":
|
||||
extra_args["feature_vector_size"] = self.render_features_dimensions
|
||||
extra_args["encoding_dim"] = self._get_global_encoder_encoding_dim()
|
||||
# idr preprocessing
|
||||
idr = self.implicit_function_IdrFeatureField_args
|
||||
idr["feature_vector_size"] = self.render_features_dimensions
|
||||
idr["encoding_dim"] = self._get_global_encoder_encoding_dim()
|
||||
|
||||
if self.implicit_function_class_type == "SRNImplicitFunction":
|
||||
extra_args["latent_dim"] = (
|
||||
self._get_viewpooled_feature_dim()
|
||||
+ self._get_global_encoder_encoding_dim()
|
||||
)
|
||||
# srn preprocessing
|
||||
srn = self.implicit_function_SRNImplicitFunction_args
|
||||
srn.raymarch_function_args.latent_dim = (
|
||||
self._get_viewpooled_feature_dim() + self._get_global_encoder_encoding_dim()
|
||||
)
|
||||
|
||||
# srn_hypernet preprocessing
|
||||
if self.implicit_function_class_type == "SRNHyperNetImplicitFunction":
|
||||
extra_args["latent_dim"] = self._get_viewpooled_feature_dim()
|
||||
extra_args["latent_dim_hypernet"] = self._get_global_encoder_encoding_dim()
|
||||
srn_hypernet = self.implicit_function_SRNHyperNetImplicitFunction_args
|
||||
srn_hypernet_args = srn_hypernet.hypernet_args
|
||||
srn_hypernet_args.latent_dim_hypernet = self._get_global_encoder_encoding_dim()
|
||||
srn_hypernet_args.latent_dim = self._get_viewpooled_feature_dim()
|
||||
|
||||
# check that for srn, srn_hypernet, idr we have self.num_passes=1
|
||||
implicit_function_type = registry.get(
|
||||
@@ -749,7 +729,7 @@ class GenericModel(ImplicitronModelBase): # pyre-ignore: 13
|
||||
if config is None:
|
||||
raise ValueError(f"{config_name} not present")
|
||||
implicit_functions_list = [
|
||||
ImplicitFunctionWrapper(implicit_function_type(**config, **extra_args))
|
||||
ImplicitFunctionWrapper(implicit_function_type(**config))
|
||||
for _ in range(self.num_passes)
|
||||
]
|
||||
return torch.nn.ModuleList(implicit_functions_list)
|
||||
@@ -860,7 +840,7 @@ class GenericModel(ImplicitronModelBase): # pyre-ignore: 13
|
||||
|
||||
# Estimate the rasterization point radius so that we approximately fill
|
||||
# the whole image given the number of rasterized points.
|
||||
pt_radius = 2.0 / math.sqrt(xys.shape[1])
|
||||
pt_radius = 2.0 * math.sqrt(xys.shape[1])
|
||||
|
||||
# Rasterize the samples.
|
||||
features_depth_render, masks_render = rasterize_mc_samples(
|
||||
@@ -920,7 +900,7 @@ def _chunk_generator(
|
||||
f"by n_pts_per_ray ({n_pts_per_ray})"
|
||||
)
|
||||
|
||||
n_rays = prod(spatial_dim)
|
||||
n_rays = math.prod(spatial_dim)
|
||||
# special handling for raytracing-based methods
|
||||
n_chunks = -(-n_rays * max(n_pts_per_ray, 1) // chunk_size)
|
||||
chunk_size_in_rays = -(-n_rays // n_chunks)
|
||||
@@ -936,9 +916,9 @@ def _chunk_generator(
|
||||
directions=ray_bundle.directions.reshape(batch_size, -1, 3)[
|
||||
:, start_idx:end_idx
|
||||
],
|
||||
lengths=ray_bundle.lengths.reshape(batch_size, n_rays, n_pts_per_ray)[
|
||||
:, start_idx:end_idx
|
||||
],
|
||||
lengths=ray_bundle.lengths.reshape(
|
||||
batch_size, math.prod(spatial_dim), n_pts_per_ray
|
||||
)[:, start_idx:end_idx],
|
||||
xys=ray_bundle.xys.reshape(batch_size, -1, 2)[:, start_idx:end_idx],
|
||||
)
|
||||
extra_args = kwargs.copy()
|
||||
|
||||
@@ -24,16 +24,15 @@ class Autodecoder(Configurable, torch.nn.Module):
|
||||
"""
|
||||
|
||||
encoding_dim: int = 0
|
||||
n_instances: int = 1
|
||||
n_instances: int = 0
|
||||
init_scale: float = 1.0
|
||||
ignore_input: bool = False
|
||||
|
||||
def __post_init__(self):
|
||||
super().__init__()
|
||||
|
||||
if self.n_instances <= 0:
|
||||
raise ValueError(f"Invalid n_instances {self.n_instances}")
|
||||
|
||||
# Do not init the codes at all in case we have 0 instances.
|
||||
return
|
||||
self._autodecoder_codes = torch.nn.Embedding(
|
||||
self.n_instances,
|
||||
self.encoding_dim,
|
||||
@@ -71,9 +70,13 @@ class Autodecoder(Configurable, torch.nn.Module):
|
||||
return key_map
|
||||
|
||||
def calculate_squared_encoding_norm(self) -> Optional[torch.Tensor]:
|
||||
if self.n_instances <= 0:
|
||||
return None
|
||||
return (self._autodecoder_codes.weight**2).mean() # pyre-ignore[16]
|
||||
|
||||
def get_encoding_dim(self) -> int:
|
||||
if self.n_instances <= 0:
|
||||
return 0
|
||||
return self.encoding_dim
|
||||
|
||||
def forward(self, x: Union[torch.LongTensor, List[str]]) -> Optional[torch.Tensor]:
|
||||
@@ -87,6 +90,9 @@ class Autodecoder(Configurable, torch.nn.Module):
|
||||
codes: A tensor of shape `(N, self.encoding_dim)` containing the
|
||||
key-specific autodecoder codes.
|
||||
"""
|
||||
if self.n_instances == 0:
|
||||
return None
|
||||
|
||||
if self.ignore_input:
|
||||
x = ["singleton"]
|
||||
|
||||
|
||||
@@ -42,13 +42,7 @@ class GlobalEncoderBase(ReplaceableBase):
|
||||
"""
|
||||
raise NotImplementedError()
|
||||
|
||||
def forward(
|
||||
self,
|
||||
*,
|
||||
frame_timestamp: Optional[torch.Tensor] = None,
|
||||
sequence_name: Optional[Union[torch.LongTensor, List[str]]] = None,
|
||||
**kwargs,
|
||||
) -> torch.Tensor:
|
||||
def forward(self, **kwargs) -> torch.Tensor:
|
||||
"""
|
||||
Given a set of inputs to encode, generates a tensor containing the encoding.
|
||||
|
||||
@@ -76,14 +70,9 @@ class SequenceAutodecoder(GlobalEncoderBase, torch.nn.Module): # pyre-ignore: 1
|
||||
return self.autodecoder.get_encoding_dim()
|
||||
|
||||
def forward(
|
||||
self,
|
||||
*,
|
||||
frame_timestamp: Optional[torch.Tensor] = None,
|
||||
sequence_name: Optional[Union[torch.LongTensor, List[str]]] = None,
|
||||
**kwargs,
|
||||
self, sequence_name: Union[torch.LongTensor, List[str]], **kwargs
|
||||
) -> torch.Tensor:
|
||||
if sequence_name is None:
|
||||
raise ValueError("sequence_name must be provided.")
|
||||
|
||||
# run dtype checks and pass sequence_name to self.autodecoder
|
||||
return self.autodecoder(sequence_name)
|
||||
|
||||
@@ -112,15 +101,7 @@ class HarmonicTimeEncoder(GlobalEncoderBase, torch.nn.Module):
|
||||
def get_encoding_dim(self):
|
||||
return self._harmonic_embedding.get_output_dim(1)
|
||||
|
||||
def forward(
|
||||
self,
|
||||
*,
|
||||
frame_timestamp: Optional[torch.Tensor] = None,
|
||||
sequence_name: Optional[Union[torch.LongTensor, List[str]]] = None,
|
||||
**kwargs,
|
||||
) -> torch.Tensor:
|
||||
if frame_timestamp is None:
|
||||
raise ValueError("frame_timestamp must be provided.")
|
||||
def forward(self, frame_timestamp: torch.Tensor, **kwargs) -> torch.Tensor:
|
||||
if frame_timestamp.shape[-1] != 1:
|
||||
raise ValueError("Frame timestamp's last dimensions should be one.")
|
||||
time = frame_timestamp / self.time_divisor
|
||||
|
||||
@@ -4,7 +4,6 @@
|
||||
from typing import Any, cast, Optional, Tuple
|
||||
|
||||
import torch
|
||||
from omegaconf import DictConfig
|
||||
from pytorch3d.common.linear_with_repeat import LinearWithRepeat
|
||||
from pytorch3d.implicitron.third_party import hyperlayers, pytorch_prototyping
|
||||
from pytorch3d.implicitron.tools.config import Configurable, registry, run_auto_creation
|
||||
@@ -328,7 +327,6 @@ class SRNRaymarchHyperNet(Configurable, torch.nn.Module):
|
||||
@registry.register
|
||||
# pyre-fixme[13]: Uninitialized attribute
|
||||
class SRNImplicitFunction(ImplicitFunctionBase, torch.nn.Module):
|
||||
latent_dim: int = 0
|
||||
raymarch_function: SRNRaymarchFunction
|
||||
pixel_generator: SRNPixelGenerator
|
||||
|
||||
@@ -336,17 +334,6 @@ class SRNImplicitFunction(ImplicitFunctionBase, torch.nn.Module):
|
||||
super().__init__()
|
||||
run_auto_creation(self)
|
||||
|
||||
def create_raymarch_function(self) -> None:
|
||||
self.raymarch_function = SRNRaymarchFunction(
|
||||
latent_dim=self.latent_dim,
|
||||
# pyre-ignore[32]
|
||||
**self.raymarch_function_args,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def raymarch_function_tweak_args(cls, type, args: DictConfig) -> None:
|
||||
args.pop("latent_dim", None)
|
||||
|
||||
def forward(
|
||||
self,
|
||||
ray_bundle: RayBundle,
|
||||
@@ -384,8 +371,6 @@ class SRNHyperNetImplicitFunction(ImplicitFunctionBase, torch.nn.Module):
|
||||
the cache.
|
||||
"""
|
||||
|
||||
latent_dim_hypernet: int = 0
|
||||
latent_dim: int = 0
|
||||
hypernet: SRNRaymarchHyperNet
|
||||
pixel_generator: SRNPixelGenerator
|
||||
|
||||
@@ -393,19 +378,6 @@ class SRNHyperNetImplicitFunction(ImplicitFunctionBase, torch.nn.Module):
|
||||
super().__init__()
|
||||
run_auto_creation(self)
|
||||
|
||||
def create_hypernet(self) -> None:
|
||||
self.hypernet = SRNRaymarchHyperNet(
|
||||
latent_dim=self.latent_dim,
|
||||
latent_dim_hypernet=self.latent_dim_hypernet,
|
||||
# pyre-ignore[32]
|
||||
**self.hypernet_args,
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def hypernet_tweak_args(cls, type, args: DictConfig) -> None:
|
||||
args.pop("latent_dim", None)
|
||||
args.pop("latent_dim_hypernet", None)
|
||||
|
||||
def forward(
|
||||
self,
|
||||
ray_bundle: RayBundle,
|
||||
|
||||
@@ -4,10 +4,10 @@
|
||||
# This source code is licensed under the BSD-style license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
import math
|
||||
from typing import Callable, Optional
|
||||
|
||||
import torch
|
||||
from pytorch3d.common.compat import prod
|
||||
from pytorch3d.renderer.cameras import CamerasBase
|
||||
|
||||
|
||||
@@ -52,7 +52,7 @@ def create_embeddings_for_implicit_function(
|
||||
embeds = torch.empty(
|
||||
bs,
|
||||
1,
|
||||
prod(spatial_size),
|
||||
math.prod(spatial_size),
|
||||
pts_per_ray,
|
||||
0,
|
||||
dtype=xyz_world.dtype,
|
||||
@@ -62,7 +62,7 @@ def create_embeddings_for_implicit_function(
|
||||
embeds = xyz_embedding_function(ray_points_for_embed).reshape(
|
||||
bs,
|
||||
1,
|
||||
prod(spatial_size),
|
||||
math.prod(spatial_size),
|
||||
pts_per_ray,
|
||||
-1,
|
||||
) # flatten spatial, add n_src dim
|
||||
@@ -73,7 +73,7 @@ def create_embeddings_for_implicit_function(
|
||||
embed_shape = (
|
||||
bs,
|
||||
embeds_viewpooled.shape[1],
|
||||
prod(spatial_size),
|
||||
math.prod(spatial_size),
|
||||
pts_per_ray,
|
||||
-1,
|
||||
)
|
||||
|
||||
@@ -22,7 +22,7 @@ from .renderer.base import EvaluationMode
|
||||
|
||||
|
||||
@registry.register
|
||||
class ModelDBIR(ImplicitronModelBase):
|
||||
class ModelDBIR(ImplicitronModelBase, torch.nn.Module):
|
||||
"""
|
||||
A simple depth-based image rendering model.
|
||||
|
||||
|
||||
@@ -53,12 +53,10 @@ class MultiPassEmissionAbsorptionRenderer( # pyre-ignore: 13
|
||||
fine rendering pass during training.
|
||||
n_pts_per_ray_fine_evaluation: The number of points sampled per ray for the
|
||||
fine rendering pass during evaluation.
|
||||
stratified_sampling_coarse_training: Enable/disable stratified sampling in the
|
||||
refiner during training. Only matters if there are multiple implicit
|
||||
functions (i.e. in GenericModel if num_passes>1).
|
||||
stratified_sampling_coarse_evaluation: Enable/disable stratified sampling in
|
||||
the refiner during evaluation. Only matters if there are multiple implicit
|
||||
functions (i.e. in GenericModel if num_passes>1).
|
||||
stratified_sampling_coarse_training: Enable/disable stratified sampling during
|
||||
training.
|
||||
stratified_sampling_coarse_evaluation: Enable/disable stratified sampling during
|
||||
evaluation.
|
||||
append_coarse_samples_to_fine: Add the fine ray points to the coarse points
|
||||
after sampling.
|
||||
density_noise_std_train: Standard deviation of the noise added to the
|
||||
|
||||
@@ -218,7 +218,7 @@ class AdaptiveRaySampler(AbstractMaskRaySampler):
|
||||
|
||||
def _get_min_max_depth_bounds(self, cameras: CamerasBase) -> Tuple[float, float]:
|
||||
"""
|
||||
Returns the adaptively calculated near/far planes.
|
||||
Returns the adaptivelly calculated near/far planes.
|
||||
"""
|
||||
min_depth, max_depth = camera_utils.get_min_max_depth_bounds(
|
||||
cameras, self._scene_center, self.scene_extent
|
||||
|
||||
@@ -3,16 +3,12 @@
|
||||
# implicit_differentiable_renderer.py
|
||||
# Copyright (c) 2020 Lior Yariv
|
||||
import functools
|
||||
import math
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
import torch
|
||||
from omegaconf import DictConfig
|
||||
from pytorch3d.common.compat import prod
|
||||
from pytorch3d.implicitron.tools.config import (
|
||||
get_default_args_field,
|
||||
registry,
|
||||
run_auto_creation,
|
||||
)
|
||||
from pytorch3d.implicitron.tools.config import get_default_args_field, registry
|
||||
from pytorch3d.implicitron.tools.utils import evaluating
|
||||
from pytorch3d.renderer import RayBundle
|
||||
|
||||
@@ -22,10 +18,9 @@ from .rgb_net import RayNormalColoringNetwork
|
||||
|
||||
|
||||
@registry.register
|
||||
class SignedDistanceFunctionRenderer(BaseRenderer, torch.nn.Module): # pyre-ignore[13]
|
||||
class SignedDistanceFunctionRenderer(BaseRenderer, torch.nn.Module):
|
||||
render_features_dimensions: int = 3
|
||||
object_bounding_sphere: float = 1.0
|
||||
ray_tracer: RayTracing
|
||||
ray_tracer_args: DictConfig = get_default_args_field(RayTracing)
|
||||
ray_normal_coloring_network_args: DictConfig = get_default_args_field(
|
||||
RayNormalColoringNetwork
|
||||
)
|
||||
@@ -42,7 +37,8 @@ class SignedDistanceFunctionRenderer(BaseRenderer, torch.nn.Module): # pyre-ign
|
||||
f"Background color should have {render_features_dimensions} entries."
|
||||
)
|
||||
|
||||
run_auto_creation(self)
|
||||
self.ray_tracer = RayTracing(**self.ray_tracer_args)
|
||||
self.object_bounding_sphere = self.ray_tracer_args.get("object_bounding_sphere")
|
||||
|
||||
self.ray_normal_coloring_network_args[
|
||||
"feature_vector_size"
|
||||
@@ -53,17 +49,6 @@ class SignedDistanceFunctionRenderer(BaseRenderer, torch.nn.Module): # pyre-ign
|
||||
|
||||
self.register_buffer("_bg_color", torch.tensor(self.bg_color), persistent=False)
|
||||
|
||||
@classmethod
|
||||
def ray_tracer_tweak_args(cls, type, args: DictConfig) -> None:
|
||||
del args["object_bounding_sphere"]
|
||||
|
||||
def create_ray_tracer(self) -> None:
|
||||
self.ray_tracer = RayTracing(
|
||||
# pyre-ignore[32]
|
||||
**self.ray_tracer_args,
|
||||
object_bounding_sphere=self.object_bounding_sphere,
|
||||
)
|
||||
|
||||
def requires_object_mask(self) -> bool:
|
||||
return True
|
||||
|
||||
@@ -105,13 +90,14 @@ class SignedDistanceFunctionRenderer(BaseRenderer, torch.nn.Module): # pyre-ign
|
||||
|
||||
# object_mask: silhouette of the object
|
||||
batch_size, *spatial_size, _ = ray_bundle.lengths.shape
|
||||
num_pixels = prod(spatial_size)
|
||||
num_pixels = math.prod(spatial_size)
|
||||
|
||||
cam_loc = ray_bundle.origins.reshape(batch_size, -1, 3)
|
||||
ray_dirs = ray_bundle.directions.reshape(batch_size, -1, 3)
|
||||
object_mask = object_mask.reshape(batch_size, -1)
|
||||
|
||||
with torch.no_grad(), evaluating(implicit_function):
|
||||
# pyre-fixme[29]: `Union[torch.Tensor, torch.nn.Module]` is not a function.
|
||||
points, network_object_mask, dists = self.ray_tracer(
|
||||
sdf=lambda x: implicit_function(x)[
|
||||
:, 0
|
||||
@@ -142,6 +128,7 @@ class SignedDistanceFunctionRenderer(BaseRenderer, torch.nn.Module): # pyre-ign
|
||||
N = surface_points.shape[0]
|
||||
|
||||
# Sample points for the eikonal loss
|
||||
# pyre-fixme[9]
|
||||
eik_bounding_box: float = self.object_bounding_sphere
|
||||
n_eik_points = batch_size * num_pixels // 2
|
||||
eikonal_points = torch.empty(
|
||||
|
||||
@@ -881,43 +881,7 @@ def get_default_args_field(
|
||||
def create():
|
||||
args = get_default_args(C, _do_not_process=_do_not_process)
|
||||
if _hook is not None:
|
||||
with open_dict(args):
|
||||
_hook(args)
|
||||
return args
|
||||
|
||||
return dataclasses.field(default_factory=create)
|
||||
|
||||
|
||||
def _get_default_args_field_from_registry(
|
||||
*,
|
||||
base_class_wanted: Type[_X],
|
||||
name: str,
|
||||
_do_not_process: Tuple[type, ...] = (),
|
||||
_hook: Optional[Callable[[DictConfig], None]] = None,
|
||||
):
|
||||
"""
|
||||
Get a dataclass field which defaults to
|
||||
get_default_args(registry.get(base_class_wanted, name)).
|
||||
|
||||
This is used internally in place of get_default_args_field in
|
||||
order that default values are updated if a class is redefined.
|
||||
|
||||
Args:
|
||||
base_class_wanted: As for registry.get.
|
||||
name: As for registry.get.
|
||||
_do_not_process: As for get_default_args
|
||||
_hook: Function called on the result before returning.
|
||||
|
||||
Returns:
|
||||
function to return new DictConfig object
|
||||
"""
|
||||
|
||||
def create():
|
||||
C = registry.get(base_class_wanted=base_class_wanted, name=name)
|
||||
args = get_default_args(C, _do_not_process=_do_not_process)
|
||||
if _hook is not None:
|
||||
with open_dict(args):
|
||||
_hook(args)
|
||||
_hook(args)
|
||||
return args
|
||||
|
||||
return dataclasses.field(default_factory=create)
|
||||
@@ -1014,9 +978,8 @@ def _process_member(
|
||||
setattr(
|
||||
some_class,
|
||||
args_name,
|
||||
_get_default_args_field_from_registry(
|
||||
base_class_wanted=type_,
|
||||
name=derived_type.__name__,
|
||||
get_default_args_field(
|
||||
derived_type,
|
||||
_do_not_process=_do_not_process + (some_class,),
|
||||
_hook=hook_closed,
|
||||
),
|
||||
|
||||
@@ -74,7 +74,6 @@ class Stats(object):
|
||||
"""
|
||||
stats logging object useful for gathering statistics of training a deep net in pytorch
|
||||
Example:
|
||||
```
|
||||
# init stats structure that logs statistics 'objective' and 'top1e'
|
||||
stats = Stats( ('objective','top1e') )
|
||||
network = init_net() # init a pytorch module (=nueral network)
|
||||
@@ -95,7 +94,6 @@ class Stats(object):
|
||||
# stores the training plots into '/tmp/epoch_stats.pdf'
|
||||
# and plots into a visdom server running at localhost (if running)
|
||||
stats.plot_stats(plot_file='/tmp/epoch_stats.pdf')
|
||||
```
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
@@ -198,6 +196,7 @@ class Stats(object):
|
||||
if verbose:
|
||||
print(f"Adding {add_log_var}")
|
||||
self.log_vars.append(add_log_var)
|
||||
# self.synchronize_logged_vars(self.log_vars, verbose=verbose)
|
||||
|
||||
def update(self, preds, time_start=None, freeze_iter=False, stat_set="train"):
|
||||
|
||||
@@ -229,6 +228,7 @@ class Stats(object):
|
||||
elapsed = time.time() - time_start
|
||||
time_per_it = float(elapsed) / float(it + 1)
|
||||
val = time_per_it
|
||||
# self.stats[stat_set]['sec/it'].update(time_per_it,epoch=epoch,n=1)
|
||||
else:
|
||||
if stat in preds:
|
||||
try:
|
||||
@@ -439,6 +439,7 @@ class Stats(object):
|
||||
self.log_vars = log_vars # !!!
|
||||
|
||||
for stat_set in stat_sets:
|
||||
reference_stat = list(self.stats[stat_set].keys())[0]
|
||||
for stat in log_vars:
|
||||
if stat not in self.stats[stat_set]:
|
||||
if verbose:
|
||||
@@ -465,11 +466,12 @@ class Stats(object):
|
||||
lastep = self.epoch + 1
|
||||
for ep in range(lastep):
|
||||
self.stats[stat_set][stat].update(default_val, n=1, epoch=ep)
|
||||
epoch_self = self.stats[stat_set][reference_stat].get_epoch()
|
||||
epoch_generated = self.stats[stat_set][stat].get_epoch()
|
||||
assert (
|
||||
epoch_generated == self.epoch + 1
|
||||
epoch_self == epoch_generated
|
||||
), "bad epoch of synchronized log_var! %d vs %d" % (
|
||||
self.epoch + 1,
|
||||
epoch_self,
|
||||
epoch_generated,
|
||||
)
|
||||
|
||||
|
||||
@@ -157,6 +157,15 @@ def cat_dataclass(batch, tensor_collator: Callable):
|
||||
return type(elem)(**collated)
|
||||
|
||||
|
||||
def setattr_if_hasattr(obj, name, value):
|
||||
"""
|
||||
Same as setattr(obj, name, value), but does nothing in case `name` is
|
||||
not an attribe of `obj`.
|
||||
"""
|
||||
if hasattr(obj, name):
|
||||
setattr(obj, name, value)
|
||||
|
||||
|
||||
class Timer:
|
||||
"""
|
||||
A simple class for timing execution.
|
||||
|
||||
@@ -84,6 +84,8 @@ class VideoWriter:
|
||||
or a 2-tuple defining the size of the output image.
|
||||
"""
|
||||
|
||||
# pyre-fixme[6]: For 1st param expected `Union[PathLike[str], str]` but got
|
||||
# `Optional[str]`.
|
||||
outfile = os.path.join(self.cache_dir, self.regexp % self.frame_num)
|
||||
|
||||
if isinstance(frame, matplotlib.figure.Figure):
|
||||
@@ -125,6 +127,8 @@ class VideoWriter:
|
||||
video_path: The path to the generated video.
|
||||
"""
|
||||
|
||||
# pyre-fixme[6]: For 1st param expected `Union[PathLike[str], str]` but got
|
||||
# `Optional[str]`.
|
||||
regexp = os.path.join(self.cache_dir, self.regexp)
|
||||
|
||||
if self.output_format == "visdom": # works for ppt too
|
||||
|
||||
@@ -14,22 +14,20 @@ from visdom import Visdom
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def get_visdom_env(visdom_env: str, exp_dir: str) -> str:
|
||||
def get_visdom_env(cfg):
|
||||
"""
|
||||
Parse out visdom environment name from the input config.
|
||||
|
||||
Args:
|
||||
visdom_env: Name of the wisdom environment, could be empty string.
|
||||
exp_dir: Root experiment directory.
|
||||
cfg: The global config file.
|
||||
|
||||
Returns:
|
||||
visdom_env: The name of the visdom environment. If the given visdom_env is
|
||||
empty, return the name of the bottom directory in exp_dir.
|
||||
visdom_env: The name of the visdom environment.
|
||||
"""
|
||||
if len(visdom_env) == 0:
|
||||
visdom_env = exp_dir.split("/")[-1]
|
||||
if len(cfg.visdom_env) == 0:
|
||||
visdom_env = cfg.exp_dir.split("/")[-1]
|
||||
else:
|
||||
visdom_env = visdom_env
|
||||
visdom_env = cfg.visdom_env
|
||||
return visdom_env
|
||||
|
||||
|
||||
|
||||
@@ -215,6 +215,8 @@ def load_obj(
|
||||
"""
|
||||
data_dir = "./"
|
||||
if isinstance(f, (str, bytes, Path)):
|
||||
# pyre-fixme[6]: For 1st param expected `PathLike[Variable[AnyStr <: [str,
|
||||
# bytes]]]` but got `Union[Path, bytes, str]`.
|
||||
data_dir = os.path.dirname(f)
|
||||
if path_manager is None:
|
||||
path_manager = PathManager()
|
||||
|
||||
@@ -65,6 +65,11 @@ from .mesh import (
|
||||
TexturesVertex,
|
||||
)
|
||||
|
||||
try:
|
||||
from .opengl import EGLContext, global_device_context_store, MeshRasterizerOpenGL
|
||||
except (ImportError, ModuleNotFoundError):
|
||||
pass # opengl or pycuda.gl not available, or pytorch3_opengl not in TARGETS.
|
||||
|
||||
from .points import (
|
||||
AlphaCompositor,
|
||||
NormWeightedCompositor,
|
||||
|
||||
@@ -1661,9 +1661,9 @@ def look_at_rotation(
|
||||
|
||||
|
||||
def look_at_view_transform(
|
||||
dist: _BatchFloatType = 1.0,
|
||||
elev: _BatchFloatType = 0.0,
|
||||
azim: _BatchFloatType = 0.0,
|
||||
dist: float = 1.0,
|
||||
elev: float = 0.0,
|
||||
azim: float = 0.0,
|
||||
degrees: bool = True,
|
||||
eye: Optional[Union[Sequence, torch.Tensor]] = None,
|
||||
at=((0, 0, 0),), # (1, 3)
|
||||
|
||||
@@ -10,6 +10,8 @@ import torch
|
||||
import torch.nn as nn
|
||||
|
||||
from ...structures.meshes import Meshes
|
||||
from .rasterizer import MeshRasterizer
|
||||
|
||||
|
||||
# A renderer class should be initialized with a
|
||||
# function for rasterization and a function for shading.
|
||||
@@ -30,11 +32,11 @@ from ...structures.meshes import Meshes
|
||||
class MeshRenderer(nn.Module):
|
||||
"""
|
||||
A class for rendering a batch of heterogeneous meshes. The class should
|
||||
be initialized with a rasterizer (a MeshRasterizer or a MeshRasterizerOpenGL)
|
||||
and shader class which each have a forward function.
|
||||
be initialized with a rasterizer and shader class which each have a forward
|
||||
function.
|
||||
"""
|
||||
|
||||
def __init__(self, rasterizer, shader) -> None:
|
||||
def __init__(self, rasterizer: MeshRasterizer, shader) -> None:
|
||||
super().__init__()
|
||||
self.rasterizer = rasterizer
|
||||
self.shader = shader
|
||||
@@ -67,8 +69,8 @@ class MeshRenderer(nn.Module):
|
||||
class MeshRendererWithFragments(nn.Module):
|
||||
"""
|
||||
A class for rendering a batch of heterogeneous meshes. The class should
|
||||
be initialized with a rasterizer (a MeshRasterizer or a MeshRasterizerOpenGL)
|
||||
and shader class which each have a forward function.
|
||||
be initialized with a rasterizer and shader class which each have a forward
|
||||
function.
|
||||
|
||||
In the forward pass this class returns the `fragments` from which intermediate
|
||||
values such as the depth map can be easily extracted e.g.
|
||||
@@ -78,7 +80,7 @@ class MeshRendererWithFragments(nn.Module):
|
||||
depth = fragments.zbuf
|
||||
"""
|
||||
|
||||
def __init__(self, rasterizer, shader) -> None:
|
||||
def __init__(self, rasterizer: MeshRasterizer, shader) -> None:
|
||||
super().__init__()
|
||||
self.rasterizer = rasterizer
|
||||
self.shader = shader
|
||||
|
||||
@@ -130,9 +130,8 @@ class MeshRasterizerOpenGL(nn.Module):
|
||||
|
||||
Fragments output by MeshRasterizerOpenGL and MeshRasterizer should have near
|
||||
identical pix_to_face, bary_coords and zbuf. However, MeshRasterizerOpenGL does not
|
||||
return Fragments.dists which is only relevant to SoftPhongShader and
|
||||
SoftSilhouetteShader. These do not work with MeshRasterizerOpenGL (because it is
|
||||
not differentiable).
|
||||
return Fragments.dists which is only relevant to SoftPhongShader which doesn't work
|
||||
with MeshRasterizerOpenGL (because it is not differentiable).
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
|
||||
@@ -474,12 +474,6 @@ class Renderer(torch.nn.Module):
|
||||
rot_mat = axis_angle_to_matrix(rot_vec)
|
||||
if first_R_then_T:
|
||||
pos_vec = torch.matmul(rot_mat, pos_vec[..., None])[:, :, 0]
|
||||
LOGGER.debug(
|
||||
"Camera position: %s, rotation: %s. Focal length: %s.",
|
||||
str(pos_vec),
|
||||
str(rot_vec),
|
||||
str(focal_length),
|
||||
)
|
||||
sensor_dir_x = torch.matmul(
|
||||
rot_mat,
|
||||
torch.tensor(
|
||||
@@ -500,56 +494,20 @@ class Renderer(torch.nn.Module):
|
||||
)[:, :, 0]
|
||||
if right_handed:
|
||||
sensor_dir_z *= -1
|
||||
LOGGER.debug(
|
||||
"Sensor direction vectors: %s, %s, %s.",
|
||||
str(sensor_dir_x),
|
||||
str(sensor_dir_y),
|
||||
str(sensor_dir_z),
|
||||
)
|
||||
if orthogonal:
|
||||
sensor_center = pos_vec
|
||||
else:
|
||||
sensor_center = pos_vec + focal_length * sensor_dir_z
|
||||
LOGGER.debug("Sensor center: %s.", str(sensor_center))
|
||||
sensor_luc = ( # Sensor left upper corner.
|
||||
sensor_center
|
||||
- sensor_dir_x * (sensor_size_x / 2.0)
|
||||
- sensor_dir_y * (sensor_size_y / 2.0)
|
||||
)
|
||||
LOGGER.debug("Sensor luc: %s.", str(sensor_luc))
|
||||
pixel_size_x = sensor_size_x / float(width)
|
||||
pixel_size_y = sensor_size_y / float(height)
|
||||
LOGGER.debug(
|
||||
"Pixel sizes (x): %s, (y) %s.", str(pixel_size_x), str(pixel_size_y)
|
||||
)
|
||||
pixel_vec_x: torch.Tensor = sensor_dir_x * pixel_size_x
|
||||
pixel_vec_y: torch.Tensor = sensor_dir_y * pixel_size_y
|
||||
pixel_0_0_center = sensor_luc + 0.5 * pixel_vec_x + 0.5 * pixel_vec_y
|
||||
LOGGER.debug(
|
||||
"Pixel 0 centers: %s, vec x: %s, vec y: %s.",
|
||||
str(pixel_0_0_center),
|
||||
str(pixel_vec_x),
|
||||
str(pixel_vec_y),
|
||||
)
|
||||
if not orthogonal:
|
||||
LOGGER.debug(
|
||||
"Camera horizontal fovs: %s deg.",
|
||||
str(
|
||||
2.0
|
||||
* torch.atan(0.5 * sensor_size_x / focal_length)
|
||||
/ math.pi
|
||||
* 180.0
|
||||
),
|
||||
)
|
||||
LOGGER.debug(
|
||||
"Camera vertical fovs: %s deg.",
|
||||
str(
|
||||
2.0
|
||||
* torch.atan(0.5 * sensor_size_y / focal_length)
|
||||
/ math.pi
|
||||
* 180.0
|
||||
),
|
||||
)
|
||||
# Reduce dimension.
|
||||
focal_length: torch.Tensor = focal_length[:, 0]
|
||||
if batch_processing:
|
||||
|
||||
@@ -323,6 +323,7 @@ def random_quaternions(
|
||||
"""
|
||||
if isinstance(device, str):
|
||||
device = torch.device(device)
|
||||
# pyre-fixme[6]: For 2nd param expected `dtype` but got `Optional[dtype]`.
|
||||
o = torch.randn((n, 4), dtype=dtype, device=device)
|
||||
s = (o * o).sum(1)
|
||||
o = o / _copysign(torch.sqrt(s), o[:, 0])[:, None]
|
||||
|
||||
21
setup.py
21
setup.py
@@ -8,7 +8,6 @@
|
||||
import glob
|
||||
import os
|
||||
import runpy
|
||||
import sys
|
||||
import warnings
|
||||
from typing import List, Optional
|
||||
|
||||
@@ -36,13 +35,6 @@ def get_existing_ccbin(nvcc_args: List[str]) -> Optional[str]:
|
||||
|
||||
|
||||
def get_extensions():
|
||||
no_extension = os.getenv("PYTORCH3D_NO_EXTENSION", "0") == "1"
|
||||
if no_extension:
|
||||
msg = "SKIPPING EXTENSION BUILD. PYTORCH3D WILL NOT WORK!"
|
||||
print(msg, file=sys.stderr)
|
||||
warnings.warn(msg)
|
||||
return []
|
||||
|
||||
this_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
extensions_dir = os.path.join(this_dir, "pytorch3d", "csrc")
|
||||
sources = glob.glob(os.path.join(extensions_dir, "**", "*.cpp"), recursive=True)
|
||||
@@ -54,10 +46,7 @@ def get_extensions():
|
||||
include_dirs = [extensions_dir]
|
||||
|
||||
force_cuda = os.getenv("FORCE_CUDA", "0") == "1"
|
||||
force_no_cuda = os.getenv("PYTORCH3D_FORCE_NO_CUDA", "0") == "1"
|
||||
if (
|
||||
not force_no_cuda and torch.cuda.is_available() and CUDA_HOME is not None
|
||||
) or force_cuda:
|
||||
if (torch.cuda.is_available() and CUDA_HOME is not None) or force_cuda:
|
||||
extension = CUDAExtension
|
||||
sources += source_cuda
|
||||
define_macros += [("WITH_CUDA", None)]
|
||||
@@ -139,7 +128,7 @@ if os.getenv("PYTORCH3D_NO_NINJA", "0") == "1":
|
||||
else:
|
||||
BuildExtension = torch.utils.cpp_extension.BuildExtension
|
||||
|
||||
trainer = "pytorch3d.implicitron_trainer"
|
||||
trainer = "projects.implicitron_trainer"
|
||||
|
||||
setup(
|
||||
name="pytorch3d",
|
||||
@@ -149,10 +138,8 @@ setup(
|
||||
description="PyTorch3D is FAIR's library of reusable components "
|
||||
"for deep Learning with 3D data.",
|
||||
packages=find_packages(
|
||||
exclude=("configs", "tests", "tests.*", "docs.*", "projects.*")
|
||||
)
|
||||
+ [trainer],
|
||||
package_dir={trainer: "projects/implicitron_trainer"},
|
||||
exclude=("configs", "tests", "tests.*", "docs.*", "projects.nerf.*")
|
||||
),
|
||||
install_requires=["fvcore", "iopath"],
|
||||
extras_require={
|
||||
"all": ["matplotlib", "tqdm>4.29.0", "imageio", "ipywidgets"],
|
||||
|
||||
@@ -2,6 +2,6 @@
|
||||
|
||||
This is copied version of docs/tutorials/data/cow_mesh with removed line 6159 (usemtl material_1) to test behavior without usemtl material_1 declaration.
|
||||
|
||||
Thank you to Keenan Crane for allowing the cow mesh model to be used freely in the public domain.
|
||||
Thank you to Keenen Crane for allowing the cow mesh model to be used freely in the public domain.
|
||||
|
||||
###### Source: http://www.cs.cmu.edu/~kmcrane/Projects/ModelRepository/
|
||||
|
||||
@@ -52,11 +52,17 @@ dataset_map_provider_JsonIndexDatasetMapProviderV2_args:
|
||||
dataset_class_type: JsonIndexDataset
|
||||
path_manager_factory_class_type: PathManagerFactory
|
||||
dataset_JsonIndexDataset_args:
|
||||
path_manager: null
|
||||
frame_annotations_file: ''
|
||||
sequence_annotations_file: ''
|
||||
subset_lists_file: ''
|
||||
subsets: null
|
||||
limit_to: 0
|
||||
limit_sequences_to: 0
|
||||
pick_sequence: []
|
||||
exclude_sequence: []
|
||||
limit_category_to: []
|
||||
dataset_root: ''
|
||||
load_images: true
|
||||
load_depths: true
|
||||
load_depth_masks: true
|
||||
@@ -74,6 +80,7 @@ dataset_map_provider_JsonIndexDatasetMapProviderV2_args:
|
||||
n_frames_per_sequence: -1
|
||||
seed: 0
|
||||
sort_frames: false
|
||||
eval_batches: null
|
||||
path_manager_factory_PathManagerFactory_args:
|
||||
silence_logs: true
|
||||
dataset_map_provider_LlffDatasetMapProvider_args:
|
||||
@@ -83,16 +90,6 @@ dataset_map_provider_LlffDatasetMapProvider_args:
|
||||
n_known_frames_for_test: null
|
||||
path_manager_factory_PathManagerFactory_args:
|
||||
silence_logs: true
|
||||
downscale_factor: 4
|
||||
dataset_map_provider_RenderedMeshDatasetMapProvider_args:
|
||||
num_views: 40
|
||||
data_file: null
|
||||
azimuth_range: 180.0
|
||||
resolution: 128
|
||||
use_point_light: true
|
||||
path_manager_factory_class_type: PathManagerFactory
|
||||
path_manager_factory_PathManagerFactory_args:
|
||||
silence_logs: true
|
||||
data_loader_map_provider_SequenceDataLoaderMapProvider_args:
|
||||
batch_size: 1
|
||||
num_workers: 0
|
||||
@@ -106,9 +103,3 @@ data_loader_map_provider_SequenceDataLoaderMapProvider_args:
|
||||
sample_consecutive_frames: false
|
||||
consecutive_frames_max_gap: 0
|
||||
consecutive_frames_max_gap_seconds: 0.1
|
||||
data_loader_map_provider_SimpleDataLoaderMapProvider_args:
|
||||
batch_size: 1
|
||||
num_workers: 0
|
||||
dataset_length_train: 0
|
||||
dataset_length_val: 0
|
||||
dataset_length_test: 0
|
||||
|
||||
@@ -1,24 +1,3 @@
|
||||
log_vars:
|
||||
- loss_rgb_psnr_fg
|
||||
- loss_rgb_psnr
|
||||
- loss_rgb_mse
|
||||
- loss_rgb_huber
|
||||
- loss_depth_abs
|
||||
- loss_depth_abs_fg
|
||||
- loss_mask_neg_iou
|
||||
- loss_mask_bce
|
||||
- loss_mask_beta_prior
|
||||
- loss_eikonal
|
||||
- loss_density_tv
|
||||
- loss_depth_neg_penalty
|
||||
- loss_autodecoder_norm
|
||||
- loss_prev_stage_rgb_mse
|
||||
- loss_prev_stage_rgb_psnr_fg
|
||||
- loss_prev_stage_rgb_psnr
|
||||
- loss_prev_stage_mask_bce
|
||||
- objective
|
||||
- epoch
|
||||
- sec/it
|
||||
mask_images: true
|
||||
mask_depths: true
|
||||
render_image_width: 400
|
||||
@@ -49,13 +28,38 @@ loss_weights:
|
||||
loss_prev_stage_rgb_mse: 1.0
|
||||
loss_mask_bce: 0.0
|
||||
loss_prev_stage_mask_bce: 0.0
|
||||
log_vars:
|
||||
- loss_rgb_psnr_fg
|
||||
- loss_rgb_psnr
|
||||
- loss_rgb_mse
|
||||
- loss_rgb_huber
|
||||
- loss_depth_abs
|
||||
- loss_depth_abs_fg
|
||||
- loss_mask_neg_iou
|
||||
- loss_mask_bce
|
||||
- loss_mask_beta_prior
|
||||
- loss_eikonal
|
||||
- loss_density_tv
|
||||
- loss_depth_neg_penalty
|
||||
- loss_autodecoder_norm
|
||||
- loss_prev_stage_rgb_mse
|
||||
- loss_prev_stage_rgb_psnr_fg
|
||||
- loss_prev_stage_rgb_psnr
|
||||
- loss_prev_stage_mask_bce
|
||||
- objective
|
||||
- epoch
|
||||
- sec/it
|
||||
global_encoder_SequenceAutodecoder_args:
|
||||
autodecoder_args:
|
||||
encoding_dim: 0
|
||||
n_instances: 1
|
||||
n_instances: 0
|
||||
init_scale: 1.0
|
||||
ignore_input: false
|
||||
raysampler_AdaptiveRaySampler_args:
|
||||
image_width: 400
|
||||
image_height: 400
|
||||
sampling_mode_training: mask_sample
|
||||
sampling_mode_evaluation: full_grid
|
||||
n_pts_per_ray_training: 64
|
||||
n_pts_per_ray_evaluation: 64
|
||||
n_rays_per_image_sampled_from_mask: 1024
|
||||
@@ -103,6 +107,7 @@ view_pooler_args:
|
||||
weight_by_ray_angle_gamma: 1.0
|
||||
min_ray_angle_weight: 0.1
|
||||
implicit_function_IdrFeatureField_args:
|
||||
feature_vector_size: 3
|
||||
d_in: 3
|
||||
d_out: 1
|
||||
dims:
|
||||
@@ -120,5 +125,6 @@ implicit_function_IdrFeatureField_args:
|
||||
weight_norm: true
|
||||
n_harmonic_functions_xyz: 1729
|
||||
pooled_feature_dim: 0
|
||||
encoding_dim: 0
|
||||
view_metrics_ViewMetrics_args: {}
|
||||
regularization_metrics_RegularizationMetrics_args: {}
|
||||
|
||||
@@ -378,20 +378,14 @@ class TestConfig(unittest.TestCase):
|
||||
with self.assertWarnsRegex(
|
||||
UserWarning, "New implementation of Grape is being chosen."
|
||||
):
|
||||
defaulted_bowl = FruitBowl()
|
||||
self.assertIsInstance(defaulted_bowl.main_fruit, Grape)
|
||||
self.assertEqual(defaulted_bowl.main_fruit.large, True)
|
||||
self.assertEqual(defaulted_bowl.main_fruit.get_color(), "green")
|
||||
bowl = FruitBowl(**bowl_args)
|
||||
self.assertIsInstance(bowl.main_fruit, Grape)
|
||||
|
||||
with self.assertWarnsRegex(
|
||||
UserWarning, "New implementation of Grape is being chosen."
|
||||
):
|
||||
args_bowl = FruitBowl(**bowl_args)
|
||||
self.assertIsInstance(args_bowl.main_fruit, Grape)
|
||||
# Redefining the same class won't help with defaults because encoded in args
|
||||
self.assertEqual(args_bowl.main_fruit.large, False)
|
||||
self.assertEqual(bowl.main_fruit.large, False)
|
||||
|
||||
# But the override worked.
|
||||
self.assertEqual(args_bowl.main_fruit.get_color(), "green")
|
||||
self.assertEqual(bowl.main_fruit.get_color(), "green")
|
||||
|
||||
# 2. Try redefining without the dataclass modifier
|
||||
# This relies on the fact that default creation processes the class.
|
||||
@@ -403,7 +397,7 @@ class TestConfig(unittest.TestCase):
|
||||
with self.assertWarnsRegex(
|
||||
UserWarning, "New implementation of Grape is being chosen."
|
||||
):
|
||||
FruitBowl(**bowl_args)
|
||||
bowl = FruitBowl(**bowl_args)
|
||||
|
||||
# 3. Adding a new class doesn't get picked up, because the first
|
||||
# get_default_args call has frozen FruitBowl. This is intrinsic to
|
||||
@@ -691,17 +685,12 @@ class TestConfig(unittest.TestCase):
|
||||
fruit2_class_type: str = "Pear"
|
||||
a: A
|
||||
a2: A
|
||||
a3: A
|
||||
|
||||
@classmethod
|
||||
def a_tweak_args(cls, type, args):
|
||||
assert type == A
|
||||
args.n = 993
|
||||
|
||||
@classmethod
|
||||
def a3_tweak_args(cls, type, args):
|
||||
del args["n"]
|
||||
|
||||
@classmethod
|
||||
def fruit_tweak_args(cls, type, args):
|
||||
assert issubclass(type, Fruit)
|
||||
@@ -712,7 +701,6 @@ class TestConfig(unittest.TestCase):
|
||||
args = get_default_args(Wrapper)
|
||||
self.assertEqual(args.a_args.n, 993)
|
||||
self.assertEqual(args.a2_args.n, 9)
|
||||
self.assertEqual(args.a3_args, {})
|
||||
self.assertEqual(args.fruit_Pear_args.n_pips, 19)
|
||||
self.assertEqual(args.fruit2_Pear_args.n_pips, 13)
|
||||
|
||||
|
||||
@@ -90,5 +90,5 @@ class TestGenericModel(unittest.TestCase):
|
||||
remove_unused_components(instance_args)
|
||||
yaml = OmegaConf.to_yaml(instance_args, sort_keys=False)
|
||||
if DEBUG:
|
||||
(DATA_DIR / "overrides_.yaml").write_text(yaml)
|
||||
(DATA_DIR / "overrides.yaml_").write_text(yaml)
|
||||
self.assertEqual(yaml, (DATA_DIR / "overrides.yaml").read_text())
|
||||
|
||||
@@ -1,57 +0,0 @@
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the BSD-style license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
import os
|
||||
import unittest
|
||||
|
||||
import torch
|
||||
from pytorch3d.implicitron.dataset.dataset_base import FrameData
|
||||
from pytorch3d.implicitron.dataset.rendered_mesh_dataset_map_provider import (
|
||||
RenderedMeshDatasetMapProvider,
|
||||
)
|
||||
from pytorch3d.implicitron.tools.config import expand_args_fields
|
||||
from pytorch3d.renderer import FoVPerspectiveCameras
|
||||
from tests.common_testing import TestCaseMixin
|
||||
|
||||
|
||||
inside_re_worker = os.environ.get("INSIDE_RE_WORKER", False)
|
||||
|
||||
|
||||
class TestDataCow(TestCaseMixin, unittest.TestCase):
|
||||
def test_simple(self):
|
||||
if inside_re_worker:
|
||||
return
|
||||
expand_args_fields(RenderedMeshDatasetMapProvider)
|
||||
self._runtest(use_point_light=True, num_views=4)
|
||||
self._runtest(use_point_light=False, num_views=4)
|
||||
|
||||
def _runtest(self, **kwargs):
|
||||
provider = RenderedMeshDatasetMapProvider(**kwargs)
|
||||
dataset_map = provider.get_dataset_map()
|
||||
known_matrix = torch.zeros(1, 4, 4)
|
||||
known_matrix[0, 0, 0] = 1.7321
|
||||
known_matrix[0, 1, 1] = 1.7321
|
||||
known_matrix[0, 2, 2] = 1.0101
|
||||
known_matrix[0, 3, 2] = -1.0101
|
||||
known_matrix[0, 2, 3] = 1
|
||||
|
||||
self.assertIsNone(dataset_map.val)
|
||||
self.assertIsNone(dataset_map.test)
|
||||
self.assertEqual(len(dataset_map.train), provider.num_views)
|
||||
|
||||
value = dataset_map.train[0]
|
||||
self.assertIsInstance(value, FrameData)
|
||||
|
||||
self.assertEqual(value.image_rgb.shape, (3, 128, 128))
|
||||
self.assertEqual(value.fg_probability.shape, (1, 128, 128))
|
||||
# corner of image is background
|
||||
self.assertEqual(value.fg_probability[0, 0, 0], 0)
|
||||
self.assertEqual(value.fg_probability.max(), 1.0)
|
||||
self.assertIsInstance(value.camera, FoVPerspectiveCameras)
|
||||
self.assertEqual(len(value.camera), 1)
|
||||
self.assertIsNone(value.camera.K)
|
||||
matrix = value.camera.get_projection_transform().get_matrix()
|
||||
self.assertClose(matrix, known_matrix, atol=1e-4)
|
||||
@@ -69,7 +69,6 @@ class TestDataLlff(TestCaseMixin, unittest.TestCase):
|
||||
provider = LlffDatasetMapProvider(
|
||||
base_dir="manifold://co3d/tree/nerf_data/nerf_llff_data/fern",
|
||||
object_name="fern",
|
||||
downscale_factor=8,
|
||||
)
|
||||
dataset_map = provider.get_dataset_map()
|
||||
known_matrix = torch.zeros(1, 4, 4)
|
||||
|
||||
@@ -10,10 +10,6 @@ import unittest.mock
|
||||
|
||||
import torch
|
||||
from omegaconf import OmegaConf
|
||||
from pytorch3d.implicitron.dataset.data_loader_map_provider import (
|
||||
SequenceDataLoaderMapProvider,
|
||||
SimpleDataLoaderMapProvider,
|
||||
)
|
||||
from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource
|
||||
from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset
|
||||
from pytorch3d.implicitron.tools.config import get_default_args
|
||||
@@ -68,6 +64,7 @@ class TestDataSource(unittest.TestCase):
|
||||
return
|
||||
args = get_default_args(ImplicitronDataSource)
|
||||
args.dataset_map_provider_class_type = "JsonIndexDatasetMapProvider"
|
||||
args.data_loader_map_provider_class_type = "SequenceDataLoaderMapProvider"
|
||||
dataset_args = args.dataset_map_provider_JsonIndexDatasetMapProvider_args
|
||||
dataset_args.category = "skateboard"
|
||||
dataset_args.test_restrict_sequence_id = 0
|
||||
@@ -76,35 +73,8 @@ class TestDataSource(unittest.TestCase):
|
||||
dataset_args.dataset_root = "manifold://co3d/tree/extracted"
|
||||
|
||||
data_source = ImplicitronDataSource(**args)
|
||||
self.assertIsInstance(
|
||||
data_source.data_loader_map_provider, SequenceDataLoaderMapProvider
|
||||
)
|
||||
_, data_loaders = data_source.get_datasets_and_dataloaders()
|
||||
self.assertEqual(len(data_loaders.train), 81)
|
||||
for i in data_loaders.train:
|
||||
self.assertEqual(i.frame_type, ["test_known"])
|
||||
break
|
||||
|
||||
def test_simple(self):
|
||||
if os.environ.get("INSIDE_RE_WORKER") is not None:
|
||||
return
|
||||
args = get_default_args(ImplicitronDataSource)
|
||||
args.dataset_map_provider_class_type = "JsonIndexDatasetMapProvider"
|
||||
args.data_loader_map_provider_class_type = "SimpleDataLoaderMapProvider"
|
||||
dataset_args = args.dataset_map_provider_JsonIndexDatasetMapProvider_args
|
||||
dataset_args.category = "skateboard"
|
||||
dataset_args.test_restrict_sequence_id = 0
|
||||
dataset_args.n_frames_per_sequence = -1
|
||||
|
||||
dataset_args.dataset_root = "manifold://co3d/tree/extracted"
|
||||
|
||||
data_source = ImplicitronDataSource(**args)
|
||||
self.assertIsInstance(
|
||||
data_source.data_loader_map_provider, SimpleDataLoaderMapProvider
|
||||
)
|
||||
_, data_loaders = data_source.get_datasets_and_dataloaders()
|
||||
|
||||
self.assertEqual(len(data_loaders.train), 81)
|
||||
for i in data_loaders.train:
|
||||
self.assertEqual(i.frame_type, ["test_known"])
|
||||
break
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user