1 Commits

Author SHA1 Message Date
Christoph Lassner
e7c1f026ea [pulsar] Removing LOGGER.debug statements for performance gain.
We identified that these logging statements can deteriorate performance in certain cases. I propose removing them from the regular renderer implementation and letting individuals re-insert debug logging wherever needed on a case-by-case basis.
2022-07-25 09:08:58 -07:00
266 changed files with 4593 additions and 16417 deletions

View File

@@ -159,7 +159,7 @@ jobs:
binary_macos_wheel:
<<: *binary_common
macos:
xcode: "13.4.1"
xcode: "12.0"
steps:
- checkout
- run:

View File

@@ -159,7 +159,7 @@ jobs:
binary_macos_wheel:
<<: *binary_common
macos:
xcode: "13.4.1"
xcode: "12.0"
steps:
- checkout
- run:
@@ -180,6 +180,42 @@ workflows:
jobs:
# - main:
# context: DOCKERHUB_TOKEN
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu101
name: linux_conda_py37_cu101_pyt180
python_version: '3.7'
pytorch_version: 1.8.0
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu102
name: linux_conda_py37_cu102_pyt180
python_version: '3.7'
pytorch_version: 1.8.0
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu111
name: linux_conda_py37_cu111_pyt180
python_version: '3.7'
pytorch_version: 1.8.0
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu101
name: linux_conda_py37_cu101_pyt181
python_version: '3.7'
pytorch_version: 1.8.1
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu102
name: linux_conda_py37_cu102_pyt181
python_version: '3.7'
pytorch_version: 1.8.1
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu111
name: linux_conda_py37_cu111_pyt181
python_version: '3.7'
pytorch_version: 1.8.1
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu102
@@ -307,26 +343,42 @@ workflows:
name: linux_conda_py37_cu116_pyt1120
python_version: '3.7'
pytorch_version: 1.12.0
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu101
name: linux_conda_py38_cu101_pyt180
python_version: '3.8'
pytorch_version: 1.8.0
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu102
name: linux_conda_py37_cu102_pyt1121
python_version: '3.7'
pytorch_version: 1.12.1
name: linux_conda_py38_cu102_pyt180
python_version: '3.8'
pytorch_version: 1.8.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda113
context: DOCKERHUB_TOKEN
cu_version: cu113
name: linux_conda_py37_cu113_pyt1121
python_version: '3.7'
pytorch_version: 1.12.1
cu_version: cu111
name: linux_conda_py38_cu111_pyt180
python_version: '3.8'
pytorch_version: 1.8.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda116
context: DOCKERHUB_TOKEN
cu_version: cu116
name: linux_conda_py37_cu116_pyt1121
python_version: '3.7'
pytorch_version: 1.12.1
cu_version: cu101
name: linux_conda_py38_cu101_pyt181
python_version: '3.8'
pytorch_version: 1.8.1
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu102
name: linux_conda_py38_cu102_pyt181
python_version: '3.8'
pytorch_version: 1.8.1
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu111
name: linux_conda_py38_cu111_pyt181
python_version: '3.8'
pytorch_version: 1.8.1
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu102
@@ -454,26 +506,42 @@ workflows:
name: linux_conda_py38_cu116_pyt1120
python_version: '3.8'
pytorch_version: 1.12.0
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu101
name: linux_conda_py39_cu101_pyt180
python_version: '3.9'
pytorch_version: 1.8.0
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu102
name: linux_conda_py38_cu102_pyt1121
python_version: '3.8'
pytorch_version: 1.12.1
name: linux_conda_py39_cu102_pyt180
python_version: '3.9'
pytorch_version: 1.8.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda113
context: DOCKERHUB_TOKEN
cu_version: cu113
name: linux_conda_py38_cu113_pyt1121
python_version: '3.8'
pytorch_version: 1.12.1
cu_version: cu111
name: linux_conda_py39_cu111_pyt180
python_version: '3.9'
pytorch_version: 1.8.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda116
context: DOCKERHUB_TOKEN
cu_version: cu116
name: linux_conda_py38_cu116_pyt1121
python_version: '3.8'
pytorch_version: 1.12.1
cu_version: cu101
name: linux_conda_py39_cu101_pyt181
python_version: '3.9'
pytorch_version: 1.8.1
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu102
name: linux_conda_py39_cu102_pyt181
python_version: '3.9'
pytorch_version: 1.8.1
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu111
name: linux_conda_py39_cu111_pyt181
python_version: '3.9'
pytorch_version: 1.8.1
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu102
@@ -601,26 +669,6 @@ workflows:
name: linux_conda_py39_cu116_pyt1120
python_version: '3.9'
pytorch_version: 1.12.0
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu102
name: linux_conda_py39_cu102_pyt1121
python_version: '3.9'
pytorch_version: 1.12.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda113
context: DOCKERHUB_TOKEN
cu_version: cu113
name: linux_conda_py39_cu113_pyt1121
python_version: '3.9'
pytorch_version: 1.12.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda116
context: DOCKERHUB_TOKEN
cu_version: cu116
name: linux_conda_py39_cu116_pyt1121
python_version: '3.9'
pytorch_version: 1.12.1
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu102
@@ -667,26 +715,6 @@ workflows:
name: linux_conda_py310_cu116_pyt1120
python_version: '3.10'
pytorch_version: 1.12.0
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu102
name: linux_conda_py310_cu102_pyt1121
python_version: '3.10'
pytorch_version: 1.12.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda113
context: DOCKERHUB_TOKEN
cu_version: cu113
name: linux_conda_py310_cu113_pyt1121
python_version: '3.10'
pytorch_version: 1.12.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda116
context: DOCKERHUB_TOKEN
cu_version: cu116
name: linux_conda_py310_cu116_pyt1121
python_version: '3.10'
pytorch_version: 1.12.1
- binary_linux_conda_cuda:
name: testrun_conda_cuda_py37_cu102_pyt190
context: DOCKERHUB_TOKEN

View File

@@ -20,6 +20,8 @@ from packaging import version
# version of pytorch.
# Pytorch 1.4 also supports cuda 10.0 but we no longer build for cuda 10.0 at all.
CONDA_CUDA_VERSIONS = {
"1.8.0": ["cu101", "cu102", "cu111"],
"1.8.1": ["cu101", "cu102", "cu111"],
"1.9.0": ["cu102", "cu111"],
"1.9.1": ["cu102", "cu111"],
"1.10.0": ["cu102", "cu111", "cu113"],
@@ -27,7 +29,6 @@ CONDA_CUDA_VERSIONS = {
"1.10.2": ["cu102", "cu111", "cu113"],
"1.11.0": ["cu102", "cu111", "cu113", "cu115"],
"1.12.0": ["cu102", "cu113", "cu116"],
"1.12.1": ["cu102", "cu113", "cu116"],
}

View File

@@ -16,13 +16,14 @@ We do not always accept new features, and we take the following factors into con
When sending a PR, please ensure you complete the following steps:
1. Fork the repo and create your branch from `main`. Follow the instructions
1. Fork the repo and create your branch from `master`. Follow the instructions
in [INSTALL.md](../INSTALL.md) to build the repo.
2. If you've added code that should be tested, add tests.
3. If you've changed any APIs, please update the documentation.
4. Ensure the test suite passes, by running this from the project root:
4. Ensure the test suite passes:
```
python -m unittest discover -v -s tests -t .
cd pytorch3d/tests
python -m unittest -v
```
5. Make sure your code lints by running `dev/linter.sh` from the project root.
6. If a PR contains multiple orthogonal changes, split it into multiple separate PRs.

27
.github/workflows/stale.yml vendored Normal file
View File

@@ -0,0 +1,27 @@
name: Mark stale issues and pull requests
on:
schedule:
- cron: '31 5 * * *'
jobs:
stale:
runs-on: ubuntu-latest
permissions:
issues: write
pull-requests: write
steps:
- uses: actions/stale@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
exempt-issue-labels: 'enhancement,how-to'
stale-issue-message: 'This issue is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 5 days.'
stale-pr-message: 'This PR is stale because it has been open 45 days with no activity. Remove stale label or comment or this will be closed in 10 days.'
close-issue-message: 'This issue was closed because it has been stalled for 5 days with no activity.'
close-pr-message: 'This PR was closed because it has been stalled for 10 days with no activity.'
days-before-issue-stale: 30
days-before-pr-stale: 45
days-before-issue-close: 5
days-before-pr-close: 10

View File

@@ -12,7 +12,6 @@ Key features include:
- Data structure for storing and manipulating triangle meshes
- Efficient operations on triangle meshes (projective transformations, graph convolution, sampling, loss functions)
- A differentiable mesh renderer
- Implicitron, see [its README](projects/implicitron_trainer), a framework for new-view synthesis via implicit representations.
PyTorch3D is designed to integrate smoothly with deep learning methods for predicting and manipulating 3D data.
For this reason, all operators in PyTorch3D:
@@ -94,7 +93,6 @@ In alphabetical order:
* Amitav Baruah
* Steve Branson
* Krzysztof Chalupka
* Luya Gao
* Georgia Gkioxari
* Taylor Gordon
@@ -138,10 +136,6 @@ If you are using the pulsar backend for sphere-rendering (the `PulsarPointRender
Please see below for a timeline of the codebase updates in reverse chronological order. We are sharing updates on the releases as well as research projects which are built with PyTorch3D. The changelogs for the releases are available under [`Releases`](https://github.com/facebookresearch/pytorch3d/releases), and the builds can be installed using `conda` as per the instructions in [INSTALL.md](INSTALL.md).
**[Aug 10th 2022]:** PyTorch3D [v0.7.0](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.7.0) released with Implicitron and MeshRasterizerOpenGL.
**[Apr 28th 2022]:** PyTorch3D [v0.6.2](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.6.2) released
**[Dec 16th 2021]:** PyTorch3D [v0.6.1](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.6.1) released
**[Oct 6th 2021]:** PyTorch3D [v0.6.0](https://github.com/facebookresearch/pytorch3d/releases/tag/v0.6.0) released

View File

@@ -17,8 +17,7 @@ It has no dependencies.
def get_test_files() -> List[Path]:
root = Path(__file__).parent.parent
dirs = ["tests", "projects/implicitron_trainer"]
return [i for dir in dirs for i in (root / dir).glob("**/test*.py")]
return list((root / "tests").glob("**/test*.py"))
def tests_from_file(path: Path, base: str) -> List[str]:

View File

@@ -62,7 +62,7 @@ Below we compare the performance for Objectron (in C++) and our algorithm, in C+
```python
from pytorch3d.ops import box3d_overlap
# Assume inputs: boxes1 (M, 8, 3) and boxes2 (N, 8, 3)
intersection_vol, iou_3d = box3d_overlap(boxes1, boxes2)
intersection_vol, iou_3d = box3d_overlap(boxes1, boxes2)
```
For more details, read [iou_box3d.py](https://github.com/facebookresearch/pytorch3d/blob/main/pytorch3d/ops/iou_box3d.py).

View File

@@ -89,7 +89,7 @@
"except ModuleNotFoundError:\n",
" need_pytorch3d=True\n",
"if need_pytorch3d:\n",
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
" # We try to install PyTorch3D via a released wheel.\n",
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
" version_str=\"\".join([\n",

View File

@@ -76,7 +76,7 @@
"except ModuleNotFoundError:\n",
" need_pytorch3d=True\n",
"if need_pytorch3d:\n",
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
" # We try to install PyTorch3D via a released wheel.\n",
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
" version_str=\"\".join([\n",

View File

@@ -1,5 +1,5 @@
# Acknowledgements
Thank you to Keenan Crane for allowing the cow mesh model to be used freely in the public domain.
Thank you to Keenan Crane for allowing the cow mesh model to be used freely in the public domain.
###### Source: http://www.cs.cmu.edu/~kmcrane/Projects/ModelRepository/

View File

@@ -51,7 +51,7 @@
"except ModuleNotFoundError:\n",
" need_pytorch3d=True\n",
"if need_pytorch3d:\n",
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
" # We try to install PyTorch3D via a released wheel.\n",
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
" version_str=\"\".join([\n",

View File

@@ -90,7 +90,7 @@
"except ModuleNotFoundError:\n",
" need_pytorch3d=True\n",
"if need_pytorch3d:\n",
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
" # We try to install PyTorch3D via a released wheel.\n",
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
" version_str=\"\".join([\n",

View File

@@ -56,7 +56,7 @@
"except ModuleNotFoundError:\n",
" need_pytorch3d=True\n",
"if need_pytorch3d:\n",
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
" # We try to install PyTorch3D via a released wheel.\n",
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
" version_str=\"\".join([\n",

View File

@@ -68,7 +68,7 @@
"except ModuleNotFoundError:\n",
" need_pytorch3d=True\n",
"if need_pytorch3d:\n",
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
" # We try to install PyTorch3D via a released wheel.\n",
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
" version_str=\"\".join([\n",

View File

@@ -47,7 +47,7 @@
"except ModuleNotFoundError:\n",
" need_pytorch3d=True\n",
"if need_pytorch3d:\n",
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
" # We try to install PyTorch3D via a released wheel.\n",
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
" version_str=\"\".join([\n",

File diff suppressed because it is too large Load Diff

View File

@@ -1,899 +0,0 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659619824914,
"executionStopTime": 1659619825485,
"originalKey": "d38652e8-200a-413c-a36a-f4d349b78a9d",
"requestMsgId": "641de8aa-0e42-4446-9304-c160a2d226bf",
"showInput": true
},
"outputs": [],
"source": [
"# Copyright (c) Meta Platforms, Inc. and affiliates. All rights reserved."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"customInput": null,
"originalKey": "a48a9dcf-e80f-474b-a0c4-2c9a765b15c5",
"showInput": false
},
"source": [
"# A simple model using Implicitron\n",
"\n",
"In this demo, we use the VolumeRenderer from PyTorch3D as a custom implicit function in Implicitron. We will see\n",
"* some of the main objects in Implicitron\n",
"* how to plug in a custom part of a model"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"customInput": null,
"originalKey": "51337c0e-ad27-4b75-ad6a-737dca5d7b95",
"showInput": false
},
"source": [
"## 0. Install and import modules\n",
"\n",
"Ensure `torch` and `torchvision` are installed. If `pytorch3d` is not installed, install it using the following cell:\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659619898147,
"executionStopTime": 1659619898274,
"originalKey": "76f1ecd4-6b73-4214-81b0-118ef8d86872",
"requestMsgId": "deb6a860-6923-4227-abef-d31388b5142d",
"showInput": true
},
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"import torch\n",
"need_pytorch3d=False\n",
"try:\n",
" import pytorch3d\n",
"except ModuleNotFoundError:\n",
" need_pytorch3d=True\n",
"if need_pytorch3d:\n",
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
" # We try to install PyTorch3D via a released wheel.\n",
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
" version_str=\"\".join([\n",
" f\"py3{sys.version_info.minor}_cu\",\n",
" torch.version.cuda.replace(\".\",\"\"),\n",
" f\"_pyt{pyt_version_str}\"\n",
" ])\n",
" !pip install fvcore iopath\n",
" !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",
" else:\n",
" # We try to install PyTorch3D from source.\n",
" !curl -LO https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz\n",
" !tar xzf 1.10.0.tar.gz\n",
" os.environ[\"CUB_HOME\"] = os.getcwd() + \"/cub-1.10.0\"\n",
" !pip install 'git+https://github.com/facebookresearch/pytorch3d.git@stable'"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"customInput": null,
"originalKey": "2c1020e6-eb4a-4644-9719-9147500d8e4f",
"showInput": false
},
"source": [
"Ensure omegaconf and visdom are installed. If not, run this cell. (It should not be necessary to restart the runtime.)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"customInput": null,
"customOutput": null,
"originalKey": "9e751931-a38d-44c9-9ff1-ac2f7d3a3f99",
"showInput": true
},
"outputs": [],
"source": [
"!pip install omegaconf visdom"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customOutput": null,
"executionStartTime": 1659612480556,
"executionStopTime": 1659612480644,
"hidden_ranges": [],
"originalKey": "86807e4a-1675-4520-a033-c7af85b233ec",
"requestMsgId": "880a7e20-4a90-4b37-a5eb-bccc0b23cac6"
},
"outputs": [],
"source": [
"import logging\n",
"from typing import Tuple\n",
"\n",
"import matplotlib.animation as animation\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import torch\n",
"import tqdm\n",
"from IPython.display import HTML\n",
"from omegaconf import OmegaConf\n",
"from PIL import Image\n",
"from pytorch3d.implicitron.dataset.dataset_base import FrameData\n",
"from pytorch3d.implicitron.dataset.rendered_mesh_dataset_map_provider import RenderedMeshDatasetMapProvider\n",
"from pytorch3d.implicitron.models.generic_model import GenericModel\n",
"from pytorch3d.implicitron.models.implicit_function.base import ImplicitFunctionBase, ImplicitronRayBundle\n",
"from pytorch3d.implicitron.models.renderer.base import EvaluationMode\n",
"from pytorch3d.implicitron.tools.config import get_default_args, registry, remove_unused_components\n",
"from pytorch3d.renderer.implicit.renderer import VolumeSampler\n",
"from pytorch3d.structures import Volumes\n",
"from pytorch3d.vis.plotly_vis import plot_batch_individually, plot_scene"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659610929375,
"executionStopTime": 1659610929383,
"hidden_ranges": [],
"originalKey": "b2d9f5bd-a9d4-4f78-b21e-92f2658e0fe9",
"requestMsgId": "7e43e623-4030-438b-af4e-b96170c9a052",
"showInput": true
},
"outputs": [],
"source": [
"output_resolution = 80"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659610930042,
"executionStopTime": 1659610930050,
"hidden_ranges": [],
"originalKey": "0b0c2087-4c86-4c57-b0ee-6f48a70a9c78",
"requestMsgId": "46883aad-f00b-4fd4-ac17-eec0b2ac272a",
"showInput": true
},
"outputs": [],
"source": [
"torch.set_printoptions(sci_mode=False)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"customInput": null,
"originalKey": "37809d0d-b02e-42df-85b6-cdd038373653",
"showInput": false
},
"source": [
"## 1. Load renders of a mesh (the cow mesh) as a dataset\n",
"\n",
"A dataset's train, val and test parts in Implicitron are represented as a `dataset_map`, and provided by an implementation of `DatasetMapProvider`. \n",
"`RenderedMeshDatasetMapProvider` is one which generates a single-scene dataset with only a train component by taking a mesh and rendering it.\n",
"We use it with the cow mesh."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659620739780,
"executionStopTime": 1659620739914,
"originalKey": "cc68cb9c-b8bf-4e9e-bef1-2cfafdf6caa2",
"requestMsgId": "398cfcae-5d43-4b6f-9c75-db3d297364d4",
"showInput": false
},
"source": [
"If running this notebook using **Google Colab**, run the following cell to fetch the mesh obj and texture files and save it at the path data/cow_mesh.\n",
"If running locally, the data is already available at the correct path."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"customInput": null,
"customOutput": null,
"originalKey": "2c55e002-a885-4169-8fdc-af9078b05968",
"showInput": true
},
"outputs": [],
"source": [
"!mkdir -p data/cow_mesh\n",
"!wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.obj\n",
"!wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow.mtl\n",
"!wget -P data/cow_mesh https://dl.fbaipublicfiles.com/pytorch3d/data/cow_mesh/cow_texture.png"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customOutput": null,
"executionStartTime": 1659621652237,
"executionStopTime": 1659621652903,
"hidden_ranges": [],
"originalKey": "eb77aaec-048c-40bd-bd69-0e66b6ab60b1",
"requestMsgId": "09b9975c-ff86-41c9-b4a9-975d23afc562",
"showInput": true
},
"outputs": [],
"source": [
"cow_provider = RenderedMeshDatasetMapProvider(\n",
" data_file=\"data/cow_mesh/cow.obj\",\n",
" use_point_light=False,\n",
" resolution=output_resolution,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659610966145,
"executionStopTime": 1659610966255,
"hidden_ranges": [],
"originalKey": "8210e15b-da48-4306-a49a-41c4e7e7d42f",
"requestMsgId": "c243edd2-a106-4fba-8471-dfa4f99a2088",
"showInput": true
},
"outputs": [],
"source": [
"dataset_map = cow_provider.get_dataset_map()\n",
"tr_cameras = [training_frame.camera for training_frame in dataset_map.train]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659610967703,
"executionStopTime": 1659610967848,
"hidden_ranges": [],
"originalKey": "458d72ad-d9a7-4f13-b5b7-90d2aec61c16",
"requestMsgId": "7f9431f3-8717-4d89-a7fe-1420dd0e00c4",
"showInput": true
},
"outputs": [],
"source": [
"# The cameras are all in the XZ plane, in a circle about 2.7 from the origin\n",
"centers = torch.cat([i.get_camera_center() for i in tr_cameras])\n",
"print(centers.min(0).values)\n",
"print(centers.max(0).values)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659552920194,
"executionStopTime": 1659552923122,
"hidden_ranges": [],
"originalKey": "931e712b-b141-437a-97fb-dc2a07ce3458",
"requestMsgId": "931e712b-b141-437a-97fb-dc2a07ce3458",
"showInput": true
},
"outputs": [],
"source": [
"# visualization of the cameras\n",
"plot = plot_scene({\"k\": {i: camera for i, camera in enumerate(tr_cameras)}}, camera_scale=0.25)\n",
"plot.layout.scene.aspectmode = \"data\"\n",
"plot"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"customInput": null,
"originalKey": "afa9c02d-f76b-4f68-83e9-9733c615406b",
"showInput": false
},
"source": [
"## 2. Custom implicit function 🧊\n",
"\n",
"At the core of neural rendering methods are functions of spatial coordinates called implicit functions, which are used in some kind of rendering process.\n",
"(Often those functions can additionally take other data as well, such as view direction.)\n",
"A common rendering process is ray marching over densities and colors provided by an implicit function.\n",
"In our case, taking samples from a 3D volume grid is a very simple function of spatial coordinates. \n",
"\n",
"Here we define our own implicit function, which uses PyTorch3D's existing functionality for sampling from a volume grid.\n",
"We do this by subclassing `ImplicitFunctionBase`.\n",
"We need to register our subclass with a special decorator.\n",
"We use Python's dataclass annotations for configuring the module."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659613575850,
"executionStopTime": 1659613575940,
"hidden_ranges": [],
"originalKey": "61b55043-dc52-4de7-992e-e2195edd2123",
"requestMsgId": "dfaace3c-098c-4ffe-9240-6a7ae0ff271e",
"showInput": true
},
"outputs": [],
"source": [
"@registry.register\n",
"class MyVolumes(ImplicitFunctionBase, torch.nn.Module):\n",
" grid_resolution: int = 50 # common HWD of volumes, the number of voxels in each direction\n",
"    extent: float = 1.0  # In world coordinates, the volume occupies [-extent, extent] along each axis\n",
"\n",
" def __post_init__(self):\n",
" # We have to call this explicitly if there are other base classes like Module\n",
" super().__init__()\n",
"\n",
" # We define parameters like other torch.nn.Module objects.\n",
" # In this case, both our parameter tensors are trainable; they govern the contents of the volume grid.\n",
" density = torch.full((self.grid_resolution, self.grid_resolution, self.grid_resolution), -2.0)\n",
" self.density = torch.nn.Parameter(density)\n",
" color = torch.full((3, self.grid_resolution, self.grid_resolution, self.grid_resolution), 0.0)\n",
" self.color = torch.nn.Parameter(color)\n",
" self.density_activation = torch.nn.Softplus()\n",
"\n",
" def forward(\n",
" self,\n",
" ray_bundle: ImplicitronRayBundle,\n",
" fun_viewpool=None,\n",
" global_code=None,\n",
" ):\n",
" densities = self.density_activation(self.density[None, None])\n",
" voxel_size = 2.0 * float(self.extent) / self.grid_resolution\n",
" features = self.color.sigmoid()[None]\n",
"\n",
" # Like other PyTorch3D structures, the actual Volumes object should only exist as long\n",
" # as one iteration of training. It is local to this function.\n",
"\n",
" volume = Volumes(densities=densities, features=features, voxel_size=voxel_size)\n",
" sampler = VolumeSampler(volumes=volume)\n",
" densities, features = sampler(ray_bundle)\n",
"\n",
" # When an implicit function is used for raymarching, i.e. for MultiPassEmissionAbsorptionRenderer,\n",
" # it must return (densities, features, an auxiliary tuple)\n",
" return densities, features, {}\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"customInput": null,
"originalKey": "abaf2cd6-1b68-400e-a142-8fb9f49953f3",
"showInput": false
},
"source": [
"## 3. Construct the model object.\n",
"\n",
"The main model object in PyTorch3D is `GenericModel`, which has pluggable components for the major steps, including the renderer and the implicit function(s).\n",
"There are two ways to construct it which are equivalent here."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659621267561,
"executionStopTime": 1659621267938,
"originalKey": "f26c3dce-fbae-4592-bd0e-e4a8abc57c2c",
"requestMsgId": "9213687e-1caf-46a8-a4e5-a9c531530092",
"showInput": true
},
"outputs": [],
"source": [
"CONSTRUCT_MODEL_FROM_CONFIG = True\n",
"if CONSTRUCT_MODEL_FROM_CONFIG:\n",
" # Via a DictConfig - this is how our training loop with hydra works\n",
" cfg = get_default_args(GenericModel)\n",
" cfg.implicit_function_class_type = \"MyVolumes\"\n",
" cfg.render_image_height=output_resolution\n",
" cfg.render_image_width=output_resolution\n",
" cfg.loss_weights={\"loss_rgb_huber\": 1.0}\n",
" cfg.tqdm_trigger_threshold=19000\n",
" cfg.raysampler_AdaptiveRaySampler_args.scene_extent= 4.0\n",
" gm = GenericModel(**cfg)\n",
"else:\n",
" # constructing GenericModel directly\n",
" gm = GenericModel(\n",
" implicit_function_class_type=\"MyVolumes\",\n",
" render_image_height=output_resolution,\n",
" render_image_width=output_resolution,\n",
" loss_weights={\"loss_rgb_huber\": 1.0},\n",
" tqdm_trigger_threshold=19000,\n",
" raysampler_AdaptiveRaySampler_args = {\"scene_extent\": 4.0}\n",
" )\n",
"\n",
" # In this case we can get the equivalent DictConfig cfg object to the way gm is configured as follows\n",
" cfg = OmegaConf.structured(gm)\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659611214689,
"executionStopTime": 1659611214748,
"hidden_ranges": [],
"originalKey": "4e659f7d-ce66-4999-83de-005eb09d7705",
"requestMsgId": "7b815b2b-cf19-44d0-ae89-76fde6df35ec",
"showInput": false
},
"source": [
" The default renderer is an emission-absorption raymarcher. We keep that default."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659621268007,
"executionStopTime": 1659621268190,
"hidden_ranges": [],
"originalKey": "d37ae488-c57c-44d3-9def-825dc1a6495b",
"requestMsgId": "71143ec1-730f-4876-8a14-e46eea9d6dd1",
"showInput": true
},
"outputs": [],
"source": [
"# We can display the configuration in use as follows.\n",
"remove_unused_components(cfg)\n",
"yaml = OmegaConf.to_yaml(cfg, sort_keys=False)\n",
"%page -r yaml"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659621268727,
"executionStopTime": 1659621268776,
"hidden_ranges": [],
"originalKey": "52e53179-3c6e-4c1f-a38a-3a6d803687bb",
"requestMsgId": "05de9bc3-3f74-4a6f-851c-9ec919b59506",
"showInput": true
},
"outputs": [],
"source": [
"device = torch.device(\"cuda:0\")\n",
"gm.to(device)\n",
"assert next(gm.parameters()).is_cuda"
]
},
{
"cell_type": "markdown",
"metadata": {
"customInput": null,
"originalKey": "528a7d53-c645-49c2-9021-09adbb18cd23",
"showInput": false
},
"source": [
"## 4. train the model "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659621270236,
"executionStopTime": 1659621270446,
"hidden_ranges": [],
"originalKey": "953280bd-3161-42ba-8dcb-0c8ef2d5cc25",
"requestMsgId": "9bba424b-7bfd-4e5a-9d79-ae316e20bab0",
"showInput": true
},
"outputs": [],
"source": [
"train_data_collated = [FrameData.collate([frame.to(device)]) for frame in dataset_map.train]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659621270815,
"executionStopTime": 1659621270948,
"hidden_ranges": [],
"originalKey": "2fcf07f0-0c28-49c7-8c76-1c9a9d810167",
"requestMsgId": "821deb43-6084-4ece-83c3-dee214562c47",
"showInput": true
},
"outputs": [],
"source": [
"gm.train()\n",
"optimizer = torch.optim.Adam(gm.parameters(), lr=0.1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customOutput": null,
"executionStartTime": 1659621271875,
"executionStopTime": 1659621298146,
"hidden_ranges": [],
"originalKey": "105099f7-ed0c-4e7f-a976-61a93fd0a8fe",
"requestMsgId": "0c87c108-83e3-4129-ad02-85e0140f1368",
"showInput": true
},
"outputs": [],
"source": [
"iterator = tqdm.tqdm(range(2000))\n",
"for n_batch in iterator:\n",
" optimizer.zero_grad()\n",
"\n",
" frame = train_data_collated[n_batch % len(dataset_map.train)]\n",
" out = gm(**frame, evaluation_mode=EvaluationMode.TRAINING)\n",
" out[\"objective\"].backward()\n",
" if n_batch % 100 == 0:\n",
" iterator.set_postfix_str(f\"loss: {float(out['objective']):.5f}\")\n",
" optimizer.step()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659535024768,
"executionStopTime": 1659535024906,
"originalKey": "e3cd494a-536b-48bc-8290-c048118c82eb",
"requestMsgId": "e3cd494a-536b-48bc-8290-c048118c82eb",
"showInput": false
},
"source": [
"## 5. Evaluate the module\n",
"\n",
"We generate complete images from all the viewpoints to see how they look."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659621299859,
"executionStopTime": 1659621311133,
"hidden_ranges": [],
"originalKey": "fbe1b2ea-cc24-4b20-a2d7-0249185e34a5",
"requestMsgId": "771ef1f8-5eee-4932-9e81-33604bf0512a",
"showInput": true
},
"outputs": [],
"source": [
"def to_numpy_image(image):\n",
" # Takes an image of shape (C, H, W) in [0,1], where C=3 or 1\n",
" # to a numpy uint image of shape (H, W, 3)\n",
" return (image * 255).to(torch.uint8).permute(1, 2, 0).detach().cpu().expand(-1, -1, 3).numpy()\n",
"def resize_image(image):\n",
" # Takes images of shape (B, C, H, W) to (B, C, output_resolution, output_resolution)\n",
" return torch.nn.functional.interpolate(image, size=(output_resolution, output_resolution))\n",
"\n",
"gm.eval()\n",
"images = []\n",
"expected = []\n",
"masks = []\n",
"masks_expected = []\n",
"for frame in tqdm.tqdm(train_data_collated):\n",
" with torch.no_grad():\n",
" out = gm(**frame, evaluation_mode=EvaluationMode.EVALUATION)\n",
"\n",
" image_rgb = to_numpy_image(out[\"images_render\"][0])\n",
" mask = to_numpy_image(out[\"masks_render\"][0])\n",
" expd = to_numpy_image(resize_image(frame.image_rgb)[0])\n",
" mask_expected = to_numpy_image(resize_image(frame.fg_probability)[0])\n",
"\n",
" images.append(image_rgb)\n",
" masks.append(mask)\n",
" expected.append(expd)\n",
" masks_expected.append(mask_expected)"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659614622542,
"executionStopTime": 1659614622757,
"originalKey": "24953039-9780-40fd-bd81-5d63e9f40069",
"requestMsgId": "7af895a3-dfe4-4c28-ac3b-4ff0fbb40c7f",
"showInput": false
},
"source": [
"We draw a grid showing predicted image and expected image, followed by predicted mask and expected mask, from each viewpoint. \n",
"This is a grid of four rows of images, wrapped in to several large rows, i.e..\n",
"<small><center>\n",
"```\n",
"┌────────┬────────┐ ┌────────┐\n",
"│pred │pred │ │pred │\n",
"│image │image │ │image │\n",
"│1 │2 │ │n │\n",
"├────────┼────────┤ ├────────┤\n",
"│expected│expected│ │expected│\n",
"│image │image │ ... │image │\n",
"│1 │2 │ │n │\n",
"├────────┼────────┤ ├────────┤\n",
"│pred │pred │ │pred │\n",
"│mask │mask │ │mask │\n",
"│1 │2 │ │n │\n",
"├────────┼────────┤ ├────────┤\n",
"│expected│expected│ │expected│\n",
"│mask │mask │ │mask │\n",
"│1 │2 │ │n │\n",
"├────────┼────────┤ ├────────┤\n",
"│pred │pred │ │pred │\n",
"│image │image │ │image │\n",
"│n+1 │n+1 │ │2n │\n",
"├────────┼────────┤ ├────────┤\n",
"│expected│expected│ │expected│\n",
"│image │image │ ... │image │\n",
"│n+1 │n+2 │ │2n │\n",
"├────────┼────────┤ ├────────┤\n",
"│pred │pred │ │pred │\n",
"│mask │mask │ │mask │\n",
"│n+1 │n+2 │ │2n │\n",
"├────────┼────────┤ ├────────┤\n",
"│expected│expected│ │expected│\n",
"│mask │mask │ │mask │\n",
"│n+1 │n+2 │ │2n │\n",
"└────────┴────────┘ └────────┘\n",
" ...\n",
"```\n",
"</center></small>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659621313894,
"executionStopTime": 1659621314042,
"hidden_ranges": [],
"originalKey": "c488a34a-e46d-4649-93fb-4b1bb5a0e439",
"requestMsgId": "4221e632-fca1-4fe5-b2e3-f92c37aa40e4",
"showInput": true
},
"outputs": [],
"source": [
"images_to_display = [images.copy(), expected.copy(), masks.copy(), masks_expected.copy()]\n",
"n_rows = 4\n",
"n_images = len(images)\n",
"blank_image = images[0] * 0\n",
"n_per_row = 1+(n_images-1)//n_rows\n",
"for _ in range(n_per_row*n_rows - n_images):\n",
" for group in images_to_display:\n",
" group.append(blank_image)\n",
"\n",
"images_to_display_listed = [[[i] for i in j] for j in images_to_display]\n",
"split = []\n",
"for row in range(n_rows):\n",
" for group in images_to_display_listed:\n",
" split.append(group[row*n_per_row:(row+1)*n_per_row]) \n",
"\n",
"Image.fromarray(np.block(split))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659621323795,
"executionStopTime": 1659621323820,
"hidden_ranges": [],
"originalKey": "49eab9e1-4fe2-4fbe-b4f3-7b6953340170",
"requestMsgId": "85b402ad-f903-431f-a13e-c2d697e869bb",
"showInput": true
},
"outputs": [],
"source": [
"# Print the maximum channel intensity in the first image.\n",
"print(images[1].max()/255)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"code_folding": [],
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659621408642,
"executionStopTime": 1659621409559,
"hidden_ranges": [],
"originalKey": "137d2c43-d39d-4266-ac5e-2b714da5e0ee",
"requestMsgId": "8e27ec57-c2d6-4ae0-be69-b63b6af929ff",
"showInput": true
},
"outputs": [],
"source": [
"plt.ioff()\n",
"fig, ax = plt.subplots(figsize=(3,3))\n",
"\n",
"ax.grid(None)\n",
"ims = [[ax.imshow(im, animated=True)] for im in images]\n",
"ani = animation.ArtistAnimation(fig, ims, interval=80, blit=True)\n",
"ani_html = ani.to_jshtml()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659621409620,
"executionStopTime": 1659621409725,
"originalKey": "783e70d6-7cf1-4d76-a126-ba11ffc2f5be",
"requestMsgId": "b6843506-c5fa-4508-80fc-8ecae51a934a",
"showInput": true
},
"outputs": [],
"source": [
"HTML(ani_html)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false,
"customInput": null,
"customOutput": null,
"executionStartTime": 1659614670081,
"executionStopTime": 1659614670168,
"originalKey": "0286c350-2362-4f47-8181-2fc2ba51cfcf",
"requestMsgId": "976f4db9-d4c7-466c-bcfd-218234400226",
"showInput": true
},
"outputs": [],
"source": [
"# If you want to see the output of the model with the volume forced to opaque white, run this and re-evaluate\n",
"# with torch.no_grad():\n",
"# gm._implicit_functions[0]._fn.density.fill_(9.0)\n",
"# gm._implicit_functions[0]._fn.color.fill_(9.0)\n"
]
}
],
"metadata": {
"bento_stylesheets": {
"bento/extensions/flow/main.css": true,
"bento/extensions/kernel_selector/main.css": true,
"bento/extensions/kernel_ui/main.css": true,
"bento/extensions/new_kernel/main.css": true,
"bento/extensions/system_usage/main.css": true,
"bento/extensions/theme/main.css": true
},
"captumWidgetMessage": {},
"dataExplorerConfig": {},
"kernelspec": {
"display_name": "pytorch3d",
"language": "python",
"metadata": {
"cinder_runtime": false,
"fbpkg_supported": true,
"is_prebuilt": true,
"kernel_name": "bento_kernel_pytorch3d",
"nightly_builds": true
},
"name": "bento_kernel_pytorch3d"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3"
},
"last_base_url": "https://9177.od.fbinfra.net:443/",
"last_kernel_id": "bb33cd83-7924-489a-8bd8-2d9d62eb0126",
"last_msg_id": "99f7088e-d22b355b859660479ef0574e_5743",
"last_server_session_id": "2944b203-9ea8-4c0e-9634-645dfea5f26b",
"outputWidgetContext": {}
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@@ -50,7 +50,7 @@
"except ModuleNotFoundError:\n",
" need_pytorch3d=True\n",
"if need_pytorch3d:\n",
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
" # We try to install PyTorch3D via a released wheel.\n",
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
" version_str=\"\".join([\n",

View File

@@ -57,7 +57,7 @@
"except ModuleNotFoundError:\n",
" need_pytorch3d=True\n",
"if need_pytorch3d:\n",
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
" # We try to install PyTorch3D via a released wheel.\n",
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
" version_str=\"\".join([\n",
@@ -129,7 +129,7 @@
"## Load the SMPL model\n",
"\n",
"#### Download the SMPL model\n",
"- Go to https://smpl.is.tue.mpg.de/download.php and sign up.\n",
"- Go to http://smpl.is.tue.mpg.de/downloads and sign up.\n",
"- Download SMPL for Python Users and unzip.\n",
"- Copy the file male template file **'models/basicModel_m_lbs_10_207_0_v1.0.0.pkl'** to the data/DensePose/ folder.\n",
" - rename the file to **'smpl_model.pkl'** or rename the string where it's commented below\n",

View File

@@ -73,7 +73,7 @@
"except ModuleNotFoundError:\n",
" need_pytorch3d=True\n",
"if need_pytorch3d:\n",
" if torch.__version__.startswith(\"1.12.\") and sys.platform.startswith(\"linux\"):\n",
" if torch.__version__.startswith(\"1.11.\") and sys.platform.startswith(\"linux\"):\n",
" # We try to install PyTorch3D via a released wheel.\n",
" pyt_version_str=torch.__version__.split(\"+\")[0].replace(\".\", \"\")\n",
" version_str=\"\".join([\n",

View File

@@ -44,8 +44,6 @@ def generate_cow_renders(
data_dir: The folder that contains the cow mesh files. If the cow mesh
files do not exist in the folder, this function will automatically
download them.
azimuth_range: number of degrees on each side of the start position to
take samples
Returns:
cameras: A batch of `num_views` `FoVPerspectiveCameras` from which the

View File

@@ -47,6 +47,7 @@ test:
- imageio
- hydra-core
- accelerate
- lpips
commands:
#pytest .
python -m unittest discover -v -s tests -t .

View File

@@ -37,7 +37,7 @@ See [Running](#running) section below for examples of training and evaluation co
To plug in custom implementations, for example, of renderer or implicit-function protocols, you need to create your own runner script and import the plug-in implementations there.
First, install PyTorch3D and Implicitron dependencies as described in the previous section.
Then, implement the custom script; copying `pytorch3d/projects/implicitron_trainer` is a good place to start.
Then, implement the custom script; copying `pytorch3d/projects/implicitron_trainer/experiment.py` is a good place to start.
See [Custom plugins](#custom-plugins) for more information on how to import implementations and enable them in the configs.
@@ -66,10 +66,8 @@ If you have a custom `experiment.py` script (as in the Option 2 above), replace
To run training, pass a yaml config file, followed by a list of overridden arguments.
For example, to train NeRF on the first skateboard sequence from CO3D dataset, you can run:
```shell
dataset_args=data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
pytorch3d_implicitron_runner --config-path ./configs/ --config-name repro_singleseq_nerf \
$dataset_args.dataset_root=<DATASET_ROOT> $dataset_args.category='skateboard' \
$dataset_args.test_restrict_sequence_id=0 test_when_finished=True exp_dir=<CHECKPOINT_DIR>
dataset_args=data_source_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
pytorch3d_implicitron_runner --config-path ./configs/ --config-name repro_singleseq_nerf $dataset_args.dataset_root=<DATASET_ROOT> $dataset_args.category='skateboard' $dataset_args.test_restrict_sequence_id=0 test_when_finished=True exp_dir=<CHECKPOINT_DIR>
```
Here, `--config-path` points to the config path relative to `pytorch3d_implicitron_runner` location;
@@ -87,10 +85,8 @@ To run evaluation on the latest checkpoint after (or during) training, simply ad
E.g. for executing the evaluation on the NeRF skateboard sequence, you can run:
```shell
dataset_args=data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
pytorch3d_implicitron_runner --config-path ./configs/ --config-name repro_singleseq_nerf \
$dataset_args.dataset_root=<CO3D_DATASET_ROOT> $dataset_args.category='skateboard' \
$dataset_args.test_restrict_sequence_id=0 exp_dir=<CHECKPOINT_DIR> eval_only=True
dataset_args=data_source_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
pytorch3d_implicitron_runner --config-path ./configs/ --config-name repro_singleseq_nerf $dataset_args.dataset_root=<CO3D_DATASET_ROOT> $dataset_args.category='skateboard' $dataset_args.test_restrict_sequence_id=0 exp_dir=<CHECKPOINT_DIR> eval_only=True
```
Evaluation prints the metrics to `stdout` and dumps them to a json file in `exp_dir`.
@@ -105,8 +101,7 @@ conda install ffmpeg
Here is an example of calling the script:
```shell
projects/implicitron_trainer/visualize_reconstruction.py exp_dir=<CHECKPOINT_DIR> \
visdom_show_preds=True n_eval_cameras=40 render_size="[64,64]" video_size="[256,256]"
projects/implicitron_trainer/visualize_reconstruction.py exp_dir=<CHECKPOINT_DIR> visdom_show_preds=True n_eval_cameras=40 render_size="[64,64]" video_size="[256,256]"
```
The argument `n_eval_cameras` sets the number of rendering viewpoints sampled on a trajectory, which defaults to a circular fly-around;
@@ -129,21 +124,18 @@ In the config, inner parameters can be propagated using `_args` postfix, e.g. to
The root of the hierarchy is defined by `ExperimentConfig` dataclass.
It has top-level fields like `eval_only` which was used above for running evaluation by adding a CLI override.
Additionally, it has non-leaf nodes like `model_factory_ImplicitronModelFactory_args.model_GenericModel_args`, which dispatches the config parameters to `GenericModel`.
Thus, changing the model parameters may be achieved in two ways: either by editing the config file, e.g.
Additionally, it has non-leaf nodes like `generic_model_args`, which dispatches the config parameters to `GenericModel`. Thus, changing the model parameters may be achieved in two ways: either by editing the config file, e.g.
```yaml
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
render_image_width: 800
raysampler_args:
n_pts_per_ray_training: 128
generic_model_args:
render_image_width: 800
raysampler_args:
n_pts_per_ray_training: 128
```
or, equivalently, by adding the following to `pytorch3d_implicitron_runner` arguments:
```shell
model_args=model_factory_ImplicitronModelFactory_args.model_GenericModel_args
$model_args.render_image_width=800 $model_args.raysampler_args.n_pts_per_ray_training=128
generic_model_args.render_image_width=800 generic_model_args.raysampler_args.n_pts_per_ray_training=128
```
See the documentation in `pytorch3d/implicitron/tools/config.py` for more details.
@@ -157,12 +149,11 @@ This means that other Configurables can refer to them using the base type, while
In that case, `_args` node name has to include the implementation type.
More specifically, to change renderer settings, the config will look like this:
```yaml
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
renderer_class_type: LSTMRenderer
renderer_LSTMRenderer_args:
num_raymarch_steps: 10
hidden_size: 16
generic_model_args:
renderer_class_type: LSTMRenderer
renderer_LSTMRenderer_args:
num_raymarch_steps: 10
hidden_size: 16
```
See the documentation in `pytorch3d/implicitron/tools/config.py` for more details on the configuration system.
@@ -197,46 +188,29 @@ class XRayRenderer(BaseRenderer, torch.nn.Module):
```
Please note `@registry.register` decorator that registers the plug-in as an implementation of `Renderer`.
IMPORTANT: In order for it to run, the class (or its enclosing module) has to be imported in your launch script.
Additionally, this has to be done before parsing the root configuration class `ExperimentConfig`.
IMPORTANT: In order for it to run, the class (or its enclosing module) has to be imported in your launch script. Additionally, this has to be done before parsing the root configuration class `ExperimentConfig`.
Simply add `import .x_ray_renderer` in the beginning of `experiment.py`.
After that, you should be able to change the config with:
```yaml
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
renderer_class_type: XRayRenderer
renderer_XRayRenderer_args:
n_pts_per_ray: 128
generic_model_args:
renderer_class_type: XRayRenderer
renderer_XRayRenderer_args:
n_pts_per_ray: 128
```
to replace the implementation and potentially override the parameters.
# Code and config structure
The main object for this trainer loop is `Experiment`. It has four top-level replaceable components.
* `data_source`: This is a `DataSourceBase` which defaults to `ImplicitronDataSource`.
It constructs the data sets and dataloaders.
* `model_factory`: This is a `ModelFactoryBase` which defaults to `ImplicitronModelFactory`.
It constructs the model, which is usually an instance of implicitron's main `GenericModel` class, and can load its weights from a checkpoint.
* `optimizer_factory`: This is an `OptimizerFactoryBase` which defaults to `ImplicitronOptimizerFactory`.
It constructs the optimizer and can load its weights from a checkpoint.
* `training_loop`: This is a `TrainingLoopBase` which defaults to `ImplicitronTrainingLoop` and defines the main training loop.
As per above, the config structure is parsed automatically from the module hierarchy.
In particular, for ImplicitronModelFactory with generic model, model parameters are contained in the `model_factory_ImplicitronModelFactory_args.model_GenericModel_args` node, and dataset parameters in `data_source_ImplicitronDataSource_args` node.
In particular, model parameters are contained in `generic_model_args` node, and dataset parameters in `data_source_args` node.
Here is the class structure of GenericModel (single-line edges show aggregation, while double lines show available implementations):
Here is the class structure (single-line edges show aggregation, while double lines show available implementations):
```
model_GenericModel_args: GenericModel
└-- global_encoder_*_args: GlobalEncoderBase
╘== SequenceAutodecoder
└-- autodecoder_args: Autodecoder
╘== HarmonicTimeEncoder
└-- raysampler_*_args: RaySampler
╘== AdaptiveRaysampler
╘== NearFarRaysampler
generic_model_args: GenericModel
└-- sequence_autodecoder_args: Autodecoder
└-- raysampler_args: RaySampler
└-- renderer_*_args: BaseRenderer
╘== MultiPassEmissionAbsorptionRenderer
╘== LSTMRenderer
@@ -254,17 +228,19 @@ model_GenericModel_args: GenericModel
╘== IdrFeatureField
└-- image_feature_extractor_*_args: FeatureExtractorBase
╘== ResNetFeatureExtractor
└-- view_pooler_args: ViewPooler
└-- view_sampler_args: ViewSampler
└-- feature_aggregator_*_args: FeatureAggregatorBase
╘== IdentityFeatureAggregator
╘== AngleWeightedIdentityFeatureAggregator
╘== AngleWeightedReductionFeatureAggregator
╘== ReductionFeatureAggregator
└-- view_sampler_args: ViewSampler
└-- feature_aggregator_*_args: FeatureAggregatorBase
╘== IdentityFeatureAggregator
╘== AngleWeightedIdentityFeatureAggregator
╘== AngleWeightedReductionFeatureAggregator
╘== ReductionFeatureAggregator
solver_args: init_optimizer
data_source_args: ImplicitronDataSource
└-- dataset_map_provider_*_args
└-- data_loader_map_provider_*_args
```
Please look at the annotations of the respective classes or functions for the lists of hyperparameters.
`tests/experiment.yaml` shows every possible option if you have no user-defined classes.
# Reproducing CO3D experiments

View File

@@ -2,11 +2,10 @@ defaults:
- default_config
- _self_
exp_dir: ./data/exps/base/
training_loop_ImplicitronTrainingLoop_args:
visdom_port: 8097
visualize_interval: 0
max_epochs: 1000
data_source_ImplicitronDataSource_args:
architecture: generic
visualize_interval: 0
visdom_port: 8097
data_source_args:
data_loader_map_provider_class_type: SequenceDataLoaderMapProvider
dataset_map_provider_class_type: JsonIndexDatasetMapProvider
data_loader_map_provider_SequenceDataLoaderMapProvider_args:
@@ -22,59 +21,55 @@ data_source_ImplicitronDataSource_args:
load_point_clouds: false
mask_depths: false
mask_images: false
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
loss_weights:
loss_mask_bce: 1.0
loss_prev_stage_mask_bce: 1.0
loss_autodecoder_norm: 0.01
loss_rgb_mse: 1.0
loss_prev_stage_rgb_mse: 1.0
output_rasterized_mc: false
chunk_size_grid: 102400
render_image_height: 400
render_image_width: 400
num_passes: 2
implicit_function_NeuralRadianceFieldImplicitFunction_args:
n_harmonic_functions_xyz: 10
n_harmonic_functions_dir: 4
n_hidden_neurons_xyz: 256
n_hidden_neurons_dir: 128
n_layers_xyz: 8
append_xyz:
- 5
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 1024
scene_extent: 8.0
n_pts_per_ray_training: 64
n_pts_per_ray_evaluation: 64
stratified_point_sampling_training: true
stratified_point_sampling_evaluation: false
renderer_MultiPassEmissionAbsorptionRenderer_args:
n_pts_per_ray_fine_training: 64
n_pts_per_ray_fine_evaluation: 64
append_coarse_samples_to_fine: true
density_noise_std_train: 1.0
view_pooler_args:
view_sampler_args:
masked_sampling: false
image_feature_extractor_ResNetFeatureExtractor_args:
stages:
- 1
- 2
- 3
- 4
proj_dim: 16
image_rescale: 0.32
first_max_pool: false
optimizer_factory_ImplicitronOptimizerFactory_args:
breed: Adam
weight_decay: 0.0
lr_policy: MultiStepLR
multistep_lr_milestones: []
generic_model_args:
loss_weights:
loss_mask_bce: 1.0
loss_prev_stage_mask_bce: 1.0
loss_autodecoder_norm: 0.01
loss_rgb_mse: 1.0
loss_prev_stage_rgb_mse: 1.0
output_rasterized_mc: false
chunk_size_grid: 102400
render_image_height: 400
render_image_width: 400
num_passes: 2
implicit_function_NeuralRadianceFieldImplicitFunction_args:
n_harmonic_functions_xyz: 10
n_harmonic_functions_dir: 4
n_hidden_neurons_xyz: 256
n_hidden_neurons_dir: 128
n_layers_xyz: 8
append_xyz:
- 5
latent_dim: 0
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 1024
scene_extent: 8.0
n_pts_per_ray_training: 64
n_pts_per_ray_evaluation: 64
stratified_point_sampling_training: true
stratified_point_sampling_evaluation: false
renderer_MultiPassEmissionAbsorptionRenderer_args:
n_pts_per_ray_fine_training: 64
n_pts_per_ray_fine_evaluation: 64
append_coarse_samples_to_fine: true
density_noise_std_train: 1.0
view_pooler_args:
view_sampler_args:
masked_sampling: false
image_feature_extractor_ResNetFeatureExtractor_args:
stages:
- 1
- 2
- 3
- 4
proj_dim: 16
image_rescale: 0.32
first_max_pool: false
solver_args:
breed: adam
lr: 0.0005
gamma: 0.1
lr_policy: multistep
max_epochs: 2000
momentum: 0.9
betas:
- 0.9
- 0.999
weight_decay: 0.0

View File

@@ -1,18 +1,17 @@
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
image_feature_extractor_class_type: ResNetFeatureExtractor
image_feature_extractor_ResNetFeatureExtractor_args:
add_images: true
add_masks: true
first_max_pool: true
image_rescale: 0.375
l2_norm: true
name: resnet34
normalize_image: true
pretrained: true
stages:
- 1
- 2
- 3
- 4
proj_dim: 32
generic_model_args:
image_feature_extractor_class_type: ResNetFeatureExtractor
image_feature_extractor_ResNetFeatureExtractor_args:
add_images: true
add_masks: true
first_max_pool: true
image_rescale: 0.375
l2_norm: true
name: resnet34
normalize_image: true
pretrained: true
stages:
- 1
- 2
- 3
- 4
proj_dim: 32

View File

@@ -1,18 +1,17 @@
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
image_feature_extractor_class_type: ResNetFeatureExtractor
image_feature_extractor_ResNetFeatureExtractor_args:
add_images: true
add_masks: true
first_max_pool: false
image_rescale: 0.375
l2_norm: true
name: resnet34
normalize_image: true
pretrained: true
stages:
- 1
- 2
- 3
- 4
proj_dim: 16
generic_model_args:
image_feature_extractor_class_type: ResNetFeatureExtractor
image_feature_extractor_ResNetFeatureExtractor_args:
add_images: true
add_masks: true
first_max_pool: false
image_rescale: 0.375
l2_norm: true
name: resnet34
normalize_image: true
pretrained: true
stages:
- 1
- 2
- 3
- 4
proj_dim: 16

View File

@@ -1,19 +1,18 @@
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
image_feature_extractor_class_type: ResNetFeatureExtractor
image_feature_extractor_ResNetFeatureExtractor_args:
stages:
- 1
- 2
- 3
first_max_pool: false
proj_dim: -1
l2_norm: false
image_rescale: 0.375
name: resnet34
normalize_image: true
pretrained: true
view_pooler_args:
feature_aggregator_AngleWeightedReductionFeatureAggregator_args:
reduction_functions:
- AVG
generic_model_args:
image_feature_extractor_class_type: ResNetFeatureExtractor
image_feature_extractor_ResNetFeatureExtractor_args:
stages:
- 1
- 2
- 3
first_max_pool: false
proj_dim: -1
l2_norm: false
image_rescale: 0.375
name: resnet34
normalize_image: true
pretrained: true
view_pooler_args:
feature_aggregator_AngleWeightedReductionFeatureAggregator_args:
reduction_functions:
- AVG

View File

@@ -1,7 +1,7 @@
defaults:
- repro_base.yaml
- _self_
data_source_ImplicitronDataSource_args:
data_source_args:
data_loader_map_provider_SequenceDataLoaderMapProvider_args:
batch_size: 10
dataset_length_train: 1000
@@ -26,13 +26,10 @@ data_source_ImplicitronDataSource_args:
n_frames_per_sequence: -1
test_on_train: true
test_restrict_sequence_id: 0
optimizer_factory_ImplicitronOptimizerFactory_args:
multistep_lr_milestones:
- 1000
training_loop_ImplicitronTrainingLoop_args:
solver_args:
max_epochs: 3000
evaluator_ImplicitronEvaluator_args:
camera_difficulty_bin_breaks:
- 0.666667
- 0.833334
is_multisequence: true
milestones:
- 1000
camera_difficulty_bin_breaks:
- 0.666667
- 0.833334

View File

@@ -1,8 +0,0 @@
data_source_ImplicitronDataSource_args:
dataset_map_provider_class_type: JsonIndexDatasetMapProviderV2
dataset_map_provider_JsonIndexDatasetMapProviderV2_args:
category: teddybear
subset_name: fewview_dev
training_loop_ImplicitronTrainingLoop_args:
evaluator_ImplicitronEvaluator_args:
is_multisequence: true

View File

@@ -1,65 +1,65 @@
defaults:
- repro_multiseq_base.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
loss_weights:
loss_mask_bce: 100.0
loss_kl: 0.0
loss_rgb_mse: 1.0
loss_eikonal: 0.1
chunk_size_grid: 65536
num_passes: 1
output_rasterized_mc: true
sampling_mode_training: mask_sample
global_encoder_class_type: SequenceAutodecoder
global_encoder_SequenceAutodecoder_args:
autodecoder_args:
n_instances: 20000
init_scale: 1.0
encoding_dim: 256
implicit_function_IdrFeatureField_args:
n_harmonic_functions_xyz: 6
bias: 0.6
d_in: 3
d_out: 1
generic_model_args:
loss_weights:
loss_mask_bce: 100.0
loss_kl: 0.0
loss_rgb_mse: 1.0
loss_eikonal: 0.1
chunk_size_grid: 65536
num_passes: 1
output_rasterized_mc: true
sampling_mode_training: mask_sample
global_encoder_class_type: SequenceAutodecoder
global_encoder_SequenceAutodecoder_args:
autodecoder_args:
n_instances: 20000
init_scale: 1.0
encoding_dim: 256
implicit_function_IdrFeatureField_args:
n_harmonic_functions_xyz: 6
bias: 0.6
d_in: 3
d_out: 1
dims:
- 512
- 512
- 512
- 512
- 512
- 512
- 512
- 512
geometric_init: true
pooled_feature_dim: 0
skip_in:
- 6
weight_norm: true
renderer_SignedDistanceFunctionRenderer_args:
ray_tracer_args:
line_search_step: 0.5
line_step_iters: 3
n_secant_steps: 8
n_steps: 100
object_bounding_sphere: 8.0
sdf_threshold: 5.0e-05
ray_normal_coloring_network_args:
d_in: 9
d_out: 3
dims:
- 512
- 512
- 512
- 512
- 512
- 512
- 512
- 512
geometric_init: true
mode: idr
n_harmonic_functions_dir: 4
pooled_feature_dim: 0
skip_in:
- 6
weight_norm: true
renderer_SignedDistanceFunctionRenderer_args:
ray_tracer_args:
line_search_step: 0.5
line_step_iters: 3
n_secant_steps: 8
n_steps: 100
sdf_threshold: 5.0e-05
ray_normal_coloring_network_args:
d_in: 9
d_out: 3
dims:
- 512
- 512
- 512
- 512
mode: idr
n_harmonic_functions_dir: 4
pooled_feature_dim: 0
weight_norm: true
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 1024
n_pts_per_ray_training: 0
n_pts_per_ray_evaluation: 0
scene_extent: 8.0
renderer_class_type: SignedDistanceFunctionRenderer
implicit_function_class_type: IdrFeatureField
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 1024
n_pts_per_ray_training: 0
n_pts_per_ray_evaluation: 0
scene_extent: 8.0
renderer_class_type: SignedDistanceFunctionRenderer
implicit_function_class_type: IdrFeatureField

View File

@@ -1,12 +1,11 @@
defaults:
- repro_multiseq_base.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
chunk_size_grid: 16000
view_pooler_enabled: false
global_encoder_class_type: SequenceAutodecoder
global_encoder_SequenceAutodecoder_args:
autodecoder_args:
n_instances: 20000
encoding_dim: 256
generic_model_args:
chunk_size_grid: 16000
view_pooler_enabled: false
global_encoder_class_type: SequenceAutodecoder
global_encoder_SequenceAutodecoder_args:
autodecoder_args:
n_instances: 20000
encoding_dim: 256

View File

@@ -2,11 +2,9 @@ defaults:
- repro_multiseq_base.yaml
- repro_feat_extractor_unnormed.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
chunk_size_grid: 16000
view_pooler_enabled: true
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 850
training_loop_ImplicitronTrainingLoop_args:
clip_grad: 1.0
clip_grad: 1.0
generic_model_args:
chunk_size_grid: 16000
view_pooler_enabled: true
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 850

View File

@@ -2,17 +2,16 @@ defaults:
- repro_multiseq_base.yaml
- repro_feat_extractor_transformer.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
chunk_size_grid: 16000
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 800
n_pts_per_ray_training: 32
n_pts_per_ray_evaluation: 32
renderer_MultiPassEmissionAbsorptionRenderer_args:
n_pts_per_ray_fine_training: 16
n_pts_per_ray_fine_evaluation: 16
implicit_function_class_type: NeRFormerImplicitFunction
view_pooler_enabled: true
view_pooler_args:
feature_aggregator_class_type: IdentityFeatureAggregator
generic_model_args:
chunk_size_grid: 16000
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 800
n_pts_per_ray_training: 32
n_pts_per_ray_evaluation: 32
renderer_MultiPassEmissionAbsorptionRenderer_args:
n_pts_per_ray_fine_training: 16
n_pts_per_ray_fine_evaluation: 16
implicit_function_class_type: NeRFormerImplicitFunction
view_pooler_enabled: true
view_pooler_args:
feature_aggregator_class_type: IdentityFeatureAggregator

View File

@@ -1,7 +1,6 @@
defaults:
- repro_multiseq_nerformer.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
view_pooler_args:
feature_aggregator_class_type: AngleWeightedIdentityFeatureAggregator
generic_model_args:
view_pooler_args:
feature_aggregator_class_type: AngleWeightedIdentityFeatureAggregator

View File

@@ -1,35 +1,34 @@
defaults:
- repro_multiseq_base.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
chunk_size_grid: 16000
view_pooler_enabled: false
n_train_target_views: -1
num_passes: 1
loss_weights:
loss_rgb_mse: 200.0
loss_prev_stage_rgb_mse: 0.0
loss_mask_bce: 1.0
loss_prev_stage_mask_bce: 0.0
loss_autodecoder_norm: 0.001
depth_neg_penalty: 10000.0
global_encoder_class_type: SequenceAutodecoder
global_encoder_SequenceAutodecoder_args:
autodecoder_args:
encoding_dim: 256
n_instances: 20000
raysampler_class_type: NearFarRaySampler
raysampler_NearFarRaySampler_args:
n_rays_per_image_sampled_from_mask: 2048
min_depth: 0.05
max_depth: 0.05
n_pts_per_ray_training: 1
n_pts_per_ray_evaluation: 1
stratified_point_sampling_training: false
stratified_point_sampling_evaluation: false
renderer_class_type: LSTMRenderer
implicit_function_class_type: SRNHyperNetImplicitFunction
optimizer_factory_ImplicitronOptimizerFactory_args:
breed: Adam
generic_model_args:
chunk_size_grid: 16000
view_pooler_enabled: false
n_train_target_views: -1
num_passes: 1
loss_weights:
loss_rgb_mse: 200.0
loss_prev_stage_rgb_mse: 0.0
loss_mask_bce: 1.0
loss_prev_stage_mask_bce: 0.0
loss_autodecoder_norm: 0.001
depth_neg_penalty: 10000.0
global_encoder_class_type: SequenceAutodecoder
global_encoder_SequenceAutodecoder_args:
autodecoder_args:
encoding_dim: 256
n_instances: 20000
raysampler_class_type: NearFarRaySampler
raysampler_NearFarRaySampler_args:
n_rays_per_image_sampled_from_mask: 2048
min_depth: 0.05
max_depth: 0.05
n_pts_per_ray_training: 1
n_pts_per_ray_evaluation: 1
stratified_point_sampling_training: false
stratified_point_sampling_evaluation: false
renderer_class_type: LSTMRenderer
implicit_function_class_type: SRNHyperNetImplicitFunction
solver_args:
breed: adam
lr: 5.0e-05

View File

@@ -1,11 +1,10 @@
defaults:
- repro_multiseq_srn_ad_hypernet.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
num_passes: 1
implicit_function_SRNHyperNetImplicitFunction_args:
pixel_generator_args:
n_harmonic_functions: 0
hypernet_args:
n_harmonic_functions: 0
generic_model_args:
num_passes: 1
implicit_function_SRNHyperNetImplicitFunction_args:
pixel_generator_args:
n_harmonic_functions: 0
hypernet_args:
n_harmonic_functions: 0

View File

@@ -2,30 +2,29 @@ defaults:
- repro_multiseq_base.yaml
- repro_feat_extractor_normed.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
chunk_size_grid: 32000
num_passes: 1
n_train_target_views: -1
loss_weights:
loss_rgb_mse: 200.0
loss_prev_stage_rgb_mse: 0.0
loss_mask_bce: 1.0
loss_prev_stage_mask_bce: 0.0
loss_autodecoder_norm: 0.0
depth_neg_penalty: 10000.0
raysampler_class_type: NearFarRaySampler
raysampler_NearFarRaySampler_args:
n_rays_per_image_sampled_from_mask: 2048
min_depth: 0.05
max_depth: 0.05
n_pts_per_ray_training: 1
n_pts_per_ray_evaluation: 1
stratified_point_sampling_training: false
stratified_point_sampling_evaluation: false
renderer_class_type: LSTMRenderer
implicit_function_class_type: SRNImplicitFunction
view_pooler_enabled: true
optimizer_factory_ImplicitronOptimizerFactory_args:
breed: Adam
generic_model_args:
chunk_size_grid: 32000
num_passes: 1
n_train_target_views: -1
loss_weights:
loss_rgb_mse: 200.0
loss_prev_stage_rgb_mse: 0.0
loss_mask_bce: 1.0
loss_prev_stage_mask_bce: 0.0
loss_autodecoder_norm: 0.0
depth_neg_penalty: 10000.0
raysampler_class_type: NearFarRaySampler
raysampler_NearFarRaySampler_args:
n_rays_per_image_sampled_from_mask: 2048
min_depth: 0.05
max_depth: 0.05
n_pts_per_ray_training: 1
n_pts_per_ray_evaluation: 1
stratified_point_sampling_training: false
stratified_point_sampling_evaluation: false
renderer_class_type: LSTMRenderer
implicit_function_class_type: SRNImplicitFunction
view_pooler_enabled: true
solver_args:
breed: adam
lr: 5.0e-05

View File

@@ -1,11 +1,10 @@
defaults:
- repro_multiseq_srn_wce.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
num_passes: 1
implicit_function_SRNImplicitFunction_args:
pixel_generator_args:
n_harmonic_functions: 0
raymarch_function_args:
n_harmonic_functions: 0
generic_model_args:
num_passes: 1
implicit_function_SRNImplicitFunction_args:
pixel_generator_args:
n_harmonic_functions: 0
raymarch_function_args:
n_harmonic_functions: 0

View File

@@ -1,4 +0,0 @@
defaults:
- repro_multiseq_nerf_wce.yaml
- repro_multiseq_co3dv2_base.yaml
- _self_

View File

@@ -1,4 +0,0 @@
defaults:
- repro_multiseq_nerformer.yaml
- repro_multiseq_co3dv2_base.yaml
- _self_

View File

@@ -1,4 +0,0 @@
defaults:
- repro_multiseq_srn_ad_hypernet.yaml
- repro_multiseq_co3dv2_base.yaml
- _self_

View File

@@ -1,4 +0,0 @@
defaults:
- repro_multiseq_srn_wce.yaml
- repro_multiseq_co3dv2_base.yaml
- _self_

View File

@@ -1,7 +1,7 @@
defaults:
- repro_base
- _self_
data_source_ImplicitronDataSource_args:
data_source_args:
data_loader_map_provider_SequenceDataLoaderMapProvider_args:
batch_size: 1
dataset_length_train: 1000
@@ -12,30 +12,28 @@ data_source_ImplicitronDataSource_args:
n_frames_per_sequence: -1
test_restrict_sequence_id: 0
test_on_train: false
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
render_image_height: 800
render_image_width: 800
log_vars:
- loss_rgb_psnr_fg
- loss_rgb_psnr
- loss_eikonal
- loss_prev_stage_rgb_psnr
- loss_mask_bce
- loss_prev_stage_mask_bce
- loss_rgb_mse
- loss_prev_stage_rgb_mse
- loss_depth_abs
- loss_depth_abs_fg
- loss_kl
- loss_mask_neg_iou
- objective
- epoch
- sec/it
optimizer_factory_ImplicitronOptimizerFactory_args:
generic_model_args:
render_image_height: 800
render_image_width: 800
log_vars:
- loss_rgb_psnr_fg
- loss_rgb_psnr
- loss_eikonal
- loss_prev_stage_rgb_psnr
- loss_mask_bce
- loss_prev_stage_mask_bce
- loss_rgb_mse
- loss_prev_stage_rgb_mse
- loss_depth_abs
- loss_depth_abs_fg
- loss_kl
- loss_mask_neg_iou
- objective
- epoch
- sec/it
solver_args:
lr: 0.0005
multistep_lr_milestones:
max_epochs: 400
milestones:
- 200
- 300
training_loop_ImplicitronTrainingLoop_args:
max_epochs: 400

View File

@@ -1,8 +0,0 @@
data_source_ImplicitronDataSource_args:
dataset_map_provider_class_type: JsonIndexDatasetMapProviderV2
dataset_map_provider_JsonIndexDatasetMapProviderV2_args:
category: teddybear
subset_name: manyview_dev_0
training_loop_ImplicitronTrainingLoop_args:
evaluator_ImplicitronEvaluator_args:
is_multisequence: false

View File

@@ -1,57 +1,57 @@
defaults:
- repro_singleseq_base
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
loss_weights:
loss_mask_bce: 100.0
loss_kl: 0.0
loss_rgb_mse: 1.0
loss_eikonal: 0.1
chunk_size_grid: 65536
num_passes: 1
view_pooler_enabled: false
implicit_function_IdrFeatureField_args:
n_harmonic_functions_xyz: 6
bias: 0.6
d_in: 3
d_out: 1
generic_model_args:
loss_weights:
loss_mask_bce: 100.0
loss_kl: 0.0
loss_rgb_mse: 1.0
loss_eikonal: 0.1
chunk_size_grid: 65536
num_passes: 1
view_pooler_enabled: false
implicit_function_IdrFeatureField_args:
n_harmonic_functions_xyz: 6
bias: 0.6
d_in: 3
d_out: 1
dims:
- 512
- 512
- 512
- 512
- 512
- 512
- 512
- 512
geometric_init: true
pooled_feature_dim: 0
skip_in:
- 6
weight_norm: true
renderer_SignedDistanceFunctionRenderer_args:
ray_tracer_args:
line_search_step: 0.5
line_step_iters: 3
n_secant_steps: 8
n_steps: 100
object_bounding_sphere: 8.0
sdf_threshold: 5.0e-05
ray_normal_coloring_network_args:
d_in: 9
d_out: 3
dims:
- 512
- 512
- 512
- 512
- 512
- 512
- 512
- 512
geometric_init: true
mode: idr
n_harmonic_functions_dir: 4
pooled_feature_dim: 0
skip_in:
- 6
weight_norm: true
renderer_SignedDistanceFunctionRenderer_args:
ray_tracer_args:
line_search_step: 0.5
line_step_iters: 3
n_secant_steps: 8
n_steps: 100
sdf_threshold: 5.0e-05
ray_normal_coloring_network_args:
d_in: 9
d_out: 3
dims:
- 512
- 512
- 512
- 512
mode: idr
n_harmonic_functions_dir: 4
pooled_feature_dim: 0
weight_norm: true
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 1024
n_pts_per_ray_training: 0
n_pts_per_ray_evaluation: 0
renderer_class_type: SignedDistanceFunctionRenderer
implicit_function_class_type: IdrFeatureField
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 1024
n_pts_per_ray_training: 0
n_pts_per_ray_evaluation: 0
renderer_class_type: SignedDistanceFunctionRenderer
implicit_function_class_type: IdrFeatureField

View File

@@ -1,55 +0,0 @@
defaults:
- repro_singleseq_base
- _self_
exp_dir: "./data/nerf_blender_repro/${oc.env:BLENDER_SINGLESEQ_CLASS}"
data_source_ImplicitronDataSource_args:
data_loader_map_provider_SequenceDataLoaderMapProvider_args:
dataset_length_train: 100
dataset_map_provider_class_type: BlenderDatasetMapProvider
dataset_map_provider_BlenderDatasetMapProvider_args:
base_dir: ${oc.env:BLENDER_DATASET_ROOT}
n_known_frames_for_test: null
object_name: ${oc.env:BLENDER_SINGLESEQ_CLASS}
path_manager_factory_class_type: PathManagerFactory
path_manager_factory_PathManagerFactory_args:
silence_logs: true
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
mask_images: false
raysampler_class_type: AdaptiveRaySampler
raysampler_AdaptiveRaySampler_args:
n_pts_per_ray_training: 64
n_pts_per_ray_evaluation: 64
n_rays_per_image_sampled_from_mask: 4096
stratified_point_sampling_training: true
stratified_point_sampling_evaluation: false
scene_extent: 2.0
scene_center:
- 0.0
- 0.0
- 0.0
renderer_MultiPassEmissionAbsorptionRenderer_args:
density_noise_std_train: 0.0
n_pts_per_ray_fine_training: 128
n_pts_per_ray_fine_evaluation: 128
raymarcher_EmissionAbsorptionRaymarcher_args:
blend_output: false
loss_weights:
loss_rgb_mse: 1.0
loss_prev_stage_rgb_mse: 1.0
loss_mask_bce: 0.0
loss_prev_stage_mask_bce: 0.0
loss_autodecoder_norm: 0.00
optimizer_factory_ImplicitronOptimizerFactory_args:
exponential_lr_step_size: 3001
lr_policy: LinearExponential
linear_exponential_lr_milestone: 200
training_loop_ImplicitronTrainingLoop_args:
max_epochs: 6000
metric_print_interval: 10
store_checkpoints_purge: 3
test_when_finished: true
validation_interval: 100

View File

@@ -2,9 +2,8 @@ defaults:
- repro_singleseq_wce_base.yaml
- repro_feat_extractor_unnormed.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
chunk_size_grid: 16000
view_pooler_enabled: true
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 850
generic_model_args:
chunk_size_grid: 16000
view_pooler_enabled: true
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 850

View File

@@ -2,17 +2,16 @@ defaults:
- repro_singleseq_wce_base.yaml
- repro_feat_extractor_transformer.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
chunk_size_grid: 16000
view_pooler_enabled: true
implicit_function_class_type: NeRFormerImplicitFunction
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 800
n_pts_per_ray_training: 32
n_pts_per_ray_evaluation: 32
renderer_MultiPassEmissionAbsorptionRenderer_args:
n_pts_per_ray_fine_training: 16
n_pts_per_ray_fine_evaluation: 16
view_pooler_args:
feature_aggregator_class_type: IdentityFeatureAggregator
generic_model_args:
chunk_size_grid: 16000
view_pooler_enabled: true
implicit_function_class_type: NeRFormerImplicitFunction
raysampler_AdaptiveRaySampler_args:
n_rays_per_image_sampled_from_mask: 800
n_pts_per_ray_training: 32
n_pts_per_ray_evaluation: 32
renderer_MultiPassEmissionAbsorptionRenderer_args:
n_pts_per_ray_fine_training: 16
n_pts_per_ray_fine_evaluation: 16
view_pooler_args:
feature_aggregator_class_type: IdentityFeatureAggregator

View File

@@ -1,29 +1,28 @@
defaults:
- repro_singleseq_base.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
num_passes: 1
chunk_size_grid: 32000
view_pooler_enabled: false
loss_weights:
loss_rgb_mse: 200.0
loss_prev_stage_rgb_mse: 0.0
loss_mask_bce: 1.0
loss_prev_stage_mask_bce: 0.0
loss_autodecoder_norm: 0.0
depth_neg_penalty: 10000.0
raysampler_class_type: NearFarRaySampler
raysampler_NearFarRaySampler_args:
n_rays_per_image_sampled_from_mask: 2048
min_depth: 0.05
max_depth: 0.05
n_pts_per_ray_training: 1
n_pts_per_ray_evaluation: 1
stratified_point_sampling_training: false
stratified_point_sampling_evaluation: false
renderer_class_type: LSTMRenderer
implicit_function_class_type: SRNImplicitFunction
optimizer_factory_ImplicitronOptimizerFactory_args:
breed: Adam
generic_model_args:
num_passes: 1
chunk_size_grid: 32000
view_pooler_enabled: false
loss_weights:
loss_rgb_mse: 200.0
loss_prev_stage_rgb_mse: 0.0
loss_mask_bce: 1.0
loss_prev_stage_mask_bce: 0.0
loss_autodecoder_norm: 0.0
depth_neg_penalty: 10000.0
raysampler_class_type: NearFarRaySampler
raysampler_NearFarRaySampler_args:
n_rays_per_image_sampled_from_mask: 2048
min_depth: 0.05
max_depth: 0.05
n_pts_per_ray_training: 1
n_pts_per_ray_evaluation: 1
stratified_point_sampling_training: false
stratified_point_sampling_evaluation: false
renderer_class_type: LSTMRenderer
implicit_function_class_type: SRNImplicitFunction
solver_args:
breed: adam
lr: 5.0e-05

View File

@@ -1,11 +1,10 @@
defaults:
- repro_singleseq_srn.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
num_passes: 1
implicit_function_SRNImplicitFunction_args:
pixel_generator_args:
n_harmonic_functions: 0
raymarch_function_args:
n_harmonic_functions: 0
generic_model_args:
num_passes: 1
implicit_function_SRNImplicitFunction_args:
pixel_generator_args:
n_harmonic_functions: 0
raymarch_function_args:
n_harmonic_functions: 0

View File

@@ -2,29 +2,28 @@ defaults:
- repro_singleseq_wce_base
- repro_feat_extractor_normed.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
num_passes: 1
chunk_size_grid: 32000
view_pooler_enabled: true
loss_weights:
loss_rgb_mse: 200.0
loss_prev_stage_rgb_mse: 0.0
loss_mask_bce: 1.0
loss_prev_stage_mask_bce: 0.0
loss_autodecoder_norm: 0.0
depth_neg_penalty: 10000.0
raysampler_class_type: NearFarRaySampler
raysampler_NearFarRaySampler_args:
n_rays_per_image_sampled_from_mask: 2048
min_depth: 0.05
max_depth: 0.05
n_pts_per_ray_training: 1
n_pts_per_ray_evaluation: 1
stratified_point_sampling_training: false
stratified_point_sampling_evaluation: false
renderer_class_type: LSTMRenderer
implicit_function_class_type: SRNImplicitFunction
optimizer_factory_ImplicitronOptimizerFactory_args:
breed: Adam
generic_model_args:
num_passes: 1
chunk_size_grid: 32000
view_pooler_enabled: true
loss_weights:
loss_rgb_mse: 200.0
loss_prev_stage_rgb_mse: 0.0
loss_mask_bce: 1.0
loss_prev_stage_mask_bce: 0.0
loss_autodecoder_norm: 0.0
depth_neg_penalty: 10000.0
raysampler_class_type: NearFarRaySampler
raysampler_NearFarRaySampler_args:
n_rays_per_image_sampled_from_mask: 2048
min_depth: 0.05
max_depth: 0.05
n_pts_per_ray_training: 1
n_pts_per_ray_evaluation: 1
stratified_point_sampling_training: false
stratified_point_sampling_evaluation: false
renderer_class_type: LSTMRenderer
implicit_function_class_type: SRNImplicitFunction
solver_args:
breed: adam
lr: 5.0e-05

View File

@@ -1,11 +1,10 @@
defaults:
- repro_singleseq_srn_wce.yaml
- _self_
model_factory_ImplicitronModelFactory_args:
model_GenericModel_args:
num_passes: 1
implicit_function_SRNImplicitFunction_args:
pixel_generator_args:
n_harmonic_functions: 0
raymarch_function_args:
n_harmonic_functions: 0
generic_model_args:
num_passes: 1
implicit_function_SRNImplicitFunction_args:
pixel_generator_args:
n_harmonic_functions: 0
raymarch_function_args:
n_harmonic_functions: 0

View File

@@ -1,4 +0,0 @@
defaults:
- repro_singleseq_idr.yaml
- repro_singleseq_co3dv2_base.yaml
- _self_

View File

@@ -1,4 +0,0 @@
defaults:
- repro_singleseq_nerf.yaml
- repro_singleseq_co3dv2_base.yaml
- _self_

View File

@@ -1,4 +0,0 @@
defaults:
- repro_singleseq_nerformer.yaml
- repro_singleseq_co3dv2_base.yaml
- _self_

View File

@@ -1,4 +0,0 @@
defaults:
- repro_singleseq_srn_noharm.yaml
- repro_singleseq_co3dv2_base.yaml
- _self_

View File

@@ -1,7 +1,7 @@
defaults:
- repro_singleseq_base
- _self_
data_source_ImplicitronDataSource_args:
data_source_args:
data_loader_map_provider_SequenceDataLoaderMapProvider_args:
batch_size: 10
dataset_length_train: 1000

View File

@@ -8,28 +8,27 @@
""""
This file is the entry point for launching experiments with Implicitron.
Main functions
---------------
- `run_training` is the wrapper for the train, val, test loops
and checkpointing
- `trainvalidate` is the inner loop which runs the model forward/backward
pass, visualizations and metric printing
Launch Training
---------------
Experiment config .yaml files are located in the
`projects/implicitron_trainer/configs` folder. To launch an experiment,
specify the name of the file. Specific config values can also be overridden
from the command line, for example:
`projects/implicitron_trainer/configs` folder. To launch
an experiment, specify the name of the file. Specific config values can
also be overridden from the command line, for example:
```
./experiment.py --config-name base_config.yaml override.param.one=42 override.param.two=84
```
To run an experiment on a specific GPU, specify the `gpu_idx` key in the
config file / CLI. To run on a different device, specify the device in
`run_training`.
Main functions
---------------
- The Experiment class defines `run` which creates the model, optimizer, and other
objects used in training, then starts TrainingLoop's `run` function.
- TrainingLoop takes care of the actual training logic: forward and backward passes,
evaluation and testing, as well as model checkpointing, visualization, and metric
printing.
To run an experiment on a specific GPU, specify the `gpu_idx` key
in the config file / CLI. To run on a different device, specify the
device in `run_training`.
Outputs
--------
@@ -41,51 +40,51 @@ The outputs of the experiment are saved and logged in multiple ways:
Stats are logged and plotted to the file "train_stats.pdf" in the
same directory. The stats are also saved as part of the checkpoint file.
- Visualizations
Predictions are plotted to a visdom server running at the
Prredictions are plotted to a visdom server running at the
port specified by the `visdom_server` and `visdom_port` keys in the
config file.
"""
import copy
import json
import logging
import os
import random
import time
import warnings
from dataclasses import field
from typing import Any, Dict, Optional, Tuple
import hydra
import lpips
import numpy as np
import torch
import tqdm
from accelerate import Accelerator
from omegaconf import DictConfig, OmegaConf
from packaging import version
from pytorch3d.implicitron.dataset.data_source import (
DataSourceBase,
ImplicitronDataSource,
)
from pytorch3d.implicitron.models.generic_model import ImplicitronModelBase
from pytorch3d.implicitron.dataset import utils as ds_utils
from pytorch3d.implicitron.dataset.data_loader_map_provider import DataLoaderMap
from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource, Task
from pytorch3d.implicitron.dataset.dataset_map_provider import DatasetMap
from pytorch3d.implicitron.evaluation import evaluate_new_view_synthesis as evaluate
from pytorch3d.implicitron.models.generic_model import EvaluationMode, GenericModel
from pytorch3d.implicitron.models.renderer.multipass_ea import (
MultiPassEmissionAbsorptionRenderer,
)
from pytorch3d.implicitron.models.renderer.ray_sampler import AdaptiveRaySampler
from pytorch3d.implicitron.tools import model_io, vis_utils
from pytorch3d.implicitron.tools.config import (
Configurable,
expand_args_fields,
remove_unused_components,
run_auto_creation,
)
from pytorch3d.implicitron.tools.stats import Stats
from pytorch3d.renderer.cameras import CamerasBase
from .impl.model_factory import ModelFactoryBase
from .impl.optimizer_factory import OptimizerFactoryBase
from .impl.training_loop import TrainingLoopBase
from .impl.utils import seed_all_random_engines
from .impl.experiment_config import ExperimentConfig
from .impl.optimization import init_optimizer
logger = logging.getLogger(__name__)
# workaround for https://github.com/facebookresearch/hydra/issues/2262
_RUN = hydra.types.RunMode.RUN
if version.parse(hydra.__version__) < version.Version("1.1"):
raise ValueError(
f"Hydra version {hydra.__version__} is too old."
@@ -101,139 +100,551 @@ except ModuleNotFoundError:
no_accelerate = os.environ.get("PYTORCH3D_NO_ACCELERATE") is not None
class Experiment(Configurable): # pyre-ignore: 13
def init_model(
*,
cfg: DictConfig,
accelerator: Optional[Accelerator] = None,
force_load: bool = False,
clear_stats: bool = False,
load_model_only: bool = False,
) -> Tuple[GenericModel, Stats, Optional[Dict[str, Any]]]:
"""
This class is at the top level of Implicitron's config hierarchy. Its
members are high-level components necessary for training an implicit rende-
ring network.
Returns an instance of `GenericModel`.
Members:
data_source: An object that produces datasets and dataloaders.
model_factory: An object that produces an implicit rendering model as
well as its corresponding Stats object.
optimizer_factory: An object that produces the optimizer and lr
scheduler.
training_loop: An object that runs training given the outputs produced
by the data_source, model_factory and optimizer_factory.
seed: A random seed to ensure reproducibility.
detect_anomaly: Whether torch.autograd should detect anomalies. Useful
for debugging, but might slow down the training.
exp_dir: Root experimentation directory. Checkpoints and training stats
will be saved here.
If `cfg.resume` is set or `force_load` is true,
attempts to load the last checkpoint from `cfg.exp_dir`. Failure to do so
will return the model with initial weights, unless `force_load` is passed,
in which case a FileNotFoundError is raised.
Args:
force_load: If true, force load model from checkpoint even if
cfg.resume is false.
clear_stats: If true, clear the stats object loaded from checkpoint
load_model_only: If true, load only the model weights from checkpoint
and do not load the state of the optimizer and stats.
Returns:
model: The model with optionally loaded weights from checkpoint
stats: The stats structure (optionally loaded from checkpoint)
optimizer_state: The optimizer state dict containing
`state` and `param_groups` keys (optionally loaded from checkpoint)
Raise:
FileNotFoundError if `force_load` is passed but checkpoint is not found.
"""
data_source: DataSourceBase
data_source_class_type: str = "ImplicitronDataSource"
model_factory: ModelFactoryBase
model_factory_class_type: str = "ImplicitronModelFactory"
optimizer_factory: OptimizerFactoryBase
optimizer_factory_class_type: str = "ImplicitronOptimizerFactory"
training_loop: TrainingLoopBase
training_loop_class_type: str = "ImplicitronTrainingLoop"
# Initialize the model
if cfg.architecture == "generic":
model = GenericModel(**cfg.generic_model_args)
else:
raise ValueError(f"No such arch {cfg.architecture}.")
seed: int = 42
detect_anomaly: bool = False
exp_dir: str = "./data/default_experiment/"
# Determine the network outputs that should be logged
if hasattr(model, "log_vars"):
log_vars = copy.deepcopy(list(model.log_vars))
else:
log_vars = ["objective"]
hydra: dict = field(
default_factory=lambda: {
"run": {"dir": "."}, # Make hydra not change the working dir.
"output_subdir": None, # disable storing the .hydra logs
"mode": _RUN,
}
visdom_env_charts = vis_utils.get_visdom_env(cfg) + "_charts"
# Init the stats struct
stats = Stats(
log_vars,
visdom_env=visdom_env_charts,
verbose=False,
visdom_server=cfg.visdom_server,
visdom_port=cfg.visdom_port,
)
def __post_init__(self):
seed_all_random_engines(
self.seed
) # Set all random engine seeds for reproducibility
# Retrieve the last checkpoint
if cfg.resume_epoch > 0:
model_path = model_io.get_checkpoint(cfg.exp_dir, cfg.resume_epoch)
else:
model_path = model_io.find_last_checkpoint(cfg.exp_dir)
run_auto_creation(self)
optimizer_state = None
if model_path is not None:
logger.info("found previous model %s" % model_path)
if force_load or cfg.resume:
logger.info(" -> resuming")
def run(self) -> None:
# Initialize the accelerator if desired.
if no_accelerate:
accelerator = None
device = torch.device("cuda:0")
map_location = None
if accelerator is not None and not accelerator.is_local_main_process:
map_location = {
"cuda:%d" % 0: "cuda:%d" % accelerator.local_process_index
}
if load_model_only:
model_state_dict = torch.load(
model_io.get_model_path(model_path), map_location=map_location
)
stats_load, optimizer_state = None, None
else:
model_state_dict, stats_load, optimizer_state = model_io.load_model(
model_path, map_location=map_location
)
# Determine if stats should be reset
if not clear_stats:
if stats_load is None:
logger.info("\n\n\n\nCORRUPT STATS -> clearing stats\n\n\n\n")
last_epoch = model_io.parse_epoch_from_model_path(model_path)
logger.info(f"Estimated resume epoch = {last_epoch}")
# Reset the stats struct
for _ in range(last_epoch + 1):
stats.new_epoch()
assert last_epoch == stats.epoch
else:
stats = stats_load
# Update stats properties incase it was reset on load
stats.visdom_env = visdom_env_charts
stats.visdom_server = cfg.visdom_server
stats.visdom_port = cfg.visdom_port
stats.plot_file = os.path.join(cfg.exp_dir, "train_stats.pdf")
stats.synchronize_logged_vars(log_vars)
else:
logger.info(" -> clearing stats")
try:
# TODO: fix on creation of the buffers
# after the hack above, this will not pass in most cases
# ... but this is fine for now
model.load_state_dict(model_state_dict, strict=True)
except RuntimeError as e:
logger.error(e)
logger.info("Cant load state dict in strict mode! -> trying non-strict")
model.load_state_dict(model_state_dict, strict=False)
model.log_vars = log_vars
else:
accelerator = Accelerator(device_placement=False)
logger.info(accelerator.state)
device = accelerator.device
logger.info(" -> but not resuming -> starting from scratch")
elif force_load:
raise FileNotFoundError(f"Cannot find a checkpoint in {cfg.exp_dir}!")
logger.info(f"Running experiment on device: {device}")
os.makedirs(self.exp_dir, exist_ok=True)
return model, stats, optimizer_state
# set the debug mode
if self.detect_anomaly:
logger.info("Anomaly detection!")
torch.autograd.set_detect_anomaly(self.detect_anomaly)
# Initialize the datasets and dataloaders.
datasets, dataloaders = self.data_source.get_datasets_and_dataloaders()
def trainvalidate(
model,
stats,
epoch,
loader,
optimizer,
validation: bool,
*,
accelerator: Optional[Accelerator],
device: torch.device,
bp_var: str = "objective",
metric_print_interval: int = 5,
visualize_interval: int = 100,
visdom_env_root: str = "trainvalidate",
clip_grad: float = 0.0,
**kwargs,
) -> None:
"""
This is the main loop for training and evaluation including:
model forward pass, loss computation, backward pass and visualization.
# Init the model and the corresponding Stats object.
model = self.model_factory(
accelerator=accelerator,
exp_dir=self.exp_dir,
)
Args:
model: The model module optionally loaded from checkpoint
stats: The stats struct, also optionally loaded from checkpoint
epoch: The index of the current epoch
loader: The dataloader to use for the loop
optimizer: The optimizer module optionally loaded from checkpoint
validation: If true, run the loop with the model in eval mode
and skip the backward pass
bp_var: The name of the key in the model output `preds` dict which
should be used as the loss for the backward pass.
metric_print_interval: The batch interval at which the stats should be
logged.
visualize_interval: The batch interval at which the visualizations
should be plotted
visdom_env_root: The name of the visdom environment to use for plotting
clip_grad: Optionally clip the gradient norms.
If set to a value <=0.0, no clipping
device: The device on which to run the model.
stats = self.training_loop.load_stats(
log_vars=model.log_vars,
exp_dir=self.exp_dir,
resume=self.model_factory.resume,
resume_epoch=self.model_factory.resume_epoch, # pyre-ignore [16]
)
start_epoch = stats.epoch + 1
Returns:
None
"""
model.to(device)
if validation:
model.eval()
trainmode = "val"
else:
model.train()
trainmode = "train"
# Init the optimizer and LR scheduler.
optimizer, scheduler = self.optimizer_factory(
accelerator=accelerator,
exp_dir=self.exp_dir,
last_epoch=start_epoch,
model=model,
resume=self.model_factory.resume,
resume_epoch=self.model_factory.resume_epoch,
)
t_start = time.time()
# Wrap all modules in the distributed library
# Note: we don't pass the scheduler to prepare as it
# doesn't need to be stepped at each optimizer step
train_loader = dataloaders.train
val_loader = dataloaders.val
test_loader = dataloaders.test
if accelerator is not None:
(
model,
optimizer,
train_loader,
val_loader,
) = accelerator.prepare(model, optimizer, train_loader, val_loader)
# get the visdom env name
visdom_env_imgs = visdom_env_root + "_images_" + trainmode
viz = vis_utils.get_visdom_connection(
server=stats.visdom_server,
port=stats.visdom_port,
)
# pyre-fixme[16]: Optional type has no attribute `is_multisequence`.
if not self.training_loop.evaluator.is_multisequence:
all_train_cameras = self.data_source.all_train_cameras
# Iterate through the batches
n_batches = len(loader)
for it, net_input in enumerate(loader):
last_iter = it == n_batches - 1
# move to gpu where possible (in place)
net_input = net_input.to(device)
# run the forward pass
if not validation:
optimizer.zero_grad()
preds = model(**{**net_input, "evaluation_mode": EvaluationMode.TRAINING})
else:
all_train_cameras = None
with torch.no_grad():
preds = model(
**{**net_input, "evaluation_mode": EvaluationMode.EVALUATION}
)
# Enter the main training loop.
self.training_loop.run(
train_loader=train_loader,
val_loader=val_loader,
test_loader=test_loader,
# pyre-ignore[6]
train_dataset=datasets.train,
model=model,
optimizer=optimizer,
scheduler=scheduler,
all_train_cameras=all_train_cameras,
accelerator=accelerator,
# make sure we dont overwrite something
assert all(k not in preds for k in net_input.keys())
# merge everything into one big dict
preds.update(net_input)
# update the stats logger
stats.update(preds, time_start=t_start, stat_set=trainmode)
assert stats.it[trainmode] == it, "inconsistent stat iteration number!"
# print textual status update
if it % metric_print_interval == 0 or last_iter:
stats.print(stat_set=trainmode, max_it=n_batches)
# visualize results
if (
(accelerator is None or accelerator.is_local_main_process)
and visualize_interval > 0
and it % visualize_interval == 0
):
prefix = f"e{stats.epoch}_it{stats.it[trainmode]}"
model.visualize(
viz,
visdom_env_imgs,
preds,
prefix,
)
# optimizer step
if not validation:
loss = preds[bp_var]
assert torch.isfinite(loss).all(), "Non-finite loss!"
# backprop
if accelerator is None:
loss.backward()
else:
accelerator.backward(loss)
if clip_grad > 0.0:
# Optionally clip the gradient norms.
total_norm = torch.nn.utils.clip_grad_norm(
model.parameters(), clip_grad
)
if total_norm > clip_grad:
logger.info(
f"Clipping gradient: {total_norm}"
+ f" with coef {clip_grad / float(total_norm)}."
)
optimizer.step()
def run_training(cfg: DictConfig) -> None:
    """
    Entry point to run the training and validation loops
    based on the specified config file.

    Builds the data source, model, optimizer and lr scheduler from `cfg`,
    then runs the epoch loop (train / optional validation / optional test
    evaluation / checkpointing) until `cfg.solver_args.max_epochs`.
    """
    # Initialize the accelerator
    # NOTE(review): `no_accelerate` is not defined anywhere in this function
    # or visible module scope -- presumably it should come from `cfg` or an
    # environment variable. This looks like merge residue; confirm against
    # the original source.
    if no_accelerate:
        accelerator = None
        device = torch.device("cuda:0")
    else:
        # device_placement=False: tensors are moved to `device` manually below.
        accelerator = Accelerator(device_placement=False)
        logger.info(accelerator.state)
        device = accelerator.device

    logger.info(f"Running experiment on device: {device}")

    # set the debug mode
    if cfg.detect_anomaly:
        logger.info("Anomaly detection!")
        torch.autograd.set_detect_anomaly(cfg.detect_anomaly)

    # create the output folder
    os.makedirs(cfg.exp_dir, exist_ok=True)
    _seed_all_random_engines(cfg.seed)
    remove_unused_components(cfg)

    # dump the exp config to the exp dir; best-effort -- a read-only exp_dir
    # only produces a warning instead of aborting the run
    try:
        cfg_filename = os.path.join(cfg.exp_dir, "expconfig.yaml")
        OmegaConf.save(config=cfg, f=cfg_filename)
    except PermissionError:
        warnings.warn("Cant dump config due to insufficient permissions!")

    # setup datasets
    datasource = ImplicitronDataSource(**cfg.data_source_args)
    datasets, dataloaders = datasource.get_datasets_and_dataloaders()
    task = datasource.get_task()

    # init the model (and, when resuming, the stats and optimizer state
    # loaded from the checkpoint)
    model, stats, optimizer_state = init_model(cfg=cfg, accelerator=accelerator)
    start_epoch = stats.epoch + 1

    # move model to gpu
    model.to(device)

    # only run evaluation on the test dataloader
    if cfg.eval_only:
        # NOTE(review): this call does not match the visible signature of
        # `_eval_and_dump(cfg, task, all_train_cameras, datasets, dataloaders,
        # model, stats, device)`: `stats` is passed both positionally and as a
        # keyword (TypeError at call time), and `self` does not exist in a
        # free function (NameError). Compare with the correct call at the end
        # of this function -- this block looks like merge residue.
        _eval_and_dump(
            cfg,
            task,
            datasource.all_train_cameras,
            datasets,
            dataloaders,
            model,
            stats,
            device=device,
            exp_dir=self.exp_dir,
            stats=stats,
            seed=self.seed,
        )
        return

    # init the optimizer
    optimizer, scheduler = init_optimizer(
        model,
        optimizer_state=optimizer_state,
        last_epoch=start_epoch,
        **cfg.solver_args,
    )

    # check the scheduler and stats have been initialized correctly
    assert scheduler.last_epoch == stats.epoch + 1
    assert scheduler.last_epoch == start_epoch

    # Wrap all modules in the distributed library
    # Note: we don't pass the scheduler to prepare as it
    # doesn't need to be stepped at each optimizer step
    train_loader = dataloaders.train
    val_loader = dataloaders.val
    if accelerator is not None:
        (
            model,
            optimizer,
            train_loader,
            val_loader,
        ) = accelerator.prepare(model, optimizer, train_loader, val_loader)

    # History of learning rates, appended to once per epoch.
    # NOTE(review): never read in this view -- presumably kept for
    # debugging/inspection; confirm.
    past_scheduler_lrs = []
    # loop through epochs
    for epoch in range(start_epoch, cfg.solver_args.max_epochs):
        # automatic new_epoch and plotting of stats at every epoch start
        with stats:
            # Make sure to re-seed random generators to ensure reproducibility
            # even after restart.
            _seed_all_random_engines(cfg.seed + epoch)
            cur_lr = float(scheduler.get_last_lr()[-1])
            logger.info(f"scheduler lr = {cur_lr:1.2e}")
            past_scheduler_lrs.append(cur_lr)

            # train loop
            trainvalidate(
                model,
                stats,
                epoch,
                train_loader,
                optimizer,
                False,
                visdom_env_root=vis_utils.get_visdom_env(cfg),
                device=device,
                accelerator=accelerator,
                **cfg,
            )

            # val loop (optional)
            if val_loader is not None and epoch % cfg.validation_interval == 0:
                trainvalidate(
                    model,
                    stats,
                    epoch,
                    val_loader,
                    optimizer,
                    True,
                    visdom_env_root=vis_utils.get_visdom_env(cfg),
                    device=device,
                    accelerator=accelerator,
                    **cfg,
                )

            # eval loop (optional)
            if (
                dataloaders.test is not None
                and cfg.test_interval > 0
                and epoch % cfg.test_interval == 0
            ):
                _run_eval(
                    model,
                    datasource.all_train_cameras,
                    dataloaders.test,
                    task,
                    camera_difficulty_bin_breaks=cfg.camera_difficulty_bin_breaks,
                    device=device,
                )

            assert stats.epoch == epoch, "inconsistent stats!"

            # delete previous models if required
            # save model only on the main process
            if cfg.store_checkpoints and (
                accelerator is None or accelerator.is_local_main_process
            ):
                if cfg.store_checkpoints_purge > 0:
                    for prev_epoch in range(epoch - cfg.store_checkpoints_purge):
                        model_io.purge_epoch(cfg.exp_dir, prev_epoch)
                outfile = model_io.get_checkpoint(cfg.exp_dir, epoch)
                # Unwrap the accelerate wrapper before serializing so the
                # checkpoint can be loaded without accelerate.
                unwrapped_model = (
                    model if accelerator is None else accelerator.unwrap_model(model)
                )
                model_io.safe_save_model(
                    unwrapped_model, stats, outfile, optimizer=optimizer
                )

        scheduler.step()
        new_lr = float(scheduler.get_last_lr()[-1])
        if new_lr != cur_lr:
            logger.info(f"LR change! {cur_lr} -> {new_lr}")

    if cfg.test_when_finished:
        _eval_and_dump(
            cfg,
            task,
            datasource.all_train_cameras,
            datasets,
            dataloaders,
            model,
            stats,
            device=device,
        )
def _eval_and_dump(
    cfg,
    task: Task,
    all_train_cameras: Optional[CamerasBase],
    datasets: DatasetMap,
    dataloaders: DataLoaderMap,
    model,
    stats,
    device,
) -> None:
    """
    Evaluate `model` on the test dataloader and store the per-category
    results as `results_test.json` inside `cfg.exp_dir`.
    """
    test_dataloader = dataloaders.test
    if test_dataloader is None:
        raise ValueError('DataLoaderMap have to contain the "test" entry for eval!')

    eval_results = _run_eval(
        model,
        all_train_cameras,
        test_dataloader,
        task,
        camera_difficulty_bin_breaks=cfg.camera_difficulty_bin_breaks,
        device=device,
    )

    # Tag every result entry with the epoch at which it was computed.
    eval_epoch = int(stats.epoch)
    for result in eval_results:
        result["eval_epoch"] = eval_epoch

    logger.info("Evaluation results")
    evaluate.pretty_print_nvs_metrics(eval_results)

    results_path = os.path.join(cfg.exp_dir, "results_test.json")
    with open(results_path, "w") as f:
        json.dump(eval_results, f)
def _get_eval_frame_data(frame_data):
    """
    Return a deep copy of `frame_data` in which the image/depth/mask fields
    of all unknown (target) frames are zeroed out, so they cannot leak into
    the model at evaluation time.
    """
    masked_frame_data = copy.deepcopy(frame_data)
    # 0/1 indicator of "known" frames, shaped to broadcast over (B, C, H, W).
    known_mask = ds_utils.is_known_frame(frame_data.frame_type).type_as(
        frame_data.image_rgb
    )[:, None, None, None]
    for attr in ("image_rgb", "depth_map", "fg_probability", "mask_crop"):
        masked_value = getattr(masked_frame_data, attr).clone() * known_mask
        setattr(masked_frame_data, attr, masked_value)
    return masked_frame_data
def _run_eval(
    model,
    all_train_cameras,
    loader,
    task: Task,
    camera_difficulty_bin_breaks: Tuple[float, float],
    device,
):
    """
    Evaluate `model` on every batch of `loader` and return the summarized
    per-category novel-view-synthesis metrics.
    """
    lpips_model = lpips.LPIPS(net="vgg").to(device)

    model.eval()
    logger.info("Evaluating model ...")

    per_batch_eval_results = []
    for frame_data in tqdm.tqdm(loader):
        frame_data = frame_data.to(device)

        # mask out the unknown images so that the model does not see them
        masked_frame_data = _get_eval_frame_data(frame_data)
        with torch.no_grad():
            preds = model(
                **{**masked_frame_data, "evaluation_mode": EvaluationMode.EVALUATION}
            )
            # TODO: Cannot use accelerate gather for two reasons:.
            # (1) TypeError: Can't apply _gpu_gather_one on object of type
            # <class 'pytorch3d.implicitron.models.base_model.ImplicitronRender'>,
            # only of nested list/tuple/dicts of objects that satisfy is_torch_tensor.
            # (2) Same error above but for frame_data which contains Cameras.
            implicitron_render = copy.deepcopy(preds["implicitron_render"])

        batch_result = evaluate.eval_batch(
            frame_data,
            implicitron_render,
            bg_color="black",
            lpips_model=lpips_model,
            source_cameras=all_train_cameras,
        )
        per_batch_eval_results.append(batch_result)

    _, category_result = evaluate.summarize_nvs_eval_results(
        per_batch_eval_results, task, camera_difficulty_bin_breaks
    )
    return category_result["results"]
def _seed_all_random_engines(seed: int) -> None:
np.random.seed(seed)
torch.manual_seed(seed)
random.seed(seed)
def _setup_envvars_for_cluster() -> bool:
@@ -267,20 +678,9 @@ def _setup_envvars_for_cluster() -> bool:
return True
def dump_cfg(cfg: DictConfig) -> None:
    """
    Strip unused config components and write the experiment config to
    `cfg.exp_dir/expconfig.yaml`. Best effort: a permission error only
    triggers a warning instead of aborting.
    """
    remove_unused_components(cfg)
    # dump the exp config to the exp dir
    os.makedirs(cfg.exp_dir, exist_ok=True)
    cfg_filename = os.path.join(cfg.exp_dir, "expconfig.yaml")
    try:
        OmegaConf.save(config=cfg, f=cfg_filename)
    except PermissionError:
        warnings.warn("Can't dump config due to insufficient permissions!")
# Expand the dataclass fields of the configurables and register the resulting
# schema with hydra's ConfigStore under the name "default_config".
# NOTE(review): both `Experiment` and `ExperimentConfig` are expanded and
# stored under the SAME name -- the second `cs.store` call silently overwrites
# the first. This looks like residue of the Experiment/ExperimentConfig
# refactor; only one of the two registrations should remain.
expand_args_fields(Experiment)
expand_args_fields(ExperimentConfig)
cs = hydra.core.config_store.ConfigStore.instance()
cs.store(name="default_config", node=Experiment)
cs.store(name="default_config", node=ExperimentConfig)
@hydra.main(config_path="./configs/", config_name="default_config")
@@ -294,14 +694,12 @@ def experiment(cfg: DictConfig) -> None:
logger.info("Running locally")
# TODO: The following may be needed for hydra/submitit it to work
expand_args_fields(ImplicitronModelBase)
expand_args_fields(GenericModel)
expand_args_fields(AdaptiveRaySampler)
expand_args_fields(MultiPassEmissionAbsorptionRenderer)
expand_args_fields(ImplicitronDataSource)
experiment = Experiment(**cfg)
dump_cfg(cfg)
experiment.run()
run_training(cfg)
if __name__ == "__main__":

View File

@@ -0,0 +1,49 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from dataclasses import field
from typing import Any, Dict, Tuple
from omegaconf import DictConfig
from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource
from pytorch3d.implicitron.models.generic_model import GenericModel
from pytorch3d.implicitron.tools.config import Configurable, get_default_args_field
from .optimization import init_optimizer
class ExperimentConfig(Configurable):
    """
    Top-level configuration of an Implicitron experiment.

    Bundles the nested default arguments of the model, the solver and the
    data source with the flat training-loop, checkpointing and visualization
    options consumed by `run_training`.
    """

    # Nested configs with defaults harvested from the respective callables.
    generic_model_args: DictConfig = get_default_args_field(GenericModel)
    solver_args: DictConfig = get_default_args_field(init_optimizer)
    data_source_args: DictConfig = get_default_args_field(ImplicitronDataSource)
    # Model architecture selector.
    architecture: str = "generic"
    # If True, enable torch.autograd anomaly detection (slow; debugging only).
    detect_anomaly: bool = False
    # If True, only run evaluation on the test dataloader, then exit.
    eval_only: bool = False
    # Root directory for checkpoints, stats and the dumped config.
    exp_dir: str = "./data/default_experiment/"
    exp_idx: int = 0
    gpu_idx: int = 0
    # Print training metrics every this many batches.
    metric_print_interval: int = 5
    # Resume from a checkpoint; resume_epoch <= 0 means "latest checkpoint".
    resume: bool = True
    resume_epoch: int = -1
    seed: int = 0
    # Checkpointing; store_checkpoints_purge > 0 deletes checkpoints older
    # than that many epochs.
    store_checkpoints: bool = True
    store_checkpoints_purge: int = 1
    # Evaluate on the test set every this many epochs (<= 0 disables).
    test_interval: int = -1
    # If True, run a final test evaluation when training finishes.
    test_when_finished: bool = False
    # Run the validation loop every this many epochs.
    validation_interval: int = 1
    # Visdom plotting settings; plots every `visualize_interval` batches.
    visdom_env: str = ""
    visdom_port: int = 8097
    visdom_server: str = "http://127.0.0.1"
    visualize_interval: int = 1000
    # Max gradient norm for clipping; <= 0.0 disables clipping.
    clip_grad: float = 0.0
    # Camera-difficulty thresholds used to bin evaluation results.
    camera_difficulty_bin_breaks: Tuple[float, ...] = 0.97, 0.98

    hydra: Dict[str, Any] = field(
        default_factory=lambda: {
            "run": {"dir": "."},  # Make hydra not change the working dir.
            "output_subdir": None,  # disable storing the .hydra logs
        }
    )

View File

@@ -1,133 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import logging
import os
from typing import Optional
import torch.optim
from accelerate import Accelerator
from pytorch3d.implicitron.models.base_model import ImplicitronModelBase
from pytorch3d.implicitron.tools import model_io
from pytorch3d.implicitron.tools.config import (
registry,
ReplaceableBase,
run_auto_creation,
)
from pytorch3d.implicitron.tools.stats import Stats
logger = logging.getLogger(__name__)
class ModelFactoryBase(ReplaceableBase):
    """
    Replaceable base for factories that build (and optionally restore from a
    checkpoint) the model to be trained.
    """

    resume: bool = True  # resume from the last checkpoint

    def __call__(self, **kwargs) -> ImplicitronModelBase:
        """
        Initialize the model (possibly from a previously saved state).
        Returns: An instance of ImplicitronModelBase.
        """
        raise NotImplementedError()

    def load_stats(self, **kwargs) -> Stats:
        """
        Initialize or load a Stats object.
        """
        raise NotImplementedError()
@registry.register
class ImplicitronModelFactory(ModelFactoryBase):  # pyre-ignore [13]
    """
    A factory class that initializes an implicit rendering model.

    Members:
        model: An ImplicitronModelBase object.
        resume: If True, attempt to load the last checkpoint from `exp_dir`
            passed to __call__. Failure to do so will return a model with ini-
            tial weights unless `force_resume` is True.
        resume_epoch: If `resume` is True: Resume a model at this epoch, or if
            `resume_epoch` <= 0, then resume from the latest checkpoint.
        force_resume: If True, throw a FileNotFoundError if `resume` is True but
            a model checkpoint cannot be found.
    """

    model: ImplicitronModelBase
    model_class_type: str = "GenericModel"
    resume: bool = True
    resume_epoch: int = -1
    force_resume: bool = False

    def __post_init__(self):
        # Instantiate `model` (and any other replaceable members) from config.
        run_auto_creation(self)

    def __call__(
        self,
        exp_dir: str,
        accelerator: Optional[Accelerator] = None,
    ) -> ImplicitronModelBase:
        """
        Returns an instance of `ImplicitronModelBase`, possibly loaded from a
        checkpoint (if self.resume, self.resume_epoch specify so).

        Args:
            exp_dir: Root experiment directory.
            accelerator: An Accelerator object.

        Returns:
            model: The model with optionally loaded weights from checkpoint

        Raise:
            FileNotFoundError if `force_resume` is True but checkpoint not found.
        """
        # Determine the network outputs that should be logged
        if hasattr(self.model, "log_vars"):
            log_vars = list(self.model.log_vars)
        else:
            log_vars = ["objective"]

        if self.resume_epoch > 0:
            # Resume from a certain epoch
            model_path = model_io.get_checkpoint(exp_dir, self.resume_epoch)
            if not os.path.isfile(model_path):
                raise ValueError(f"Cannot find model from epoch {self.resume_epoch}.")
        else:
            # Retrieve the last checkpoint
            model_path = model_io.find_last_checkpoint(exp_dir)

        if model_path is not None:
            logger.info(f"Found previous model {model_path}")
            if self.force_resume or self.resume:
                logger.info("Resuming.")

                # On non-main processes, remap tensors saved from cuda:0 onto
                # this process' local device.
                map_location = None
                if accelerator is not None and not accelerator.is_local_main_process:
                    map_location = {
                        "cuda:%d" % 0: "cuda:%d" % accelerator.local_process_index
                    }
                model_state_dict = torch.load(
                    model_io.get_model_path(model_path), map_location=map_location
                )

                try:
                    self.model.load_state_dict(model_state_dict, strict=True)
                except RuntimeError as e:
                    # Fall back to non-strict loading so that partially
                    # matching checkpoints (e.g. after a model change) still
                    # restore what they can.
                    logger.error(e)
                    logger.info(
                        "Cannot load state dict in strict mode! -> trying non-strict"
                    )
                    self.model.load_state_dict(model_state_dict, strict=False)
                self.model.log_vars = log_vars
            else:
                logger.info("Not resuming -> starting from scratch.")
        elif self.force_resume:
            raise FileNotFoundError(f"Cannot find a checkpoint in {exp_dir}!")

        return self.model

View File

@@ -0,0 +1,109 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import logging
from typing import Any, Dict, Optional, Tuple
import torch
from pytorch3d.implicitron.models.generic_model import GenericModel
from pytorch3d.implicitron.tools.config import enable_get_default_args
logger = logging.getLogger(__name__)
def init_optimizer(
    model: GenericModel,
    optimizer_state: Optional[Dict[str, Any]],
    last_epoch: int,
    breed: str = "adam",
    weight_decay: float = 0.0,
    lr_policy: str = "multistep",
    lr: float = 0.0005,
    gamma: float = 0.1,
    momentum: float = 0.9,
    betas: Tuple[float, ...] = (0.9, 0.999),
    milestones: Tuple[int, ...] = (),
    max_epochs: int = 1000,
):
    """
    Build the optimizer (optionally restoring its state from a checkpoint)
    together with the learning rate scheduler.

    Args:
        model: The model whose parameters will be optimized.
        optimizer_state: Optimizer state dict loaded from a checkpoint, or
            None when starting from scratch.
        last_epoch: Number of the last saved epoch when resuming (the
            scheduler is fast-forwarded by this many steps).
        breed: Optimizer type; one of "sgd", "adagrad", "adam".
        weight_decay: L2 penalty on model weights.
        lr_policy: Learning rate policy; only "multistep" is supported.
        lr: Initial learning rate.
        gamma: Multiplicative factor of learning rate decay.
        momentum: Momentum factor (SGD only).
        betas: Running-average coefficients (Adam only).
        milestones: Increasing epoch indices at which the lr is decayed.
        max_epochs: Maximum number of epochs the optimizer will be run for.

    Returns:
        optimizer: The (optionally restored) optimizer.
        scheduler: The learning rate scheduler.

    Raises:
        ValueError: if `breed` or `lr_policy` are not supported.
    """
    # Collect parameter groups: prefer the model's own grouping when it
    # provides one, otherwise put all trainable parameters in a single group.
    if hasattr(model, "_get_param_groups"):
        # pyre-ignore[29]
        p_groups = model._get_param_groups(lr, wd=weight_decay)
    else:
        trainable_params = [prm for prm in model.parameters() if prm.requires_grad]
        p_groups = [{"params": trainable_params, "lr": lr}]

    # Construct the optimizer of the requested breed via a dispatch table.
    optimizer_builders = {
        "sgd": lambda: torch.optim.SGD(
            p_groups, lr=lr, momentum=momentum, weight_decay=weight_decay
        ),
        "adagrad": lambda: torch.optim.Adagrad(
            p_groups, lr=lr, weight_decay=weight_decay
        ),
        "adam": lambda: torch.optim.Adam(
            p_groups, lr=lr, betas=betas, weight_decay=weight_decay
        ),
    }
    if breed not in optimizer_builders:
        raise ValueError("no such solver type %s" % breed)
    optimizer = optimizer_builders[breed]()
    logger.info(" -> solver type = %s" % breed)

    # Restore optimizer state from checkpoint, if provided.
    if optimizer_state is not None:
        logger.info(" -> setting loaded optimizer state")
        optimizer.load_state_dict(optimizer_state)

    # Build the learning rate scheduler.
    if lr_policy != "multistep":
        raise ValueError("no such lr policy %s" % lr_policy)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=milestones,
        gamma=gamma,
    )

    # When loading from checkpoint, fast-forward the scheduler so the lr is
    # correctly set even after returning.
    for _ in range(last_epoch):
        scheduler.step()
    optimizer.zero_grad()

    return optimizer, scheduler
# Register `init_optimizer` so its keyword defaults can be harvested as an
# omegaconf config (consumed by `ExperimentConfig.solver_args`).
enable_get_default_args(init_optimizer)

View File

@@ -1,325 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import inspect
import logging
import os
from collections import defaultdict
from dataclasses import field
from typing import Any, Dict, List, Optional, Tuple
import torch.optim
from accelerate import Accelerator
from pytorch3d.implicitron.models.base_model import ImplicitronModelBase
from pytorch3d.implicitron.tools import model_io
from pytorch3d.implicitron.tools.config import (
registry,
ReplaceableBase,
run_auto_creation,
)
logger = logging.getLogger(__name__)
class OptimizerFactoryBase(ReplaceableBase):
    """
    Replaceable base for factories that build the optimizer and the learning
    rate scheduler used by the training loop.
    """

    def __call__(
        self, model: ImplicitronModelBase, **kwargs
    ) -> Tuple[torch.optim.Optimizer, Any]:
        """
        Initialize the optimizer and lr scheduler.

        Args:
            model: The model with optionally loaded weights.

        Returns:
            An optimizer module (optionally loaded from a checkpoint) and
            a learning rate scheduler module (should be a subclass of torch.optim's
            lr_scheduler._LRScheduler).
        """
        raise NotImplementedError()
@registry.register
class ImplicitronOptimizerFactory(OptimizerFactoryBase):
    """
    A factory that initializes the optimizer and lr scheduler.

    Members:
        betas: Beta parameters for the Adam optimizer.
        breed: The type of optimizer to use. We currently support SGD, Adagrad
            and Adam.
        exponential_lr_step_size: With Exponential policy only,
            lr = lr * gamma ** (epoch/step_size)
        gamma: Multiplicative factor of learning rate decay.
        lr: The value for the initial learning rate.
        lr_policy: The policy to use for learning rate. We currently support
            MultiStepLR, Exponential and LinearExponential policies.
        momentum: Momentum factor for SGD optimizer.
        multistep_lr_milestones: With MultiStepLR policy only: list of
            increasing epoch indices at which the learning rate is modified.
        weight_decay: The optimizer weight_decay (L2 penalty on model weights).
        linear_exponential_lr_milestone: With LinearExponential policy only:
            epoch at which the warm-up linear phase ends and exponential
            decay begins.
        linear_exponential_start_gamma: With LinearExponential policy only:
            lr multiplier at epoch 0, linearly annealed to 1 by
            `linear_exponential_lr_milestone`.
        foreach: Whether to use new "foreach" implementation of optimizer where
            available (e.g. requires PyTorch 1.12.0 for Adam)
        group_learning_rates: Parameters or modules can be assigned to parameter
            groups. This dictionary has names of those parameter groups as keys
            and learning rates as values. All parameter group names have to be
            defined in this dictionary. Parameters which do not have predefined
            parameter group are put into "default" parameter group which has
            `lr` as its learning rate.
    """

    betas: Tuple[float, ...] = (0.9, 0.999)
    breed: str = "Adam"
    exponential_lr_step_size: int = 250
    gamma: float = 0.1
    lr: float = 0.0005
    lr_policy: str = "MultiStepLR"
    momentum: float = 0.9
    multistep_lr_milestones: tuple = ()
    weight_decay: float = 0.0
    linear_exponential_lr_milestone: int = 200
    linear_exponential_start_gamma: float = 0.1
    foreach: Optional[bool] = True
    group_learning_rates: Dict[str, float] = field(default_factory=lambda: {})

    def __post_init__(self):
        run_auto_creation(self)

    def __call__(
        self,
        last_epoch: int,
        model: ImplicitronModelBase,
        accelerator: Optional[Accelerator] = None,
        exp_dir: Optional[str] = None,
        resume: bool = True,
        resume_epoch: int = -1,
        **kwargs,
    ) -> Tuple[torch.optim.Optimizer, Any]:
        """
        Initialize the optimizer (optionally from a checkpoint) and the lr scheduluer.

        Args:
            last_epoch: If the model was loaded from checkpoint this will be the
                number of the last epoch that was saved.
            model: The model with optionally loaded weights.
            accelerator: An optional Accelerator instance.
            exp_dir: Root experiment directory.
            resume: If True, attempt to load optimizer checkpoint from exp_dir.
                Failure to do so will return a newly initialized optimizer.
            resume_epoch: If `resume` is True: Resume optimizer at this epoch. If
                `resume_epoch` <= 0, then resume from the latest checkpoint.

        Returns:
            An optimizer module (optionally loaded from a checkpoint) and
            a learning rate scheduler module (should be a subclass of torch.optim's
            lr_scheduler._LRScheduler).
        """
        # Get the parameters to optimize
        if hasattr(model, "_get_param_groups"):  # use the model function
            # pyre-ignore[29]
            p_groups = model._get_param_groups(self.lr, wd=self.weight_decay)
        else:
            # Group parameters according to `param_groups` annotations found
            # on the model's modules, with per-group learning rates.
            p_groups = [
                {"params": params, "lr": self._get_group_learning_rate(group)}
                for group, params in self._get_param_groups(model).items()
            ]

        # Initialize the optimizer
        optimizer_kwargs: Dict[str, Any] = {
            "lr": self.lr,
            "weight_decay": self.weight_decay,
        }
        if self.breed == "SGD":
            optimizer_class = torch.optim.SGD
            optimizer_kwargs["momentum"] = self.momentum
        elif self.breed == "Adagrad":
            optimizer_class = torch.optim.Adagrad
        elif self.breed == "Adam":
            optimizer_class = torch.optim.Adam
            optimizer_kwargs["betas"] = self.betas
        else:
            raise ValueError(f"No such solver type {self.breed}")

        # Pass `foreach` only to optimizers whose signature accepts it
        # (older PyTorch versions do not have the foreach implementation).
        if "foreach" in inspect.signature(optimizer_class.__init__).parameters:
            optimizer_kwargs["foreach"] = self.foreach

        optimizer = optimizer_class(p_groups, **optimizer_kwargs)
        logger.info(f"Solver type = {self.breed}")

        # Load state from checkpoint
        optimizer_state = self._get_optimizer_state(
            exp_dir,
            accelerator,
            resume_epoch=resume_epoch,
            resume=resume,
        )
        if optimizer_state is not None:
            logger.info("Setting loaded optimizer state.")
            optimizer.load_state_dict(optimizer_state)

        # Initialize the learning rate scheduler
        if self.lr_policy.casefold() == "MultiStepLR".casefold():
            scheduler = torch.optim.lr_scheduler.MultiStepLR(
                optimizer,
                milestones=self.multistep_lr_milestones,
                gamma=self.gamma,
            )
        elif self.lr_policy.casefold() == "Exponential".casefold():
            scheduler = torch.optim.lr_scheduler.LambdaLR(
                optimizer,
                lambda epoch: self.gamma ** (epoch / self.exponential_lr_step_size),
                verbose=False,
            )
        elif self.lr_policy.casefold() == "LinearExponential".casefold():
            # linear learning rate progression between epochs 0 to
            # self.linear_exponential_lr_milestone, followed by exponential
            # lr decay for the rest of the epochs
            def _get_lr(epoch: int):
                m = self.linear_exponential_lr_milestone
                if epoch < m:
                    w = (m - epoch) / m
                    gamma = w * self.linear_exponential_start_gamma + (1 - w)
                else:
                    epoch_rest = epoch - m
                    gamma = self.gamma ** (epoch_rest / self.exponential_lr_step_size)
                return gamma

            scheduler = torch.optim.lr_scheduler.LambdaLR(
                optimizer, _get_lr, verbose=False
            )
        else:
            raise ValueError("no such lr policy %s" % self.lr_policy)

        # When loading from checkpoint, this will make sure that the
        # lr is correctly set even after returning.
        for _ in range(last_epoch):
            scheduler.step()
        optimizer.zero_grad()
        return optimizer, scheduler

    def _get_optimizer_state(
        self,
        exp_dir: Optional[str],
        accelerator: Optional[Accelerator] = None,
        resume: bool = True,
        resume_epoch: int = -1,
    ) -> Optional[Dict[str, Any]]:
        """
        Load an optimizer state from a checkpoint.

        Args:
            exp_dir: Root experiment directory; None disables loading.
            accelerator: An optional Accelerator instance.
            resume: If True, attempt to load the last checkpoint from `exp_dir`
                passed to __call__. Failure to do so will return a newly initialized
                optimizer.
            resume_epoch: If `resume` is True: Resume optimizer at this epoch. If
                `resume_epoch` <= 0, then resume from the latest checkpoint.

        Returns:
            The optimizer state dict, or None if there is nothing to resume.
        """
        if exp_dir is None or not resume:
            return None
        if resume_epoch > 0:
            save_path = model_io.get_checkpoint(exp_dir, resume_epoch)
            if not os.path.isfile(save_path):
                raise FileNotFoundError(
                    f"Cannot find optimizer from epoch {resume_epoch}."
                )
        else:
            save_path = model_io.find_last_checkpoint(exp_dir)
        optimizer_state = None
        if save_path is not None:
            logger.info(f"Found previous optimizer state {save_path} -> resuming.")
            opt_path = model_io.get_optimizer_path(save_path)

            if os.path.isfile(opt_path):
                # On non-main processes, remap tensors saved from cuda:0 onto
                # this process' local device.
                map_location = None
                if accelerator is not None and not accelerator.is_local_main_process:
                    map_location = {
                        "cuda:%d" % 0: "cuda:%d" % accelerator.local_process_index
                    }
                optimizer_state = torch.load(opt_path, map_location)
            else:
                raise FileNotFoundError(f"Optimizer state {opt_path} does not exist.")
        return optimizer_state

    def _get_param_groups(
        self, module: torch.nn.Module
    ) -> Dict[str, List[torch.nn.Parameter]]:
        """
        Recursively visits all the modules inside the `module` and sorts all the
        parameters in parameter groups.

        Uses `param_groups` dictionary member, where keys are names of individual
        parameters or module members and values are the names of the parameter groups
        for those parameters or members. "self" key is used to denote the parameter groups
        at the module level. Possible keys, including the "self" key do not have to
        be defined. By default all parameters have the learning rate defined in the
        optimizer. This can be overridden by setting the parameter group in `param_groups`
        member of a specific module, it can be overridden at the:
            - module level with “self” key, all the parameters and child
                module's parameters will inherit it
            - member level, which is the same as if the `param_groups` in that
                member has key=“self” and value equal to that parameter group.
                This is useful if members do not have `param_groups`, for
                example torch.nn.Linear.
            - parameter level, only parameter with the same name as the key
                will have it.

        Args:
            module: module from which to extract the parameters and their parameter
                groups

        Returns:
            dictionary with parameter groups as keys and lists of parameters as values
        """

        param_groups = defaultdict(list)

        def traverse(module, default_group):
            # If key self is defined in param_groups then chenge the default param
            # group for all parameters and children in the module.
            if hasattr(module, "param_groups") and "self" in module.param_groups:
                default_group = module.param_groups["self"]

            # Collect all the parameters that are directly inside the `module`,
            # they will be in the default param group if they don't have
            # defined group.
            for name, param in module.named_parameters(recurse=False):
                if param.requires_grad:
                    if hasattr(module, "param_groups") and name in module.param_groups:
                        param_groups[module.param_groups[name]].append(param)
                    else:
                        param_groups[default_group].append(param)

            # If children have defined default param group then use it else pass
            # own default.
            for child_name, child in module.named_children():
                if (
                    hasattr(module, "param_groups")
                    and child_name in module.param_groups
                ):
                    traverse(child, module.param_groups[child_name])
                else:
                    traverse(child, default_group)

        traverse(module, "default")
        return param_groups

    def _get_group_learning_rate(self, group_name: str) -> float:
        """
        Wraps the `group_learning_rates` dictionary providing errors and returns
        `self.lr` for "default" group_name.

        Args:
            group_name: a string representing the name of the group

        Returns:
            learning rate for a specific group

        Raises:
            ValueError: if `group_name` is not "default" and has no entry in
                `group_learning_rates`.
        """
        if group_name == "default":
            return self.lr
        lr = self.group_learning_rates.get(group_name, None)
        if lr is None:
            raise ValueError(f"no learning rate given for group {group_name}")
        return lr

View File

@@ -1,456 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import logging
import os
import time
from typing import Any, List, Optional
import torch
from accelerate import Accelerator
from pytorch3d.implicitron.evaluation.evaluator import EvaluatorBase
from pytorch3d.implicitron.models.base_model import ImplicitronModelBase
from pytorch3d.implicitron.models.generic_model import EvaluationMode
from pytorch3d.implicitron.tools import model_io, vis_utils
from pytorch3d.implicitron.tools.config import (
registry,
ReplaceableBase,
run_auto_creation,
)
from pytorch3d.implicitron.tools.stats import Stats
from pytorch3d.renderer.cameras import CamerasBase
from torch.utils.data import DataLoader, Dataset
from .utils import seed_all_random_engines
logger = logging.getLogger(__name__)
# pyre-fixme[13]: Attribute `evaluator` is never initialized.
class TrainingLoopBase(ReplaceableBase):
    """
    Replaceable base for the outer training loop.

    Members:
        evaluator: An EvaluatorBase instance, used to evaluate training results.
    """

    evaluator: Optional[EvaluatorBase]
    evaluator_class_type: Optional[str] = "ImplicitronEvaluator"

    def run(
        self,
        train_loader: DataLoader,
        val_loader: Optional[DataLoader],
        test_loader: Optional[DataLoader],
        train_dataset: Dataset,
        model: ImplicitronModelBase,
        optimizer: torch.optim.Optimizer,
        scheduler: Any,
        **kwargs,
    ) -> None:
        """
        Run the full training loop. Subclasses must implement this.
        """
        raise NotImplementedError()

    def load_stats(
        self,
        log_vars: List[str],
        exp_dir: str,
        resume: bool = True,
        resume_epoch: int = -1,
        **kwargs,
    ) -> Stats:
        """
        Initialize or load a Stats object for the experiment in `exp_dir`.
        """
        raise NotImplementedError()
@registry.register
class ImplicitronTrainingLoop(TrainingLoopBase):
    """
    The default Implicitron training loop: iterates over epochs, runs a
    training pass (plus optional validation/test passes), checkpoints the
    model, and steps the learning-rate scheduler.

    Members:
        eval_only: If True, only run evaluation using the test dataloader.
        max_epochs: Train for this many epochs. Note that if the model was
            loaded from a checkpoint, we will restart training at the appropriate
            epoch and run for (max_epochs - checkpoint_epoch) epochs.
        store_checkpoints: If True, store model and optimizer state checkpoints.
        store_checkpoints_purge: If > 0, remove any checkpoints more than
            this many epochs old.
        test_interval: Evaluate on a test dataloader each `test_interval` epochs.
        test_when_finished: If True, evaluate on a test dataloader when training
            completes.
        validation_interval: Validate each `validation_interval` epochs.
        clip_grad: Optionally clip the gradient norms.
            If set to a value <=0.0, no clipping.
        metric_print_interval: The batch interval at which the stats should be
            logged.
        visualize_interval: The batch interval at which the visualizations
            should be plotted.
        visdom_env: The name of the Visdom environment to use for plotting.
        visdom_port: The Visdom port.
        visdom_server: Address of the Visdom server.
    """

    # Parameters of the outer training loop.
    eval_only: bool = False
    max_epochs: int = 1000
    store_checkpoints: bool = True
    store_checkpoints_purge: int = 1
    test_interval: int = -1
    test_when_finished: bool = False
    validation_interval: int = 1

    # Gradient clipping.
    clip_grad: float = 0.0

    # Visualization/logging parameters.
    metric_print_interval: int = 5
    visualize_interval: int = 1000
    visdom_env: str = ""
    visdom_port: int = int(os.environ.get("VISDOM_PORT", 8097))
    visdom_server: str = "http://127.0.0.1"

    def __post_init__(self):
        run_auto_creation(self)

    def run(
        self,
        *,
        train_loader: DataLoader,
        val_loader: Optional[DataLoader],
        test_loader: Optional[DataLoader],
        train_dataset: Dataset,
        model: ImplicitronModelBase,
        optimizer: torch.optim.Optimizer,
        scheduler: Any,
        accelerator: Optional[Accelerator],
        all_train_cameras: Optional[CamerasBase],
        device: torch.device,
        exp_dir: str,
        stats: Stats,
        seed: int,
        **kwargs,
    ):
        """
        Entry point to run the training and validation loops
        based on the specified config file.
        """
        start_epoch = stats.epoch + 1
        # The scheduler state must agree with the (possibly resumed) stats;
        # a mismatch means an inconsistent checkpoint was loaded.
        assert scheduler.last_epoch == start_epoch

        # only run evaluation on the test dataloader
        if self.eval_only:
            if test_loader is not None:
                # pyre-fixme[16]: `Optional` has no attribute `run`.
                self.evaluator.run(
                    all_train_cameras=all_train_cameras,
                    dataloader=test_loader,
                    device=device,
                    dump_to_json=True,
                    epoch=stats.epoch,
                    exp_dir=exp_dir,
                    model=model,
                )
                return
            else:
                raise ValueError(
                    "Cannot evaluate and dump results to json, no test data provided."
                )

        # loop through epochs
        for epoch in range(start_epoch, self.max_epochs):
            # automatic new_epoch and plotting of stats at every epoch start
            with stats:

                # Make sure to re-seed random generators to ensure reproducibility
                # even after restart.
                seed_all_random_engines(seed + epoch)

                cur_lr = float(scheduler.get_last_lr()[-1])
                logger.debug(f"scheduler lr = {cur_lr:1.2e}")

                # train loop
                self._training_or_validation_epoch(
                    accelerator=accelerator,
                    device=device,
                    epoch=epoch,
                    loader=train_loader,
                    model=model,
                    optimizer=optimizer,
                    stats=stats,
                    validation=False,
                )

                # val loop (optional)
                if val_loader is not None and epoch % self.validation_interval == 0:
                    self._training_or_validation_epoch(
                        accelerator=accelerator,
                        device=device,
                        epoch=epoch,
                        loader=val_loader,
                        model=model,
                        optimizer=optimizer,
                        stats=stats,
                        validation=True,
                    )

                # eval loop (optional)
                if (
                    test_loader is not None
                    and self.test_interval > 0
                    and epoch % self.test_interval == 0
                ):
                    self.evaluator.run(
                        all_train_cameras=all_train_cameras,
                        device=device,
                        dataloader=test_loader,
                        model=model,
                    )

                assert stats.epoch == epoch, "inconsistent stats!"
                self._checkpoint(accelerator, epoch, exp_dir, model, optimizer, stats)

                scheduler.step()
                new_lr = float(scheduler.get_last_lr()[-1])
                if new_lr != cur_lr:
                    logger.info(f"LR change! {cur_lr} -> {new_lr}")

        if self.test_when_finished:
            if test_loader is not None:
                self.evaluator.run(
                    all_train_cameras=all_train_cameras,
                    device=device,
                    dump_to_json=True,
                    epoch=stats.epoch,
                    exp_dir=exp_dir,
                    dataloader=test_loader,
                    model=model,
                )
            else:
                raise ValueError(
                    "Cannot evaluate and dump results to json, no test data provided."
                )

    def load_stats(
        self,
        log_vars: List[str],
        exp_dir: str,
        resume: bool = True,
        resume_epoch: int = -1,
        **kwargs,
    ) -> Stats:
        """
        Load Stats that correspond to the model's log_vars and resume_epoch.

        Args:
            log_vars: A list of variable names to log. Should be a subset of the
                `preds` returned by the forward function of the corresponding
                ImplicitronModelBase instance.
            exp_dir: Root experiment directory.
            resume: If False, do not load stats from the checkpoint specified
                by resume and resume_epoch; instead, create a fresh stats object.
            resume_epoch: If > 0, resume stats from this specific epoch's
                checkpoint (raises if missing); if -1, resume from the last
                available checkpoint.

        Returns:
            stats: The stats structure (optionally loaded from checkpoint).
        """
        # Init the stats struct
        visdom_env_charts = (
            vis_utils.get_visdom_env(self.visdom_env, exp_dir) + "_charts"
        )
        stats = Stats(
            # log_vars should be a list, but OmegaConf might load them as ListConfig
            list(log_vars),
            plot_file=os.path.join(exp_dir, "train_stats.pdf"),
            visdom_env=visdom_env_charts,
            verbose=False,
            visdom_server=self.visdom_server,
            visdom_port=self.visdom_port,
        )

        model_path = None
        if resume:
            if resume_epoch > 0:
                model_path = model_io.get_checkpoint(exp_dir, resume_epoch)
                if not os.path.isfile(model_path):
                    raise FileNotFoundError(
                        f"Cannot find stats from epoch {resume_epoch}."
                    )
            else:
                model_path = model_io.find_last_checkpoint(exp_dir)

        if model_path is not None:
            stats_path = model_io.get_stats_path(model_path)
            stats_load = model_io.load_stats(stats_path)

            # Determine if stats should be reset
            if resume:
                if stats_load is None:
                    logger.warning("\n\n\n\nCORRUPT STATS -> clearing stats\n\n\n\n")
                    last_epoch = model_io.parse_epoch_from_model_path(model_path)
                    logger.info(f"Estimated resume epoch = {last_epoch}")

                    # Reset the stats struct
                    for _ in range(last_epoch + 1):
                        stats.new_epoch()
                    assert last_epoch == stats.epoch
                else:
                    logger.info(f"Found previous stats in {stats_path} -> resuming.")
                    stats = stats_load

                # Update stats properties in case it was reset on load
                stats.visdom_env = visdom_env_charts
                stats.visdom_server = self.visdom_server
                stats.visdom_port = self.visdom_port
                stats.plot_file = os.path.join(exp_dir, "train_stats.pdf")
                stats.synchronize_logged_vars(log_vars)
            else:
                logger.info("Clearing stats")

        return stats

    def _training_or_validation_epoch(
        self,
        epoch: int,
        loader: DataLoader,
        model: ImplicitronModelBase,
        optimizer: torch.optim.Optimizer,
        stats: Stats,
        validation: bool,
        *,
        accelerator: Optional[Accelerator],
        bp_var: str = "objective",
        device: torch.device,
        **kwargs,
    ) -> None:
        """
        This is the main loop for training and evaluation including:
        model forward pass, loss computation, backward pass and visualization.

        Args:
            epoch: The index of the current epoch
            loader: The dataloader to use for the loop
            model: The model module optionally loaded from checkpoint
            optimizer: The optimizer module optionally loaded from checkpoint
            stats: The stats struct, also optionally loaded from checkpoint
            validation: If true, run the loop with the model in eval mode
                and skip the backward pass
            accelerator: An optional Accelerator instance.
            bp_var: The name of the key in the model output `preds` dict which
                should be used as the loss for the backward pass.
            device: The device on which to run the model.
        """
        if validation:
            model.eval()
            trainmode = "val"
        else:
            model.train()
            trainmode = "train"

        t_start = time.time()

        # get the visdom env name
        visdom_env_imgs = stats.visdom_env + "_images_" + trainmode
        viz = vis_utils.get_visdom_connection(
            server=stats.visdom_server,
            port=stats.visdom_port,
        )

        # Iterate through the batches
        n_batches = len(loader)
        for it, net_input in enumerate(loader):
            last_iter = it == n_batches - 1

            # move to gpu where possible (in place)
            net_input = net_input.to(device)

            # run the forward pass
            if not validation:
                optimizer.zero_grad()
                preds = model(
                    **{**net_input, "evaluation_mode": EvaluationMode.TRAINING}
                )
            else:
                with torch.no_grad():
                    preds = model(
                        **{**net_input, "evaluation_mode": EvaluationMode.EVALUATION}
                    )

            # make sure we dont overwrite something
            assert all(k not in preds for k in net_input.keys())
            # merge everything into one big dict
            preds.update(net_input)

            # update the stats logger
            stats.update(preds, time_start=t_start, stat_set=trainmode)
            # pyre-ignore [16]
            assert stats.it[trainmode] == it, "inconsistent stat iteration number!"

            # print textual status update
            if it % self.metric_print_interval == 0 or last_iter:
                stats.print(stat_set=trainmode, max_it=n_batches)

            # visualize results
            if (
                (accelerator is None or accelerator.is_local_main_process)
                and self.visualize_interval > 0
                and it % self.visualize_interval == 0
            ):
                prefix = f"e{stats.epoch}_it{stats.it[trainmode]}"
                if hasattr(model, "visualize"):
                    # pyre-ignore [29]
                    model.visualize(
                        viz,
                        visdom_env_imgs,
                        preds,
                        prefix,
                    )

            # optimizer step
            if not validation:
                loss = preds[bp_var]
                assert torch.isfinite(loss).all(), "Non-finite loss!"
                # backprop
                if accelerator is None:
                    loss.backward()
                else:
                    accelerator.backward(loss)
                if self.clip_grad > 0.0:
                    # Optionally clip the gradient norms.
                    # Note: clip_grad_norm (without the underscore) is
                    # deprecated/removed in recent PyTorch; the in-place
                    # variant has the same return value.
                    total_norm = torch.nn.utils.clip_grad_norm_(
                        model.parameters(), self.clip_grad
                    )
                    if total_norm > self.clip_grad:
                        logger.debug(
                            f"Clipping gradient: {total_norm}"
                            + f" with coef {self.clip_grad / float(total_norm)}."
                        )

                optimizer.step()

    def _checkpoint(
        self,
        accelerator: Optional[Accelerator],
        epoch: int,
        exp_dir: str,
        model: ImplicitronModelBase,
        optimizer: torch.optim.Optimizer,
        stats: Stats,
    ):
        """
        Save a model and its corresponding Stats object to a file, if
        `self.store_checkpoints` is True. In addition, if
        `self.store_checkpoints_purge` is True, remove any checkpoints older
        than `self.store_checkpoints_purge` epochs old.
        """
        # Only the (local) main process writes checkpoints to avoid clobbering.
        if self.store_checkpoints and (
            accelerator is None or accelerator.is_local_main_process
        ):
            if self.store_checkpoints_purge > 0:
                for prev_epoch in range(epoch - self.store_checkpoints_purge):
                    model_io.purge_epoch(exp_dir, prev_epoch)
            outfile = model_io.get_checkpoint(exp_dir, epoch)
            # Unwrap the model from the accelerator so the saved state_dict
            # has plain (non-distributed) parameter names.
            unwrapped_model = (
                model if accelerator is None else accelerator.unwrap_model(model)
            )
            model_io.safe_save_model(
                unwrapped_model, stats, outfile, optimizer=optimizer
            )

View File

@@ -1,17 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import random
import numpy as np
import torch
def seed_all_random_engines(seed: int) -> None:
    """Seed Python's `random`, NumPy, and PyTorch RNGs with the same value."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

View File

@@ -1,16 +1,296 @@
data_source_class_type: ImplicitronDataSource
model_factory_class_type: ImplicitronModelFactory
optimizer_factory_class_type: ImplicitronOptimizerFactory
training_loop_class_type: ImplicitronTrainingLoop
seed: 42
detect_anomaly: false
exp_dir: ./data/default_experiment/
hydra:
run:
dir: .
output_subdir: null
mode: RUN
data_source_ImplicitronDataSource_args:
generic_model_args:
mask_images: true
mask_depths: true
render_image_width: 400
render_image_height: 400
mask_threshold: 0.5
output_rasterized_mc: false
bg_color:
- 0.0
- 0.0
- 0.0
num_passes: 1
chunk_size_grid: 4096
render_features_dimensions: 3
tqdm_trigger_threshold: 16
n_train_target_views: 1
sampling_mode_training: mask_sample
sampling_mode_evaluation: full_grid
global_encoder_class_type: null
raysampler_class_type: AdaptiveRaySampler
renderer_class_type: MultiPassEmissionAbsorptionRenderer
image_feature_extractor_class_type: null
view_pooler_enabled: false
implicit_function_class_type: NeuralRadianceFieldImplicitFunction
view_metrics_class_type: ViewMetrics
regularization_metrics_class_type: RegularizationMetrics
loss_weights:
loss_rgb_mse: 1.0
loss_prev_stage_rgb_mse: 1.0
loss_mask_bce: 0.0
loss_prev_stage_mask_bce: 0.0
log_vars:
- loss_rgb_psnr_fg
- loss_rgb_psnr
- loss_rgb_mse
- loss_rgb_huber
- loss_depth_abs
- loss_depth_abs_fg
- loss_mask_neg_iou
- loss_mask_bce
- loss_mask_beta_prior
- loss_eikonal
- loss_density_tv
- loss_depth_neg_penalty
- loss_autodecoder_norm
- loss_prev_stage_rgb_mse
- loss_prev_stage_rgb_psnr_fg
- loss_prev_stage_rgb_psnr
- loss_prev_stage_mask_bce
- objective
- epoch
- sec/it
global_encoder_HarmonicTimeEncoder_args:
n_harmonic_functions: 10
append_input: true
time_divisor: 1.0
global_encoder_SequenceAutodecoder_args:
autodecoder_args:
encoding_dim: 0
n_instances: 0
init_scale: 1.0
ignore_input: false
raysampler_AdaptiveRaySampler_args:
image_width: 400
image_height: 400
sampling_mode_training: mask_sample
sampling_mode_evaluation: full_grid
n_pts_per_ray_training: 64
n_pts_per_ray_evaluation: 64
n_rays_per_image_sampled_from_mask: 1024
stratified_point_sampling_training: true
stratified_point_sampling_evaluation: false
scene_extent: 8.0
scene_center:
- 0.0
- 0.0
- 0.0
raysampler_NearFarRaySampler_args:
image_width: 400
image_height: 400
sampling_mode_training: mask_sample
sampling_mode_evaluation: full_grid
n_pts_per_ray_training: 64
n_pts_per_ray_evaluation: 64
n_rays_per_image_sampled_from_mask: 1024
stratified_point_sampling_training: true
stratified_point_sampling_evaluation: false
min_depth: 0.1
max_depth: 8.0
renderer_LSTMRenderer_args:
num_raymarch_steps: 10
init_depth: 17.0
init_depth_noise_std: 0.0005
hidden_size: 16
n_feature_channels: 256
bg_color: null
verbose: false
renderer_MultiPassEmissionAbsorptionRenderer_args:
raymarcher_class_type: EmissionAbsorptionRaymarcher
n_pts_per_ray_fine_training: 64
n_pts_per_ray_fine_evaluation: 64
stratified_sampling_coarse_training: true
stratified_sampling_coarse_evaluation: false
append_coarse_samples_to_fine: true
density_noise_std_train: 0.0
return_weights: false
raymarcher_CumsumRaymarcher_args:
surface_thickness: 1
bg_color:
- 0.0
background_opacity: 0.0
density_relu: true
blend_output: false
raymarcher_EmissionAbsorptionRaymarcher_args:
surface_thickness: 1
bg_color:
- 0.0
background_opacity: 10000000000.0
density_relu: true
blend_output: false
renderer_SignedDistanceFunctionRenderer_args:
render_features_dimensions: 3
ray_tracer_args:
object_bounding_sphere: 1.0
sdf_threshold: 5.0e-05
line_search_step: 0.5
line_step_iters: 1
sphere_tracing_iters: 10
n_steps: 100
n_secant_steps: 8
ray_normal_coloring_network_args:
feature_vector_size: 3
mode: idr
d_in: 9
d_out: 3
dims:
- 512
- 512
- 512
- 512
weight_norm: true
n_harmonic_functions_dir: 0
pooled_feature_dim: 0
bg_color:
- 0.0
soft_mask_alpha: 50.0
image_feature_extractor_ResNetFeatureExtractor_args:
name: resnet34
pretrained: true
stages:
- 1
- 2
- 3
- 4
normalize_image: true
image_rescale: 0.16
first_max_pool: true
proj_dim: 32
l2_norm: true
add_masks: true
add_images: true
global_average_pool: false
feature_rescale: 1.0
view_pooler_args:
feature_aggregator_class_type: AngleWeightedReductionFeatureAggregator
view_sampler_args:
masked_sampling: false
sampling_mode: bilinear
feature_aggregator_AngleWeightedIdentityFeatureAggregator_args:
exclude_target_view: true
exclude_target_view_mask_features: true
concatenate_output: true
weight_by_ray_angle_gamma: 1.0
min_ray_angle_weight: 0.1
feature_aggregator_AngleWeightedReductionFeatureAggregator_args:
exclude_target_view: true
exclude_target_view_mask_features: true
concatenate_output: true
reduction_functions:
- AVG
- STD
weight_by_ray_angle_gamma: 1.0
min_ray_angle_weight: 0.1
feature_aggregator_IdentityFeatureAggregator_args:
exclude_target_view: true
exclude_target_view_mask_features: true
concatenate_output: true
feature_aggregator_ReductionFeatureAggregator_args:
exclude_target_view: true
exclude_target_view_mask_features: true
concatenate_output: true
reduction_functions:
- AVG
- STD
implicit_function_IdrFeatureField_args:
feature_vector_size: 3
d_in: 3
d_out: 1
dims:
- 512
- 512
- 512
- 512
- 512
- 512
- 512
- 512
geometric_init: true
bias: 1.0
skip_in: []
weight_norm: true
n_harmonic_functions_xyz: 0
pooled_feature_dim: 0
encoding_dim: 0
implicit_function_NeRFormerImplicitFunction_args:
n_harmonic_functions_xyz: 10
n_harmonic_functions_dir: 4
n_hidden_neurons_dir: 128
latent_dim: 0
input_xyz: true
xyz_ray_dir_in_camera_coords: false
color_dim: 3
transformer_dim_down_factor: 2.0
n_hidden_neurons_xyz: 80
n_layers_xyz: 2
append_xyz:
- 1
implicit_function_NeuralRadianceFieldImplicitFunction_args:
n_harmonic_functions_xyz: 10
n_harmonic_functions_dir: 4
n_hidden_neurons_dir: 128
latent_dim: 0
input_xyz: true
xyz_ray_dir_in_camera_coords: false
color_dim: 3
transformer_dim_down_factor: 1.0
n_hidden_neurons_xyz: 256
n_layers_xyz: 8
append_xyz:
- 5
implicit_function_SRNHyperNetImplicitFunction_args:
hypernet_args:
n_harmonic_functions: 3
n_hidden_units: 256
n_layers: 2
n_hidden_units_hypernet: 256
n_layers_hypernet: 1
in_features: 3
out_features: 256
latent_dim_hypernet: 0
latent_dim: 0
xyz_in_camera_coords: false
pixel_generator_args:
n_harmonic_functions: 4
n_hidden_units: 256
n_hidden_units_color: 128
n_layers: 2
in_features: 256
out_features: 3
ray_dir_in_camera_coords: false
implicit_function_SRNImplicitFunction_args:
raymarch_function_args:
n_harmonic_functions: 3
n_hidden_units: 256
n_layers: 2
in_features: 3
out_features: 256
latent_dim: 0
xyz_in_camera_coords: false
raymarch_function: null
pixel_generator_args:
n_harmonic_functions: 4
n_hidden_units: 256
n_hidden_units_color: 128
n_layers: 2
in_features: 256
out_features: 3
ray_dir_in_camera_coords: false
view_metrics_ViewMetrics_args: {}
regularization_metrics_RegularizationMetrics_args: {}
solver_args:
breed: adam
weight_decay: 0.0
lr_policy: multistep
lr: 0.0005
gamma: 0.1
momentum: 0.9
betas:
- 0.9
- 0.999
milestones: []
max_epochs: 1000
data_source_args:
dataset_map_provider_class_type: ???
data_loader_map_provider_class_type: SequenceDataLoaderMapProvider
dataset_map_provider_BlenderDatasetMapProvider_args:
@@ -62,15 +342,20 @@ data_source_ImplicitronDataSource_args:
test_on_train: false
only_test_set: false
load_eval_batches: true
n_known_frames_for_test: 0
dataset_class_type: JsonIndexDataset
path_manager_factory_class_type: PathManagerFactory
dataset_JsonIndexDataset_args:
path_manager: null
frame_annotations_file: ''
sequence_annotations_file: ''
subset_lists_file: ''
subsets: null
limit_to: 0
limit_sequences_to: 0
pick_sequence: []
exclude_sequence: []
limit_category_to: []
dataset_root: ''
load_images: true
load_depths: true
load_depth_masks: true
@@ -88,6 +373,7 @@ data_source_ImplicitronDataSource_args:
n_frames_per_sequence: -1
seed: 0
sort_frames: false
eval_batches: null
path_manager_factory_PathManagerFactory_args:
silence_logs: true
dataset_map_provider_LlffDatasetMapProvider_args:
@@ -97,16 +383,6 @@ data_source_ImplicitronDataSource_args:
n_known_frames_for_test: null
path_manager_factory_PathManagerFactory_args:
silence_logs: true
downscale_factor: 4
dataset_map_provider_RenderedMeshDatasetMapProvider_args:
num_views: 40
data_file: null
azimuth_range: 180.0
resolution: 128
use_point_light: true
path_manager_factory_class_type: PathManagerFactory
path_manager_factory_PathManagerFactory_args:
silence_logs: true
data_loader_map_provider_SequenceDataLoaderMapProvider_args:
batch_size: 1
num_workers: 0
@@ -120,477 +396,30 @@ data_source_ImplicitronDataSource_args:
sample_consecutive_frames: false
consecutive_frames_max_gap: 0
consecutive_frames_max_gap_seconds: 0.1
data_loader_map_provider_SimpleDataLoaderMapProvider_args:
batch_size: 1
num_workers: 0
dataset_length_train: 0
dataset_length_val: 0
dataset_length_test: 0
model_factory_ImplicitronModelFactory_args:
resume: true
model_class_type: GenericModel
resume_epoch: -1
force_resume: false
model_GenericModel_args:
log_vars:
- loss_rgb_psnr_fg
- loss_rgb_psnr
- loss_rgb_mse
- loss_rgb_huber
- loss_depth_abs
- loss_depth_abs_fg
- loss_mask_neg_iou
- loss_mask_bce
- loss_mask_beta_prior
- loss_eikonal
- loss_density_tv
- loss_depth_neg_penalty
- loss_autodecoder_norm
- loss_prev_stage_rgb_mse
- loss_prev_stage_rgb_psnr_fg
- loss_prev_stage_rgb_psnr
- loss_prev_stage_mask_bce
- objective
- epoch
- sec/it
mask_images: true
mask_depths: true
render_image_width: 400
render_image_height: 400
mask_threshold: 0.5
output_rasterized_mc: false
bg_color:
- 0.0
- 0.0
- 0.0
num_passes: 1
chunk_size_grid: 4096
render_features_dimensions: 3
tqdm_trigger_threshold: 16
n_train_target_views: 1
sampling_mode_training: mask_sample
sampling_mode_evaluation: full_grid
global_encoder_class_type: null
raysampler_class_type: AdaptiveRaySampler
renderer_class_type: MultiPassEmissionAbsorptionRenderer
image_feature_extractor_class_type: null
view_pooler_enabled: false
implicit_function_class_type: NeuralRadianceFieldImplicitFunction
view_metrics_class_type: ViewMetrics
regularization_metrics_class_type: RegularizationMetrics
loss_weights:
loss_rgb_mse: 1.0
loss_prev_stage_rgb_mse: 1.0
loss_mask_bce: 0.0
loss_prev_stage_mask_bce: 0.0
global_encoder_HarmonicTimeEncoder_args:
n_harmonic_functions: 10
append_input: true
time_divisor: 1.0
global_encoder_SequenceAutodecoder_args:
autodecoder_args:
encoding_dim: 0
n_instances: 1
init_scale: 1.0
ignore_input: false
raysampler_AdaptiveRaySampler_args:
n_pts_per_ray_training: 64
n_pts_per_ray_evaluation: 64
n_rays_per_image_sampled_from_mask: 1024
n_rays_total_training: null
stratified_point_sampling_training: true
stratified_point_sampling_evaluation: false
scene_extent: 8.0
scene_center:
- 0.0
- 0.0
- 0.0
raysampler_NearFarRaySampler_args:
n_pts_per_ray_training: 64
n_pts_per_ray_evaluation: 64
n_rays_per_image_sampled_from_mask: 1024
n_rays_total_training: null
stratified_point_sampling_training: true
stratified_point_sampling_evaluation: false
min_depth: 0.1
max_depth: 8.0
renderer_LSTMRenderer_args:
num_raymarch_steps: 10
init_depth: 17.0
init_depth_noise_std: 0.0005
hidden_size: 16
n_feature_channels: 256
bg_color: null
verbose: false
renderer_MultiPassEmissionAbsorptionRenderer_args:
raymarcher_class_type: EmissionAbsorptionRaymarcher
n_pts_per_ray_fine_training: 64
n_pts_per_ray_fine_evaluation: 64
stratified_sampling_coarse_training: true
stratified_sampling_coarse_evaluation: false
append_coarse_samples_to_fine: true
density_noise_std_train: 0.0
return_weights: false
raymarcher_CumsumRaymarcher_args:
surface_thickness: 1
bg_color:
- 0.0
replicate_last_interval: false
background_opacity: 0.0
density_relu: true
blend_output: false
raymarcher_EmissionAbsorptionRaymarcher_args:
surface_thickness: 1
bg_color:
- 0.0
replicate_last_interval: false
background_opacity: 10000000000.0
density_relu: true
blend_output: false
renderer_SignedDistanceFunctionRenderer_args:
ray_normal_coloring_network_args:
feature_vector_size: 3
mode: idr
d_in: 9
d_out: 3
dims:
- 512
- 512
- 512
- 512
weight_norm: true
n_harmonic_functions_dir: 0
pooled_feature_dim: 0
bg_color:
- 0.0
soft_mask_alpha: 50.0
ray_tracer_args:
sdf_threshold: 5.0e-05
line_search_step: 0.5
line_step_iters: 1
sphere_tracing_iters: 10
n_steps: 100
n_secant_steps: 8
image_feature_extractor_ResNetFeatureExtractor_args:
name: resnet34
pretrained: true
stages:
- 1
- 2
- 3
- 4
normalize_image: true
image_rescale: 0.16
first_max_pool: true
proj_dim: 32
l2_norm: true
add_masks: true
add_images: true
global_average_pool: false
feature_rescale: 1.0
view_pooler_args:
feature_aggregator_class_type: AngleWeightedReductionFeatureAggregator
view_sampler_args:
masked_sampling: false
sampling_mode: bilinear
feature_aggregator_AngleWeightedIdentityFeatureAggregator_args:
exclude_target_view: true
exclude_target_view_mask_features: true
concatenate_output: true
weight_by_ray_angle_gamma: 1.0
min_ray_angle_weight: 0.1
feature_aggregator_AngleWeightedReductionFeatureAggregator_args:
exclude_target_view: true
exclude_target_view_mask_features: true
concatenate_output: true
reduction_functions:
- AVG
- STD
weight_by_ray_angle_gamma: 1.0
min_ray_angle_weight: 0.1
feature_aggregator_IdentityFeatureAggregator_args:
exclude_target_view: true
exclude_target_view_mask_features: true
concatenate_output: true
feature_aggregator_ReductionFeatureAggregator_args:
exclude_target_view: true
exclude_target_view_mask_features: true
concatenate_output: true
reduction_functions:
- AVG
- STD
implicit_function_IdrFeatureField_args:
d_in: 3
d_out: 1
dims:
- 512
- 512
- 512
- 512
- 512
- 512
- 512
- 512
geometric_init: true
bias: 1.0
skip_in: []
weight_norm: true
n_harmonic_functions_xyz: 0
pooled_feature_dim: 0
implicit_function_NeRFormerImplicitFunction_args:
n_harmonic_functions_xyz: 10
n_harmonic_functions_dir: 4
n_hidden_neurons_dir: 128
input_xyz: true
xyz_ray_dir_in_camera_coords: false
transformer_dim_down_factor: 2.0
n_hidden_neurons_xyz: 80
n_layers_xyz: 2
append_xyz:
- 1
implicit_function_NeuralRadianceFieldImplicitFunction_args:
n_harmonic_functions_xyz: 10
n_harmonic_functions_dir: 4
n_hidden_neurons_dir: 128
input_xyz: true
xyz_ray_dir_in_camera_coords: false
transformer_dim_down_factor: 1.0
n_hidden_neurons_xyz: 256
n_layers_xyz: 8
append_xyz:
- 5
implicit_function_SRNHyperNetImplicitFunction_args:
hypernet_args:
n_harmonic_functions: 3
n_hidden_units: 256
n_layers: 2
n_hidden_units_hypernet: 256
n_layers_hypernet: 1
in_features: 3
out_features: 256
xyz_in_camera_coords: false
pixel_generator_args:
n_harmonic_functions: 4
n_hidden_units: 256
n_hidden_units_color: 128
n_layers: 2
in_features: 256
out_features: 3
ray_dir_in_camera_coords: false
implicit_function_SRNImplicitFunction_args:
raymarch_function_args:
n_harmonic_functions: 3
n_hidden_units: 256
n_layers: 2
in_features: 3
out_features: 256
xyz_in_camera_coords: false
raymarch_function: null
pixel_generator_args:
n_harmonic_functions: 4
n_hidden_units: 256
n_hidden_units_color: 128
n_layers: 2
in_features: 256
out_features: 3
ray_dir_in_camera_coords: false
implicit_function_VoxelGridImplicitFunction_args:
harmonic_embedder_xyz_density_args:
n_harmonic_functions: 6
omega_0: 1.0
logspace: true
append_input: true
harmonic_embedder_xyz_color_args:
n_harmonic_functions: 6
omega_0: 1.0
logspace: true
append_input: true
harmonic_embedder_dir_color_args:
n_harmonic_functions: 6
omega_0: 1.0
logspace: true
append_input: true
decoder_density_class_type: MLPDecoder
decoder_color_class_type: MLPDecoder
use_multiple_streams: true
xyz_ray_dir_in_camera_coords: false
scaffold_calculating_epochs: []
scaffold_resolution:
- 128
- 128
- 128
scaffold_empty_space_threshold: 0.001
scaffold_occupancy_chunk_size: 'inf'
scaffold_max_pool_kernel_size: 3
scaffold_filter_points: true
volume_cropping_epochs: []
voxel_grid_density_args:
voxel_grid_class_type: FullResolutionVoxelGrid
extents:
- 2.0
- 2.0
- 2.0
translation:
- 0.0
- 0.0
- 0.0
init_std: 0.1
init_mean: 0.0
hold_voxel_grid_as_parameters: true
param_groups: {}
voxel_grid_CPFactorizedVoxelGrid_args:
align_corners: true
padding: zeros
mode: bilinear
n_features: 1
resolution_changes:
0:
- 128
- 128
- 128
n_components: 24
basis_matrix: true
voxel_grid_FullResolutionVoxelGrid_args:
align_corners: true
padding: zeros
mode: bilinear
n_features: 1
resolution_changes:
0:
- 128
- 128
- 128
voxel_grid_VMFactorizedVoxelGrid_args:
align_corners: true
padding: zeros
mode: bilinear
n_features: 1
resolution_changes:
0:
- 128
- 128
- 128
n_components: null
distribution_of_components: null
basis_matrix: true
voxel_grid_color_args:
voxel_grid_class_type: FullResolutionVoxelGrid
extents:
- 2.0
- 2.0
- 2.0
translation:
- 0.0
- 0.0
- 0.0
init_std: 0.1
init_mean: 0.0
hold_voxel_grid_as_parameters: true
param_groups: {}
voxel_grid_CPFactorizedVoxelGrid_args:
align_corners: true
padding: zeros
mode: bilinear
n_features: 1
resolution_changes:
0:
- 128
- 128
- 128
n_components: 24
basis_matrix: true
voxel_grid_FullResolutionVoxelGrid_args:
align_corners: true
padding: zeros
mode: bilinear
n_features: 1
resolution_changes:
0:
- 128
- 128
- 128
voxel_grid_VMFactorizedVoxelGrid_args:
align_corners: true
padding: zeros
mode: bilinear
n_features: 1
resolution_changes:
0:
- 128
- 128
- 128
n_components: null
distribution_of_components: null
basis_matrix: true
decoder_density_ElementwiseDecoder_args:
scale: 1.0
shift: 0.0
operation: IDENTITY
decoder_density_MLPDecoder_args:
param_groups: {}
network_args:
n_layers: 8
output_dim: 256
skip_dim: 39
hidden_dim: 256
input_skips:
- 5
skip_affine_trans: false
last_layer_bias_init: null
last_activation: RELU
use_xavier_init: true
decoder_color_ElementwiseDecoder_args:
scale: 1.0
shift: 0.0
operation: IDENTITY
decoder_color_MLPDecoder_args:
param_groups: {}
network_args:
n_layers: 8
output_dim: 256
skip_dim: 39
hidden_dim: 256
input_skips:
- 5
skip_affine_trans: false
last_layer_bias_init: null
last_activation: RELU
use_xavier_init: true
view_metrics_ViewMetrics_args: {}
regularization_metrics_RegularizationMetrics_args: {}
optimizer_factory_ImplicitronOptimizerFactory_args:
betas:
- 0.9
- 0.999
breed: Adam
exponential_lr_step_size: 250
gamma: 0.1
lr: 0.0005
lr_policy: MultiStepLR
momentum: 0.9
multistep_lr_milestones: []
weight_decay: 0.0
linear_exponential_lr_milestone: 200
linear_exponential_start_gamma: 0.1
foreach: true
group_learning_rates: {}
training_loop_ImplicitronTrainingLoop_args:
evaluator_class_type: ImplicitronEvaluator
evaluator_ImplicitronEvaluator_args:
is_multisequence: false
camera_difficulty_bin_breaks:
- 0.97
- 0.98
eval_only: false
max_epochs: 1000
store_checkpoints: true
store_checkpoints_purge: 1
test_interval: -1
test_when_finished: false
validation_interval: 1
clip_grad: 0.0
metric_print_interval: 5
visualize_interval: 1000
visdom_env: ''
visdom_port: 8097
visdom_server: http://127.0.0.1
architecture: generic
detect_anomaly: false
eval_only: false
exp_dir: ./data/default_experiment/
exp_idx: 0
gpu_idx: 0
metric_print_interval: 5
resume: true
resume_epoch: -1
seed: 0
store_checkpoints: true
store_checkpoints_purge: 1
test_interval: -1
test_when_finished: false
validation_interval: 1
visdom_env: ''
visdom_port: 8097
visdom_server: http://127.0.0.1
visualize_interval: 1000
clip_grad: 0.0
camera_difficulty_bin_breaks:
- 0.97
- 0.98
hydra:
run:
dir: .
output_subdir: null

View File

@@ -5,20 +5,23 @@
# LICENSE file in the root directory of this source tree.
import os
import tempfile
import unittest
from pathlib import Path
import torch
from hydra import compose, initialize_config_dir
from omegaconf import OmegaConf
from projects.implicitron_trainer.impl.optimizer_factory import (
ImplicitronOptimizerFactory,
)
from .. import experiment
from .utils import interactive_testing_requested, intercept_logs
def interactive_testing_requested() -> bool:
    """
    Certain tests are only useful when run interactively, and so are not regularly run.
    These are activated by this function returning True, which the user requests by
    setting the environment variable `PYTORCH3D_INTERACTIVE_TESTING` to 1.
    """
    return os.environ.get("PYTORCH3D_INTERACTIVE_TESTING", "") == "1"
internal = os.environ.get("FB_TEST", False)
@@ -30,10 +33,7 @@ DEBUG: bool = False
# TODO:
# - add enough files to skateboard_first_5 that this works on RE.
# - share common code with PyTorch3D tests?
def _parse_float_from_log(line):
return float(line.split()[-1])
# - deal with the temporary output files this test creates
class TestExperiment(unittest.TestCase):
@@ -44,18 +44,15 @@ class TestExperiment(unittest.TestCase):
# Test making minimal changes to the dataclass defaults.
if not interactive_testing_requested() or not internal:
return
# Manually override config values. Note that this is not necessary out-
# side of the tests!
cfg = OmegaConf.structured(experiment.Experiment)
cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_class_type = (
cfg = OmegaConf.structured(experiment.ExperimentConfig)
cfg.data_source_args.dataset_map_provider_class_type = (
"JsonIndexDatasetMapProvider"
)
dataset_args = (
cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
cfg.data_source_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
)
dataloader_args = (
cfg.data_source_ImplicitronDataSource_args.data_loader_map_provider_SequenceDataLoaderMapProvider_args
cfg.data_source_args.data_loader_map_provider_SequenceDataLoaderMapProvider_args
)
dataset_args.category = "skateboard"
dataset_args.test_restrict_sequence_id = 0
@@ -65,80 +62,18 @@ class TestExperiment(unittest.TestCase):
dataset_args.dataset_JsonIndexDataset_args.image_width = 80
dataloader_args.dataset_length_train = 1
dataloader_args.dataset_length_val = 1
cfg.training_loop_ImplicitronTrainingLoop_args.max_epochs = 2
cfg.training_loop_ImplicitronTrainingLoop_args.store_checkpoints = False
cfg.optimizer_factory_ImplicitronOptimizerFactory_args.multistep_lr_milestones = [
0,
1,
]
cfg.solver_args.max_epochs = 2
if DEBUG:
experiment.dump_cfg(cfg)
with intercept_logs(
logger_name="projects.implicitron_trainer.impl.training_loop",
regexp="LR change!",
) as intercepted_logs:
experiment_runner = experiment.Experiment(**cfg)
experiment_runner.run()
# Make sure LR decreased on 0th and 1st epoch 10fold.
self.assertEqual(intercepted_logs[0].split()[-1], "5e-06")
def test_exponential_lr(self):
# Test making minimal changes to the dataclass defaults.
if not interactive_testing_requested():
return
cfg = OmegaConf.structured(experiment.Experiment)
cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_class_type = (
"JsonIndexDatasetMapProvider"
)
dataset_args = (
cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
)
dataloader_args = (
cfg.data_source_ImplicitronDataSource_args.data_loader_map_provider_SequenceDataLoaderMapProvider_args
)
dataset_args.category = "skateboard"
dataset_args.test_restrict_sequence_id = 0
dataset_args.dataset_root = "manifold://co3d/tree/extracted"
dataset_args.dataset_JsonIndexDataset_args.limit_sequences_to = 5
dataset_args.dataset_JsonIndexDataset_args.image_height = 80
dataset_args.dataset_JsonIndexDataset_args.image_width = 80
dataloader_args.dataset_length_train = 1
dataloader_args.dataset_length_val = 1
cfg.training_loop_ImplicitronTrainingLoop_args.max_epochs = 2
cfg.training_loop_ImplicitronTrainingLoop_args.store_checkpoints = False
cfg.optimizer_factory_ImplicitronOptimizerFactory_args.lr_policy = "Exponential"
cfg.optimizer_factory_ImplicitronOptimizerFactory_args.exponential_lr_step_size = (
2
)
if DEBUG:
experiment.dump_cfg(cfg)
with intercept_logs(
logger_name="projects.implicitron_trainer.impl.training_loop",
regexp="LR change!",
) as intercepted_logs:
experiment_runner = experiment.Experiment(**cfg)
experiment_runner.run()
# Make sure we followed the exponential lr schedule with gamma=0.1,
# exponential_lr_step_size=2 -- so after two epochs, should
# decrease lr 10x to 5e-5.
self.assertEqual(intercepted_logs[0].split()[-1], "0.00015811388300841897")
self.assertEqual(intercepted_logs[1].split()[-1], "5e-05")
experiment.run_training(cfg)
def test_yaml_contents(self):
# Check that the default config values, defined by Experiment and its
# members, is what we expect it to be.
cfg = OmegaConf.structured(experiment.Experiment)
cfg = OmegaConf.structured(experiment.ExperimentConfig)
yaml = OmegaConf.to_yaml(cfg, sort_keys=False)
if DEBUG:
(DATA_DIR / "experiment.yaml").write_text(yaml)
self.assertEqual(yaml, (DATA_DIR / "experiment.yaml").read_text())
def test_load_configs(self):
# Check that all the pre-prepared configs are valid.
config_files = []
for pattern in ("repro_singleseq*.yaml", "repro_multiseq*.yaml"):
@@ -154,116 +89,3 @@ class TestExperiment(unittest.TestCase):
with self.subTest(file.name):
with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)):
compose(file.name)
def test_optimizer_factory(self):
model = torch.nn.Linear(2, 2)
adam, sched = ImplicitronOptimizerFactory(breed="Adam")(0, model)
self.assertIsInstance(adam, torch.optim.Adam)
sgd, sched = ImplicitronOptimizerFactory(breed="SGD")(0, model)
self.assertIsInstance(sgd, torch.optim.SGD)
adagrad, sched = ImplicitronOptimizerFactory(breed="Adagrad")(0, model)
self.assertIsInstance(adagrad, torch.optim.Adagrad)
class TestNerfRepro(unittest.TestCase):
@unittest.skip("This test runs full blender training.")
def test_nerf_blender(self):
# Train vanilla NERF.
# Set env vars BLENDER_DATASET_ROOT and BLENDER_SINGLESEQ_CLASS first!
if not interactive_testing_requested():
return
with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)):
cfg = compose(config_name="repro_singleseq_nerf_blender", overrides=[])
experiment_runner = experiment.Experiment(**cfg)
experiment.dump_cfg(cfg)
experiment_runner.run()
@unittest.skip("This test runs full llff training.")
def test_nerf_llff(self):
# Train vanilla NERF.
# Set env vars LLFF_DATASET_ROOT and LLFF_SINGLESEQ_CLASS first!
LLFF_SINGLESEQ_CLASS = os.environ["LLFF_SINGLESEQ_CLASS"]
if not interactive_testing_requested():
return
with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)):
cfg = compose(
config_name=f"repro_singleseq_nerf_llff_{LLFF_SINGLESEQ_CLASS}",
overrides=[],
)
experiment_runner = experiment.Experiment(**cfg)
experiment.dump_cfg(cfg)
experiment_runner.run()
@unittest.skip("This test runs nerf training on co3d v2 - manyview.")
def test_nerf_co3dv2_manyview(self):
# Train NERF
if not interactive_testing_requested():
return
with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)):
cfg = compose(
config_name="repro_singleseq_v2_nerf",
overrides=[],
)
experiment_runner = experiment.Experiment(**cfg)
experiment.dump_cfg(cfg)
experiment_runner.run()
@unittest.skip("This test runs nerformer training on co3d v2 - fewview.")
def test_nerformer_co3dv2_fewview(self):
# Train NeRFormer
if not interactive_testing_requested():
return
with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)):
cfg = compose(
config_name="repro_multiseq_v2_nerformer",
overrides=[],
)
experiment_runner = experiment.Experiment(**cfg)
experiment.dump_cfg(cfg)
experiment_runner.run()
@unittest.skip("This test checks resuming of the NeRF training.")
def test_nerf_blender_resume(self):
# Train one train batch of NeRF, then resume for one more batch.
# Set env vars BLENDER_DATASET_ROOT and BLENDER_SINGLESEQ_CLASS first!
if not interactive_testing_requested():
return
with initialize_config_dir(config_dir=str(IMPLICITRON_CONFIGS_DIR)):
with tempfile.TemporaryDirectory() as exp_dir:
cfg = compose(config_name="repro_singleseq_nerf_blender", overrides=[])
cfg.exp_dir = exp_dir
# set dataset len to 1
# fmt: off
(
cfg
.data_source_ImplicitronDataSource_args
.data_loader_map_provider_SequenceDataLoaderMapProvider_args
.dataset_length_train
) = 1
# fmt: on
# run for one epoch
cfg.training_loop_ImplicitronTrainingLoop_args.max_epochs = 1
experiment_runner = experiment.Experiment(**cfg)
experiment.dump_cfg(cfg)
experiment_runner.run()
# update num epochs + 2, let the optimizer resume
cfg.training_loop_ImplicitronTrainingLoop_args.max_epochs = 3
experiment_runner = experiment.Experiment(**cfg)
experiment_runner.run()
# start from scratch
cfg.model_factory_ImplicitronModelFactory_args.resume = False
experiment_runner = experiment.Experiment(**cfg)
experiment_runner.run()
# force resume from epoch 1
cfg.model_factory_ImplicitronModelFactory_args.resume = True
cfg.model_factory_ImplicitronModelFactory_args.force_resume = True
cfg.model_factory_ImplicitronModelFactory_args.resume_epoch = 1
experiment_runner = experiment.Experiment(**cfg)
experiment_runner.run()

View File

@@ -1,162 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import os
import unittest
import torch
from pytorch3d.implicitron.tools.config import expand_args_fields, get_default_args
from ..impl.optimizer_factory import ImplicitronOptimizerFactory
internal = os.environ.get("FB_TEST", False)
class TestOptimizerFactory(unittest.TestCase):
    """
    Tests of how ImplicitronOptimizerFactory distributes a module tree's
    parameters into named optimizer parameter groups, driven by the
    `param_groups` attribute that (test helper) Node instances may carry.
    """
    def setUp(self) -> None:
        torch.manual_seed(42)
        # Expand the factory's dataclass fields before instantiating it.
        expand_args_fields(ImplicitronOptimizerFactory)
    def _get_param_groups(self, model):
        # Build a factory from the default config and return the mapping
        # {group_name: [parameters]} it computes for `model`.
        default_cfg = get_default_args(ImplicitronOptimizerFactory)
        factory = ImplicitronOptimizerFactory(default_cfg)
        return factory._get_param_groups(model)
    def _assert_allin(self, a, param_groups, key):
        # Assert that `a` and `param_groups[key]` contain exactly the same
        # elements (membership checked in both directions, verbose errors).
        with self.subTest(f"Testing key {key}"):
            b = param_groups[key]
            for el in a:
                if el not in b:
                    raise ValueError(
                        f"Element {el}\n\n from:\n\n {a}\n\n not in:\n\n {b}\n\n."
                        + f" Full param groups = \n\n{param_groups}"
                    )
            for el in b:
                if el not in a:
                    raise ValueError(
                        f"Element {el}\n\n from:\n\n {b}\n\n not in:\n\n {a}\n\n."
                        + f" Full param groups = \n\n{param_groups}"
                    )
    def test_default_param_group_assignment(self):
        # With no overrides anywhere, every parameter lands in "default".
        pa, pb, pc = [torch.nn.Parameter(data=torch.tensor(i * 1.0)) for i in range(3)]
        na, nb = Node(params=[pa]), Node(params=[pb])
        root = Node(children=[na, nb], params=[pc])
        param_groups = self._get_param_groups(root)
        self._assert_allin([pa, pb, pc], param_groups, "default")
    def test_member_overrides_default_param_group_assignment(self):
        # A parent can reroute one of its member modules ("m1") to a group.
        pa, pb, pc = [torch.nn.Parameter(data=torch.tensor(i * 1.0)) for i in range(3)]
        na, nb = Node(params=[pa]), Node(params=[pb])
        root = Node(children=[na, nb], params=[pc], param_groups={"m1": "pb"})
        param_groups = self._get_param_groups(root)
        self._assert_allin([pa, pc], param_groups, "default")
        self._assert_allin([pb], param_groups, "pb")
    def test_self_overrides_member_param_group_assignment(self):
        # A module's own "self" assignment wins over its parent's member one.
        pa, pb, pc = [torch.nn.Parameter(data=torch.tensor(i * 1.0)) for i in range(3)]
        na, nb = Node(params=[pa]), Node(params=[pb], param_groups={"self": "pb_self"})
        root = Node(children=[na, nb], params=[pc], param_groups={"m1": "pb_member"})
        param_groups = self._get_param_groups(root)
        self._assert_allin([pa, pc], param_groups, "default")
        self._assert_allin([pb], param_groups, "pb_self")
        assert len(param_groups["pb_member"]) == 0, param_groups
    def test_param_overrides_self_param_group_assignment(self):
        # NOTE(review): the "p1" override names a parameter this node does not
        # have (its only parameter is "p0"), so the asserted outcome is the
        # same as the "self" case above — confirm that this is intentional.
        pa, pb, pc = [torch.nn.Parameter(data=torch.tensor(i * 1.0)) for i in range(3)]
        na, nb = Node(params=[pa]), Node(
            params=[pb], param_groups={"self": "pb_self", "p1": "pb_param"}
        )
        root = Node(children=[na, nb], params=[pc], param_groups={"m1": "pb_member"})
        param_groups = self._get_param_groups(root)
        self._assert_allin([pa, pc], param_groups, "default")
        self._assert_allin([pb], param_groups, "pb_self")
        assert len(param_groups["pb_member"]) == 0, param_groups
    def test_no_param_groups_defined(self):
        # Nodes entirely without a `param_groups` attribute act like defaults.
        pa, pb, pc = [torch.nn.Parameter(data=torch.tensor(i * 1.0)) for i in range(3)]
        na, nb = Node(params=[pa]), Node(params=[pb])
        root = Node(children=[na, nb], params=[pc])
        param_groups = self._get_param_groups(root)
        self._assert_allin([pa, pb, pc], param_groups, "default")
    def test_tree_param_groups_defined(self):
        """
        Test generic tree assignment.
        A0
        |---------------------------
        |       |                  |
        Bb      M                  J-
        |-----                     |-------
        |    |                     |      |
        C    Ddg                   K      Ll
        |--------------
        |   |    |    |
        E4  Ff   G    H-
        All nodes have one parameter. Character next to the capital
        letter means they have added something to their `parameter_groups`:
            - small letter same as capital means self is set to that letter
            - small letter different than capital means that member is set
              (the one that is named like that)
            - number means parameter's parameter_group is set like that
            - "-" means it does not have `parameter_groups` member
        """
        p = [torch.nn.Parameter(data=torch.tensor(i * 1.0)) for i in range(12)]
        L = Node(params=[p[11]], param_groups={"self": "l"})
        K = Node(params=[p[10]], param_groups={})
        J = Node(params=[p[9]], param_groups=None, children=[K, L])
        M = Node(params=[p[8]], param_groups={})
        E = Node(params=[p[4]], param_groups={"p0": "4"})
        F = Node(params=[p[5]], param_groups={"self": "f"})
        G = Node(params=[p[6]], param_groups={})
        H = Node(params=[p[7]], param_groups=None)
        D = Node(
            params=[p[3]], param_groups={"self": "d", "m2": "g"}, children=[E, F, G, H]
        )
        C = Node(params=[p[2]], param_groups={})
        B = Node(params=[p[1]], param_groups={"self": "b"}, children=[C, D])
        A = Node(params=[p[0]], param_groups={"p0": "0"}, children=[B, M, J])
        param_groups = self._get_param_groups(A)
        # if parts of the group belong to two different categories assert is repeated
        # parameter level
        self._assert_allin([p[0]], param_groups, "0")
        self._assert_allin([p[4]], param_groups, "4")
        # self level
        self._assert_allin([p[5]], param_groups, "f")
        self._assert_allin([p[11]], param_groups, "l")
        self._assert_allin([p[2], p[1]], param_groups, "b")
        self._assert_allin([p[7], p[3]], param_groups, "d")
        # member level
        self._assert_allin([p[6]], param_groups, "g")
        # inherit level
        self._assert_allin([p[7], p[3]], param_groups, "d")
        self._assert_allin([p[2], p[1]], param_groups, "b")
        # default level
        self._assert_allin([p[8], p[9], p[10]], param_groups, "default")
class Node(torch.nn.Module):
    """Minimal module used to build test trees.

    Child modules are registered as ``m0, m1, ...`` and parameters as
    ``p0, p1, ...``; when a ``param_groups`` mapping is given it is attached
    to the instance verbatim.
    """

    def __init__(self, children=(), params=(), param_groups=None):
        super().__init__()
        for idx, submodule in enumerate(children):
            self.add_module("m" + str(idx), submodule)
        for idx, parameter in enumerate(params):
            setattr(self, "p" + str(idx), parameter)
        if param_groups is not None:
            self.param_groups = param_groups

    def __str__(self):
        parts = [
            "modules:\n",
            str(self._modules),
            "\nparameters\n",
            str(self._parameters),
        ]
        return "".join(parts)

View File

@@ -1,27 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import os
import unittest
from .. import visualize_reconstruction
from .utils import interactive_testing_requested
internal = os.environ.get("FB_TEST", False)
class TestVisualize(unittest.TestCase):
    """Interactive smoke test for the reconstruction visualization script."""

    def test_from_defaults(self):
        # Only meaningful when run interactively against a trained checkpoint.
        if not interactive_testing_requested():
            return
        checkpoint_dir = os.environ["exp_dir"]
        overrides = {
            "exp_dir": checkpoint_dir,
            "n_eval_cameras": "40",
            "render_size": "[64,64]",
            "video_size": "[256,256]",
        }
        argv = [f"{name}={value}" for name, value in overrides.items()]
        visualize_reconstruction.main(argv)

View File

@@ -1,40 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import contextlib
import logging
import os
import re
@contextlib.contextmanager
def intercept_logs(logger_name: str, regexp: str):
    """Collect messages from ``logger_name`` while the context is active.

    Yields a list that is filled with every formatted message emitted on the
    named logger whose text matches ``regexp``. Records are never dropped:
    the installed filter always returns True, so normal logging continues.
    The filter is removed again on exit.
    """
    captured = []
    target_logger = logging.getLogger(logger_name)

    class _Recorder(logging.Filter):
        def filter(self, record):
            text = record.getMessage()
            if re.search(regexp, text) is not None:
                captured.append(text)
            # True keeps the record flowing to the handlers.
            return True

    recorder = _Recorder()
    target_logger.addFilter(recorder)
    try:
        yield captured
    finally:
        target_logger.removeFilter(recorder)
def interactive_testing_requested() -> bool:
    """
    Certain tests are only useful when run interactively, and so are not
    regularly run. They are activated when this function returns True, which
    the user requests by setting the environment variable
    `PYTORCH3D_INTERACTIVE_TESTING` to 1.
    """
    flag = os.environ.get("PYTORCH3D_INTERACTIVE_TESTING", "")
    return flag == "1"

View File

@@ -5,68 +5,319 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""
Script to visualize a previously trained model. Example call:
"""Script to visualize a previously trained model. Example call:
pytorch3d_implicitron_visualizer \
exp_dir='./exps/checkpoint_dir' visdom_show_preds=True visdom_port=8097 \
projects/implicitron_trainer/visualize_reconstruction.py
exp_dir='./exps/checkpoint_dir' visdom_show_preds=True visdom_port=8097
n_eval_cameras=40 render_size="[64,64]" video_size="[256,256]"
"""
import math
import os
import random
import sys
from typing import Optional, Tuple
import numpy as np
import torch
from omegaconf import DictConfig, OmegaConf
from pytorch3d.implicitron.models.visualization.render_flyaround import render_flyaround
from pytorch3d.implicitron.tools.config import enable_get_default_args, get_default_args
import torch.nn.functional as Fu
from omegaconf import OmegaConf
from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource
from pytorch3d.implicitron.dataset.dataset_base import DatasetBase, FrameData
from pytorch3d.implicitron.dataset.utils import is_train_frame
from pytorch3d.implicitron.models.base_model import EvaluationMode
from pytorch3d.implicitron.tools.configurable import get_default_args
from pytorch3d.implicitron.tools.eval_video_trajectory import (
generate_eval_video_cameras,
)
from pytorch3d.implicitron.tools.video_writer import VideoWriter
from pytorch3d.implicitron.tools.vis_utils import (
get_visdom_connection,
make_depth_image,
)
from tqdm import tqdm
from .experiment import Experiment
from .experiment import init_model
def visualize_reconstruction(
def render_sequence(
dataset: DatasetBase,
sequence_name: str,
model: torch.nn.Module,
video_path,
n_eval_cameras=40,
fps=20,
max_angle=2 * math.pi,
trajectory_type="circular_lsq_fit",
trajectory_scale=1.1,
scene_center=(0.0, 0.0, 0.0),
up=(0.0, -1.0, 0.0),
traj_offset=0.0,
n_source_views=9,
viz_env="debug",
visdom_show_preds=False,
visdom_server="http://127.0.0.1",
visdom_port=8097,
num_workers=10,
seed=None,
video_resize=None,
):
if seed is None:
seed = hash(sequence_name)
if visdom_show_preds:
viz = get_visdom_connection(server=visdom_server, port=visdom_port)
else:
viz = None
print(f"Loading all data of sequence '{sequence_name}'.")
seq_idx = list(dataset.sequence_indices_in_order(sequence_name))
train_data = _load_whole_dataset(dataset, seq_idx, num_workers=num_workers)
assert all(train_data.sequence_name[0] == sn for sn in train_data.sequence_name)
sequence_set_name = "train" if is_train_frame(train_data.frame_type)[0] else "test"
print(f"Sequence set = {sequence_set_name}.")
train_cameras = train_data.camera
time = torch.linspace(0, max_angle, n_eval_cameras + 1)[:n_eval_cameras]
test_cameras = generate_eval_video_cameras(
train_cameras,
time=time,
n_eval_cams=n_eval_cameras,
trajectory_type=trajectory_type,
trajectory_scale=trajectory_scale,
scene_center=scene_center,
up=up,
focal_length=None,
principal_point=torch.zeros(n_eval_cameras, 2),
traj_offset_canonical=(0.0, 0.0, traj_offset),
)
# sample the source views reproducibly
with torch.random.fork_rng():
torch.manual_seed(seed)
source_views_i = torch.randperm(len(seq_idx))[:n_source_views]
# add the first dummy view that will get replaced with the target camera
source_views_i = Fu.pad(source_views_i, [1, 0])
source_views = [seq_idx[i] for i in source_views_i.tolist()]
batch = _load_whole_dataset(dataset, source_views, num_workers=num_workers)
assert all(batch.sequence_name[0] == sn for sn in batch.sequence_name)
preds_total = []
for n in tqdm(range(n_eval_cameras), total=n_eval_cameras):
# set the first batch camera to the target camera
for k in ("R", "T", "focal_length", "principal_point"):
getattr(batch.camera, k)[0] = getattr(test_cameras[n], k)
# Move to cuda
net_input = batch.cuda()
with torch.no_grad():
preds = model(**{**net_input, "evaluation_mode": EvaluationMode.EVALUATION})
# make sure we dont overwrite something
assert all(k not in preds for k in net_input.keys())
preds.update(net_input) # merge everything into one big dict
# Render the predictions to images
rendered_pred = images_from_preds(preds)
preds_total.append(rendered_pred)
# show the preds every 5% of the export iterations
if visdom_show_preds and (
n % max(n_eval_cameras // 20, 1) == 0 or n == n_eval_cameras - 1
):
show_predictions(
preds_total,
sequence_name=batch.sequence_name[0],
viz=viz,
viz_env=viz_env,
)
print(f"Exporting videos for sequence {sequence_name} ...")
generate_prediction_videos(
preds_total,
sequence_name=batch.sequence_name[0],
viz=viz,
viz_env=viz_env,
fps=fps,
video_path=video_path,
resize=video_resize,
)
def _load_whole_dataset(dataset, idx, num_workers=10):
    # Load the dataset items at indices `idx` as one collated batch.
    # Using batch_size == len(idx) makes the loader yield everything in a
    # single step, so `next(iter(...))` returns the full selection at once.
    # NOTE(review): assumes `dataset` items are FrameData-compatible, since
    # FrameData.collate is used to merge them — confirm against callers.
    load_all_dataloader = torch.utils.data.DataLoader(
        torch.utils.data.Subset(dataset, idx),
        batch_size=len(idx),
        num_workers=num_workers,
        shuffle=False,
        collate_fn=FrameData.collate,
    )
    return next(iter(load_all_dataloader))
def images_from_preds(preds):
    # Convert a dict of model predictions into displayable image tensors.
    # Known keys are copied to CPU; depth-like keys are converted to
    # color-mapped depth images; single-channel outputs are tiled to 3
    # channels. Missing/None keys are skipped with a console message.
    imout = {}
    for k in (
        "image_rgb",
        "images_render",
        "fg_probability",
        "masks_render",
        "depths_render",
        "depth_map",
        "_all_source_images",
    ):
        if k == "_all_source_images" and "image_rgb" in preds:
            # Synthesized key: tile all source views (all but the first
            # entry of image_rgb) into a single mosaic image.
            src_ims = preds["image_rgb"][1:].cpu().detach().clone()
            v = _stack_images(src_ims, None)[None]
        else:
            if k not in preds or preds[k] is None:
                print(f"cant show {k}")
                continue
            v = preds[k].cpu().detach().clone()
        if k.startswith("depth"):
            # Resize the render mask to the depth resolution before
            # converting depth values to a visualizable image.
            # NOTE(review): assumes "masks_render" is always present when a
            # depth key is — confirm against the model outputs.
            mask_resize = Fu.interpolate(
                preds["masks_render"],
                size=preds[k].shape[2:],
                mode="nearest",
            )
            v = make_depth_image(preds[k], mask_resize)
        if v.shape[1] == 1:
            # Replicate single-channel images to RGB for display.
            v = v.repeat(1, 3, 1, 1)
        imout[k] = v.detach().cpu()
    return imout
def _stack_images(ims, size):
    """Tile a batch of images ``(B, C, H, W)`` into one ``(C, H', W')`` mosaic.

    The batch is laid out on a near-square grid (padded with black frames
    when B is not a perfect square), optionally resized to ``size`` with
    bilinear interpolation, and clamped to the displayable range [0, 1].
    """
    batch = ims.shape[0]
    side = int(np.ceil(np.sqrt(batch)))
    n_missing = side * side - batch
    if n_missing > 0:
        # Pad with black frames so the square grid is completely filled.
        filler = torch.zeros_like(ims[:1]).repeat(n_missing, 1, 1, 1)
        ims = torch.cat((ims, filler))
    grid = ims.view(side, side, *ims.shape[1:])
    rows = [torch.cat(list(grid_row), dim=2) for grid_row in grid]
    mosaic = torch.cat(rows, dim=1)
    if size is not None:
        mosaic = Fu.interpolate(mosaic[None], size=size, mode="bilinear")[0]
    return mosaic.clamp(0.0, 1.0)
def show_predictions(
    preds,
    sequence_name,
    viz,
    viz_env="visualizer",
    predicted_keys=(
        "images_render",
        "masks_render",
        "depths_render",
        "_all_source_images",
    ),
    n_samples=10,
    one_image_width=200,
):
    """Given a list of predictions visualize them into a single image using visdom.

    Args:
        preds: List of per-camera prediction dicts (as built by the caller).
        sequence_name: Name used in the displayed window title.
        viz: A visdom connection object providing `.image(...)`.
        viz_env: Visdom environment to draw into.
        predicted_keys: Which entries of each prediction dict to display.
        n_samples: Max number of cameras to sample for the display.
        one_image_width: Width each image is rescaled to before tiling.
    """
    assert isinstance(preds, list)
    pred_all = []
    # Randomly choose a subset of the rendered images, sorted by order in the sequence
    n_samples = min(n_samples, len(preds))
    pred_idx = sorted(random.sample(list(range(len(preds))), n_samples))
    for predi in pred_idx:
        # Stack the different predicted keys for the same camera vertically
        pred_all.append(
            torch.cat(
                [
                    torch.nn.functional.interpolate(
                        preds[predi][k].cpu(),
                        scale_factor=one_image_width / preds[predi][k].shape[3],
                        mode="bilinear",
                    ).clamp(0.0, 1.0)
                    for k in predicted_keys
                ],
                dim=2,
            )
        )
    # Concatenate the images horizontally
    pred_all_cat = torch.cat(pred_all, dim=3)[0]
    viz.image(
        pred_all_cat,
        win="show_predictions",
        env=viz_env,
        opts={"title": f"pred_{sequence_name}"},
    )
def generate_prediction_videos(
    preds,
    sequence_name,
    viz=None,
    viz_env="visualizer",
    predicted_keys=(
        "images_render",
        "masks_render",
        "depths_render",
        "_all_source_images",
    ),
    fps=20,
    video_path="/tmp/video",
    resize=None,
):
    """Given a list of predictions create and visualize rotating videos of the
    objects using visdom.

    Writes one mp4 per entry of `predicted_keys`, named
    `{video_path}_{sequence_name}_{key}.mp4`, and optionally shows each in
    visdom when `viz` is provided.
    """
    assert isinstance(preds, list)
    # make sure the target video directory exists
    os.makedirs(os.path.dirname(video_path), exist_ok=True)
    # init a video writer for each predicted key
    vws = {}
    for k in predicted_keys:
        vws[k] = VideoWriter(out_path=f"{video_path}_{sequence_name}_{k}.mp4", fps=fps)
    # append each camera's rendering as one frame per writer
    for rendered_pred in tqdm(preds):
        for k in predicted_keys:
            vws[k].write_frame(
                rendered_pred[k][0].clip(0.0, 1.0).detach().cpu().numpy(),
                resize=resize,
            )
    # finalize the videos and optionally push them to visdom
    for k in predicted_keys:
        vws[k].get_video(quiet=True)
        print(f"Generated {vws[k].out_path}.")
        if viz is not None:
            viz.video(
                videofile=vws[k].out_path,
                env=viz_env,
                win=k,  # we reuse the same window otherwise visdom dies
                opts={"title": sequence_name + " " + k},
            )
def export_scenes(
exp_dir: str = "",
restrict_sequence_name: Optional[str] = None,
output_directory: Optional[str] = None,
render_size: Tuple[int, int] = (512, 512),
video_size: Optional[Tuple[int, int]] = None,
split: str = "train",
split: str = "train", # train | val | test
n_source_views: int = 9,
n_eval_cameras: int = 40,
visdom_server="http://127.0.0.1",
visdom_port=8097,
visdom_show_preds: bool = False,
visdom_server: str = "http://127.0.0.1",
visdom_port: int = 8097,
visdom_env: Optional[str] = None,
) -> None:
"""
Given an `exp_dir` containing a trained Implicitron model, generates videos consisting
of renderes of sequences from the dataset used to train and evaluate the trained
Implicitron model.
Args:
exp_dir: Implicitron experiment directory.
restrict_sequence_name: If set, defines the list of sequences to visualize.
output_directory: If set, defines a custom directory to output visualizations to.
render_size: The size (HxW) of the generated renders.
video_size: The size (HxW) of the output video.
split: The dataset split to use for visualization.
Can be "train" / "val" / "test".
n_source_views: The number of source views added to each rendered batch. These
views are required inputs for models such as NeRFormer / NeRF-WCE.
n_eval_cameras: The number of cameras each fly-around trajectory.
visdom_show_preds: If `True`, outputs visualizations to visdom.
visdom_server: The address of the visdom server.
visdom_port: The port of the visdom server.
visdom_env: If set, defines a custom name for the visdom environment.
"""
gpu_idx: int = 0,
):
# In case an output directory is specified use it. If no output_directory
# is specified create a vis folder inside the experiment directory
if output_directory is None:
output_directory = os.path.join(exp_dir, "vis")
os.makedirs(output_directory, exist_ok=True)
else:
output_directory = output_directory
if not os.path.exists(output_directory):
os.makedirs(output_directory)
# Set the random seeds
torch.manual_seed(0)
@@ -75,78 +326,67 @@ def visualize_reconstruction(
# Get the config from the experiment_directory,
# and overwrite relevant fields
config = _get_config_from_experiment_directory(exp_dir)
config.gpu_idx = gpu_idx
config.exp_dir = exp_dir
# important so that the CO3D dataset gets loaded in full
data_source_args = config.data_source_ImplicitronDataSource_args
if "dataset_map_provider_JsonIndexDatasetMapProvider_args" in data_source_args:
dataset_args = (
data_source_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
)
dataset_args.test_on_train = False
if restrict_sequence_name is not None:
dataset_args.restrict_sequence_name = restrict_sequence_name
dataset_args = (
config.data_source_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
)
dataset_args.test_on_train = False
# Set the rendering image size
model_factory_args = config.model_factory_ImplicitronModelFactory_args
model_factory_args.force_resume = True
model_args = model_factory_args.model_GenericModel_args
model_args.render_image_width = render_size[0]
model_args.render_image_height = render_size[1]
config.generic_model_args.render_image_width = render_size[0]
config.generic_model_args.render_image_height = render_size[1]
if restrict_sequence_name is not None:
dataset_args.restrict_sequence_name = restrict_sequence_name
# Set up the CUDA env for the visualization
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = str(config.gpu_idx)
# Load the previously trained model
experiment = Experiment(**config)
model = experiment.model_factory(exp_dir=exp_dir)
device = torch.device("cuda")
model.to(device)
model, _, _ = init_model(cfg=config, force_load=True, load_model_only=True)
model.cuda()
model.eval()
# Setup the dataset
data_source = experiment.data_source
dataset_map, _ = data_source.get_datasets_and_dataloaders()
datasource = ImplicitronDataSource(**config.data_source_args)
dataset_map = datasource.dataset_map_provider.get_dataset_map()
dataset = dataset_map[split]
if dataset is None:
raise ValueError(f"{split} dataset not provided")
if visdom_env is None:
visdom_env = (
"visualizer_" + config.training_loop_ImplicitronTrainingLoop_args.visdom_env
)
# iterate over the sequences in the dataset
for sequence_name in dataset.sequence_names():
with torch.no_grad():
render_flyaround(
dataset=dataset,
sequence_name=sequence_name,
model=model,
output_video_path=os.path.join(output_directory, "video"),
render_sequence(
dataset,
sequence_name,
model,
video_path="{}/video".format(output_directory),
n_source_views=n_source_views,
visdom_show_preds=visdom_show_preds,
n_flyaround_poses=n_eval_cameras,
n_eval_cameras=n_eval_cameras,
visdom_server=visdom_server,
visdom_port=visdom_port,
visdom_environment=visdom_env,
viz_env=f"visualizer_{config.visdom_env}"
if visdom_env is None
else visdom_env,
video_resize=video_size,
device=device,
)
enable_get_default_args(visualize_reconstruction)
def _get_config_from_experiment_directory(experiment_directory) -> DictConfig:
def _get_config_from_experiment_directory(experiment_directory):
cfg_file = os.path.join(experiment_directory, "expconfig.yaml")
config = OmegaConf.load(cfg_file)
# pyre-ignore[7]
return config
def main(argv) -> None:
# automatically parses arguments of visualize_reconstruction
cfg = OmegaConf.create(get_default_args(visualize_reconstruction))
cfg.update(OmegaConf.from_cli(argv))
def main(argv):
# automatically parses arguments of export_scenes
cfg = OmegaConf.create(get_default_args(export_scenes))
cfg.update(OmegaConf.from_cli())
with torch.no_grad():
visualize_reconstruction(**cfg)
export_scenes(**cfg)
if __name__ == "__main__":

View File

@@ -87,7 +87,7 @@ def generate_eval_video_cameras(
plane_normal = torch.FloatTensor(up)
else:
cov = (cam_centers_c.t() @ cam_centers_c) / cam_centers_c.shape[0]
_, e_vec = torch.linalg.eigh(cov, UPLO="U")
_, e_vec = torch.symeig(cov, eigenvectors=True)
plane_normal = e_vec[:, 0]
plane_dist = (plane_normal[None] * cam_centers_c).sum(dim=-1)
@@ -96,7 +96,7 @@ def generate_eval_video_cameras(
cov = (
cam_centers_on_plane.t() @ cam_centers_on_plane
) / cam_centers_on_plane.shape[0]
_, e_vec = torch.linalg.eigh(cov, UPLO="U")
_, e_vec = torch.symeig(cov, eigenvectors=True)
traj_radius = (cam_centers_on_plane**2).sum(dim=1).sqrt().mean()
angle = torch.linspace(0, 2.0 * math.pi, n_eval_cams)
traj = traj_radius * torch.stack(

View File

@@ -26,7 +26,7 @@ class NeuralRadianceField(torch.nn.Module):
n_hidden_neurons_xyz: int = 256,
n_hidden_neurons_dir: int = 128,
n_layers_xyz: int = 8,
append_xyz: Tuple[int, ...] = (5,),
append_xyz: Tuple[int] = (5,),
use_multiple_streams: bool = True,
**kwargs,
):
@@ -253,7 +253,7 @@ class MLPWithInputSkips(torch.nn.Module):
output_dim: int,
skip_dim: int,
hidden_dim: int,
input_skips: Tuple[int, ...] = (),
input_skips: Tuple[int] = (),
):
"""
Args:

View File

@@ -67,7 +67,7 @@ class RadianceFieldRenderer(torch.nn.Module):
n_hidden_neurons_xyz: int = 256,
n_hidden_neurons_dir: int = 128,
n_layers_xyz: int = 8,
append_xyz: Tuple[int, ...] = (5,),
append_xyz: Tuple[int] = (5,),
density_noise_std: float = 0.0,
visualization: bool = False,
):

View File

@@ -4,4 +4,4 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
__version__ = "0.7.1"
__version__ = "0.6.2"

View File

@@ -10,10 +10,57 @@ import torch
"""
Some functions which depend on PyTorch or Python versions.
Some functions which depend on PyTorch versions.
"""
def solve(A: torch.Tensor, B: torch.Tensor) -> torch.Tensor:  # pragma: no cover
    """
    Version-agnostic torch.linalg.solve: return X such that AX=B, A square.
    """
    use_linalg = hasattr(torch, "linalg") and hasattr(torch.linalg, "solve")
    if not use_linalg:
        # Fallback for PyTorch < 1.8.0, where only torch.solve exists
        # (note its swapped argument order).
        # pyre-fixme[16]: `Tuple` has no attribute `solution`.
        return torch.solve(B, A).solution
    return torch.linalg.solve(A, B)
def lstsq(A: torch.Tensor, B: torch.Tensor) -> torch.Tensor:  # pragma: no cover
    """
    Version-agnostic torch.linalg.lstsq: return X minimizing ||AX - B||.
    """
    if not (hasattr(torch, "linalg") and hasattr(torch.linalg, "lstsq")):
        # Fallback for PyTorch < 1.9: torch.lstsq pads its result with
        # residual rows in the overdetermined case, so trim to n columns.
        solution = torch.lstsq(B, A).solution
        n_cols = A.shape[1]
        if n_cols < A.shape[0]:
            return solution[:n_cols]
        return solution
    return torch.linalg.lstsq(A, B).solution
def qr(A: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:  # pragma: no cover
    """
    Version-agnostic torch.linalg.qr (falls back to torch.qr on PyTorch < 1.9).
    """
    has_linalg_qr = hasattr(torch, "linalg") and hasattr(torch.linalg, "qr")
    if has_linalg_qr:
        return torch.linalg.qr(A)
    return torch.qr(A)
def eigh(A: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:  # pragma: no cover
    """
    Version-agnostic torch.linalg.eigh for a symmetric real matrix
    (falls back to torch.symeig on older PyTorch).
    """
    if not (hasattr(torch, "linalg") and hasattr(torch.linalg, "eigh")):
        return torch.symeig(A, eigenvectors=True)
    return torch.linalg.eigh(A)
def meshgrid_ij(
*A: Union[torch.Tensor, Sequence[torch.Tensor]]
) -> Tuple[torch.Tensor, ...]: # pragma: no cover
@@ -32,12 +79,3 @@ def meshgrid_ij(
# pyre-fixme[6]: For 1st param expected `Union[List[Tensor], Tensor]` but got
# `Union[Sequence[Tensor], Tensor]`.
return torch.meshgrid(*A)
def prod(iterable, *, start=1):
    """
    Multiply together the items of *iterable*, beginning with *start*.
    A pure-Python stand-in for math.prod (Python 3.8+).
    """
    result = start
    for item in iterable:
        result = result * item
    return result

View File

@@ -22,7 +22,6 @@
#include "interp_face_attrs/interp_face_attrs.h"
#include "iou_box3d/iou_box3d.h"
#include "knn/knn.h"
#include "marching_cubes/marching_cubes.h"
#include "mesh_normal_consistency/mesh_normal_consistency.h"
#include "packed_to_padded_tensor/packed_to_padded_tensor.h"
#include "point_mesh/point_mesh_cuda.h"
@@ -95,9 +94,6 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
// 3D IoU
m.def("iou_box3d", &IoUBox3D);
// Marching cubes
m.def("marching_cubes", &MarchingCubes);
// Pulsar.
#ifdef PULSAR_LOGGING_ENABLED
c10::ShowLogInfoToStderr();

View File

@@ -29,17 +29,14 @@ __global__ void IoUBox3DKernel(
const size_t tid = blockIdx.x * blockDim.x + threadIdx.x;
const size_t stride = gridDim.x * blockDim.x;
FaceVerts box1_tris[NUM_TRIS];
FaceVerts box2_tris[NUM_TRIS];
FaceVerts box1_planes[NUM_PLANES];
FaceVerts box2_planes[NUM_PLANES];
for (size_t i = tid; i < N * M; i += stride) {
const size_t n = i / M; // box1 index
const size_t m = i % M; // box2 index
// Convert to array of structs of face vertices i.e. effectively (F, 3, 3)
// FaceVerts is a data type defined in iou_utils.cuh
FaceVerts box1_tris[NUM_TRIS];
FaceVerts box2_tris[NUM_TRIS];
GetBoxTris(boxes1[n], box1_tris);
GetBoxTris(boxes2[m], box2_tris);
@@ -49,7 +46,9 @@ __global__ void IoUBox3DKernel(
const float3 box2_center = BoxCenter(boxes2[m]);
// Convert to an array of face vertices
FaceVerts box1_planes[NUM_PLANES];
GetBoxPlanes(boxes1[n], box1_planes);
FaceVerts box2_planes[NUM_PLANES];
GetBoxPlanes(boxes2[m], box2_planes);
// Get Box Volumes
@@ -89,9 +88,9 @@ __global__ void IoUBox3DKernel(
for (int b1 = 0; b1 < box1_count; ++b1) {
for (int b2 = 0; b2 < box2_count; ++b2) {
const bool is_coplanar =
IsCoplanarTriTri(box1_intersect[b1], box2_intersect[b2]);
IsCoplanarFace(box1_intersect[b1], box2_intersect[b2]);
const float area = FaceArea(box1_intersect[b1]);
if ((is_coplanar) && (area > aEpsilon)) {
if ((is_coplanar) && (area > kEpsilon)) {
tri2_keep[b2].keep = false;
}
}

View File

@@ -80,9 +80,9 @@ std::tuple<at::Tensor, at::Tensor> IoUBox3DCpu(
for (int b1 = 0; b1 < box1_intersect.size(); ++b1) {
for (int b2 = 0; b2 < box2_intersect.size(); ++b2) {
const bool is_coplanar =
IsCoplanarTriTri(box1_intersect[b1], box2_intersect[b2]);
IsCoplanarFace(box1_intersect[b1], box2_intersect[b2]);
const float area = FaceArea(box1_intersect[b1]);
if ((is_coplanar) && (area > aEpsilon)) {
if ((is_coplanar) && (area > kEpsilon)) {
tri2_keep[b2] = 0;
}
}

View File

@@ -12,15 +12,7 @@
#include <cstdio>
#include "utils/float_math.cuh"
// dEpsilon: Used in dot products and is used to assess whether two unit vectors
// are orthogonal (or coplanar). It's an epsilon on cos(θ).
// With dEpsilon = 0.001, two unit vectors are considered co-planar
// if their θ = 2.5 deg.
__constant__ const float dEpsilon = 1e-3;
// aEpsilon: Used once in main function to check for small face areas
__constant__ const float aEpsilon = 1e-4;
// kEpsilon: Used only for norm(u) = u/max(||u||, kEpsilon)
__constant__ const float kEpsilon = 1e-8;
__constant__ const float kEpsilon = 1e-5;
/*
_PLANES and _TRIS define the 4- and 3-connectivity
@@ -39,25 +31,25 @@ const int MAX_TRIS = 100;
// We will use struct arrays for representing
// the data for each box and intersecting
// triangles
struct FaceVerts {
typedef struct {
float3 v0;
float3 v1;
float3 v2;
float3 v3; // Can be empty for triangles
};
} FaceVerts;
struct FaceVertsIdx {
typedef struct {
int v0;
int v1;
int v2;
int v3; // Can be empty for triangles
};
} FaceVertsIdx;
// This is used when deciding which faces to
// keep that are not coplanar
struct Keep {
typedef struct {
bool keep;
};
} Keep;
__device__ FaceVertsIdx _PLANES[] = {
{0, 1, 2, 3},
@@ -128,66 +120,24 @@ __device__ inline void GetBoxPlanes(
}
}
// The geometric center of a list of vertices.
// The normal of the face defined by vertices (v0, v1, v2)
// Define e0 to be the edge connecting (v1, v0)
// Define e1 to be the edge connecting (v2, v0)
// normal is the cross product of e0, e1
//
// Args
// vertices: A list of float3 vertices {v0, ..., vN}.
// v0, v1, v2: float3 coordinates of the vertices of the face
//
// Returns
// float3: Geometric center of the vertices.
// float3: normal for the face
//
__device__ inline float3 FaceCenter(
std::initializer_list<const float3> vertices) {
auto sumVertices = float3{};
for (const auto& vertex : vertices) {
sumVertices = sumVertices + vertex;
}
return sumVertices / vertices.size();
}
// The normal of a plane spanned by vectors e0 and e1
//
// Args
// e0, e1: float3 vectors defining a plane
//
// Returns
// float3: normal of the plane
//
__device__ inline float3 GetNormal(const float3 e0, const float3 e1) {
float3 n = cross(e0, e1);
n = n / std::fmaxf(norm(n), kEpsilon);
// Normal of the triangle (v0, v1, v2): the normalized cross product of
// the two edges emanating from v0. The norm is clamped below by kEpsilon
// to avoid dividing by zero for degenerate triangles.
__device__ inline float3
FaceNormal(const float3 v0, const float3 v1, const float3 v2) {
  const float3 edge0 = v1 - v0;
  const float3 edge1 = v2 - v0;
  float3 normal = cross(edge0, edge1);
  normal = normal / fmaxf(norm(normal), kEpsilon);
  return normal;
}
// The normal of a face with vertices (v0, v1, v2) or (v0, ..., v3).
// We find the "best" edges connecting the face center to the vertices,
// such that the cross product between the edges is maximized.
//
// Args
// vertices: a list of float3 coordinates of the vertices.
//
// Returns
// float3: center of the plane
//
__device__ inline float3 FaceNormal(
std::initializer_list<const float3> vertices) {
const auto faceCenter = FaceCenter(vertices);
auto normal = float3();
auto maxDist = -1;
for (auto v1 = vertices.begin(); v1 != vertices.end() - 1; ++v1) {
for (auto v2 = v1 + 1; v2 != vertices.end(); ++v2) {
const auto v1ToCenter = *v1 - faceCenter;
const auto v2ToCenter = *v2 - faceCenter;
const auto dist = norm(cross(v1ToCenter, v2ToCenter));
if (dist > maxDist) {
normal = GetNormal(v1ToCenter, v2ToCenter);
maxDist = dist;
}
}
}
return normal;
}
// The area of the face defined by vertices (v0, v1, v2)
// Define e0 to be the edge connecting (v1, v0)
// Define e1 to be the edge connecting (v2, v0)
@@ -201,13 +151,15 @@ __device__ inline float3 FaceNormal(
//
__device__ inline float FaceArea(const FaceVerts& tri) {
// Get verts for face 1
const float3 n = cross(tri.v1 - tri.v0, tri.v2 - tri.v0);
const float3 v0 = tri.v0;
const float3 v1 = tri.v1;
const float3 v2 = tri.v2;
const float3 n = cross(v1 - v0, v2 - v0);
return norm(n) / 2.0;
}
// The normal of a box plane defined by the verts in `plane` such that it
// points toward the centroid of the box given by `center`.
//
// The normal of a box plane defined by the verts in `plane` with
// the centroid of the box given by `center`.
// Args
// plane: float3 coordinates of the vertices of the plane
// center: float3 coordinates of the center of the box from
@@ -221,25 +173,23 @@ template <typename FaceVertsPlane>
__device__ inline float3 PlaneNormalDirection(
const FaceVertsPlane& plane,
const float3& center) {
// The plane's center
const float3 plane_center =
FaceCenter({plane.v0, plane.v1, plane.v2, plane.v3});
// Only need the first 3 verts of the plane
const float3 v0 = plane.v0;
const float3 v1 = plane.v1;
const float3 v2 = plane.v2;
// The plane's normal
float3 n = FaceNormal({plane.v0, plane.v1, plane.v2, plane.v3});
// We project the center on the plane defined by (v0, v1, v2, v3)
// We can write center = plane_center + a * e0 + b * e1 + c * n
// We project the center on the plane defined by (v0, v1, v2)
// We can write center = v0 + a * e0 + b * e1 + c * n
// We know that <e0, n> = 0 and <e1, n> = 0 and
// <a, b> is the dot product between a and b.
// This means we can solve for c as:
// c = <center - plane_center - a * e0 - b * e1, n>
// = <center - plane_center, n>
const float c = dot((center - plane_center), n);
// c = <center - v0 - a * e0 - b * e1, n> = <center - v0, n>
float3 n = FaceNormal(v0, v1, v2);
const float c = dot((center - v0), n);
// If c is negative, then we revert the direction of n such that n
// points "inside"
if (c < 0.0f) {
if (c < kEpsilon) {
n = -1.0f * n;
}
@@ -368,16 +318,16 @@ __device__ inline float3 PolyhedronCenter(
//
__device__ inline bool
IsInside(const FaceVerts& plane, const float3& normal, const float3& point) {
// The center of the plane
const float3 plane_ctr = FaceCenter({plane.v0, plane.v1, plane.v2, plane.v3});
// Get one vert of the plane
const float3 v0 = plane.v0;
// Every point p can be written as p = plane_ctr + a e0 + b e1 + c n
// Every point p can be written as p = v0 + a e0 + b e1 + c n
// Solving for c:
// c = (point - plane_ctr - a * e0 - b * e1).dot(n)
// c = (point - v0 - a * e0 - b * e1).dot(n)
// We know that <e0, n> = 0 and <e1, n> = 0
// So the calculation can be simplified as:
const float c = dot((point - plane_ctr), normal);
const bool inside = c >= 0.0f;
const float c = dot((point - v0), normal);
const bool inside = c > -1.0f * kEpsilon;
return inside;
}
@@ -398,129 +348,20 @@ __device__ inline float3 PlaneEdgeIntersection(
const float3& normal,
const float3& p0,
const float3& p1) {
// The center of the plane
const float3 plane_ctr = FaceCenter({plane.v0, plane.v1, plane.v2, plane.v3});
// Get one vert of the plane
const float3 v0 = plane.v0;
// The point of intersection can be parametrized
// p = p0 + a (p1 - p0) where a in [0, 1]
// We want to find a such that p is on plane
// <p - plane_ctr, n> = 0
float3 direc = p1 - p0;
direc = direc / fmaxf(norm(direc), kEpsilon);
float3 p = (p1 + p0) / 2.0f;
if (abs(dot(direc, normal)) >= dEpsilon) {
const float top = -1.0f * dot(p0 - plane_ctr, normal);
const float bot = dot(p1 - p0, normal);
const float a = top / bot;
p = p0 + a * (p1 - p0);
}
// <p - v0, n> = 0
const float top = dot(-1.0f * (p0 - v0), normal);
const float bot = dot(p1 - p0, normal);
const float a = top / bot;
const float3 p = p0 + a * (p1 - p0);
return p;
}
// Compute the most distant points between two sets of vertices
//
// Args
// verts1, verts2: list of float3 defining the list of vertices
//
// Returns
// v1m, v2m: float3 vectors of the most distant points
// in verts1 and verts2 respectively
//
__device__ inline std::tuple<float3, float3> ArgMaxVerts(
std::initializer_list<float3> verts1,
std::initializer_list<float3> verts2) {
auto v1m = float3();
auto v2m = float3();
float maxdist = -1.0f;
for (const auto& v1 : verts1) {
for (const auto& v2 : verts2) {
if (norm(v1 - v2) > maxdist) {
v1m = v1;
v2m = v2;
maxdist = norm(v1 - v2);
}
}
}
return std::make_tuple(v1m, v2m);
}
// Compute a boolean indicator for whether or not two faces
// are coplanar
//
// Args
// tri1, tri2: FaceVerts struct of the vertex coordinates of
// the triangle face
//
// Returns
// bool: whether or not the two faces are coplanar
//
__device__ inline bool IsCoplanarTriTri(
const FaceVerts& tri1,
const FaceVerts& tri2) {
const float3 tri1_ctr = FaceCenter({tri1.v0, tri1.v1, tri1.v2});
const float3 tri1_n = FaceNormal({tri1.v0, tri1.v1, tri1.v2});
const float3 tri2_ctr = FaceCenter({tri2.v0, tri2.v1, tri2.v2});
const float3 tri2_n = FaceNormal({tri2.v0, tri2.v1, tri2.v2});
// Check if parallel
const bool check1 = abs(dot(tri1_n, tri2_n)) > 1 - dEpsilon;
// Compute most distant points
const auto v1mAndv2m =
ArgMaxVerts({tri1.v0, tri1.v1, tri1.v2}, {tri2.v0, tri2.v1, tri2.v2});
const auto v1m = std::get<0>(v1mAndv2m);
const auto v2m = std::get<1>(v1mAndv2m);
float3 n12m = v1m - v2m;
n12m = n12m / fmaxf(norm(n12m), kEpsilon);
const bool check2 = (abs(dot(n12m, tri1_n)) < dEpsilon) ||
(abs(dot(n12m, tri2_n)) < dEpsilon);
return (check1 && check2);
}
// Compute a boolean indicator for whether or not a triangular and a planar
// face are coplanar
//
// Args
// tri, plane: FaceVerts struct of the vertex coordinates of
// the triangle and planar face
// normal: the normal direction of the plane pointing "inside"
//
// Returns
// bool: whether or not the two faces are coplanar
//
__device__ inline bool IsCoplanarTriPlane(
const FaceVerts& tri,
const FaceVerts& plane,
const float3& normal) {
const float3 tri_ctr = FaceCenter({tri.v0, tri.v1, tri.v2});
const float3 nt = FaceNormal({tri.v0, tri.v1, tri.v2});
// check if parallel
const bool check1 = abs(dot(nt, normal)) > 1 - dEpsilon;
// Compute most distant points
const auto v1mAndv2m = ArgMaxVerts(
{tri.v0, tri.v1, tri.v2}, {plane.v0, plane.v1, plane.v2, plane.v3});
const auto v1m = std::get<0>(v1mAndv2m);
const auto v2m = std::get<1>(v1mAndv2m);
float3 n12m = v1m - v2m;
n12m = n12m / fmaxf(norm(n12m), kEpsilon);
const bool check2 = abs(dot(n12m, normal)) < dEpsilon;
return (check1 && check2);
}
// Triangle is clipped into a quadrilateral
// based on the intersection points with the plane.
// Then the quadrilateral is divided into two triangles.
@@ -636,14 +477,6 @@ __device__ inline int ClipTriByPlane(
const bool isin1 = IsInside(plane, normal, v1);
const bool isin2 = IsInside(plane, normal, v2);
// Check coplanar
const bool iscoplanar = IsCoplanarTriPlane(tri, plane, normal);
if (iscoplanar) {
// Return input vertices
face_verts_out[0] = {v0, v1, v2};
return 1;
}
// All in
if (isin0 && isin1 && isin2) {
// Return input vertices
@@ -682,6 +515,40 @@ __device__ inline int ClipTriByPlane(
return 0;
}
// Compute a boolean indicator for whether or not two faces
// are coplanar
//
// Args
//    tri1, tri2: FaceVerts struct of the vertex coordinates of
//       the triangle face
//
// Returns
//    bool: whether or not the two faces are coplanar
//
__device__ inline bool IsCoplanarFace(
    const FaceVerts& tri1,
    const FaceVerts& tri2) {
  // Plane of the first triangle: an anchor point plus its unit normal.
  const float3 anchor = tri1.v0;
  const float3 n1 = FaceNormal(anchor, tri1.v1, tri1.v2);
  // A vertex of tri2 lies on that plane iff its signed distance along n1
  // is (near) zero; all three must lie on the plane.
  const bool on0 = abs(dot(tri2.v0 - anchor, n1)) < kEpsilon;
  const bool on1 = abs(dot(tri2.v1 - anchor, n1)) < kEpsilon;
  const bool on2 = abs(dot(tri2.v2 - anchor, n1)) < kEpsilon;
  return on0 && on1 && on2;
}
// Get the triangles from each box which are part of the
// intersecting polyhedron by computing the intersection
// points with each of the planes.

View File

@@ -18,15 +18,7 @@
#include <type_traits>
#include "utils/vec3.h"
// dEpsilon: Used in dot products and is used to assess whether two unit vectors
// are orthogonal (or coplanar). It's an epsilon on cos(θ).
// With dEpsilon = 0.001, two unit vectors are considered co-planar
// if their θ = 2.5 deg.
const auto dEpsilon = 1e-3;
// aEpsilon: Used once in main function to check for small face areas
const auto aEpsilon = 1e-4;
// kEpsilon: Used only for norm(u) = u/max(||u||, kEpsilon)
const auto kEpsilon = 1e-8;
const auto kEpsilon = 1e-5;
/*
_PLANES and _TRIS define the 4- and 3-connectivity
@@ -137,108 +129,20 @@ inline face_verts GetBoxPlanes(const Box& box) {
return box_planes;
}
// The normal of a plane spanned by vectors e0 and e1
// The normal of the face defined by vertices (v0, v1, v2)
// Define e0 to be the edge connecting (v1, v0)
// Define e1 to be the edge connecting (v2, v0)
// normal is the cross product of e0, e1
//
// Args
// e0, e1: vec3 vectors defining a plane
//
// Returns
// vec3: normal of the plane
//
inline vec3<float> GetNormal(const vec3<float> e0, const vec3<float> e1) {
vec3<float> n = cross(e0, e1);
n = n / std::fmaxf(norm(n), kEpsilon);
return n;
}
// The center of a triangle tri
//
// Args
// tri: vec3 coordinates of the vertices of the triangle
//
// Returns
// vec3: center of the triangle
//
inline vec3<float> TriCenter(const std::vector<vec3<float>>& tri) {
// Vertices of the triangle
const vec3<float> v0 = tri[0];
const vec3<float> v1 = tri[1];
const vec3<float> v2 = tri[2];
return (v0 + v1 + v2) / 3.0f;
}
// The normal of the triangle defined by vertices (v0, v1, v2)
// We find the "best" edges connecting the face center to the vertices,
// such that the cross product between the edges is maximized.
//
// Args
// tri: vec3 coordinates of the vertices of the face
// v0, v1, v2: vec3 coordinates of the vertices of the face
//
// Returns
// vec3: normal for the face
//
inline vec3<float> TriNormal(const std::vector<vec3<float>>& tri) {
// Get center of triangle
const vec3<float> ctr = TriCenter(tri);
// find the "best" normal as cross product of edges from center
float max_dist = -1.0f;
vec3<float> n = {0.0f, 0.0f, 0.0f};
for (int i = 0; i < 2; ++i) {
for (int j = i + 1; j < 3; ++j) {
const float dist = norm(cross(tri[i] - ctr, tri[j] - ctr));
if (dist > max_dist) {
n = GetNormal(tri[i] - ctr, tri[j] - ctr);
}
}
}
return n;
}
// The center of a plane
//
// Args
// plane: vec3 coordinates of the vertices of the plane
//
// Returns
// vec3: center of the plane
//
inline vec3<float> PlaneCenter(const std::vector<vec3<float>>& plane) {
// Vertices of the plane
const vec3<float> v0 = plane[0];
const vec3<float> v1 = plane[1];
const vec3<float> v2 = plane[2];
const vec3<float> v3 = plane[3];
return (v0 + v1 + v2 + v3) / 4.0f;
}
// The normal of a planar face with vertices (v0, v1, v2, v3)
// We find the "best" edges connecting the face center to the vertices,
// such that the cross product between the edges is maximized.
//
// Args
// plane: vec3 coordinates of the vertices of the planar face
//
// Returns
// vec3: normal of the planar face
//
inline vec3<float> PlaneNormal(const std::vector<vec3<float>>& plane) {
// Get center of planar face
vec3<float> ctr = PlaneCenter(plane);
// find the "best" normal as cross product of edges from center
float max_dist = -1.0f;
vec3<float> n = {0.0f, 0.0f, 0.0f};
for (int i = 0; i < 3; ++i) {
for (int j = i + 1; j < 4; ++j) {
const float dist = norm(cross(plane[i] - ctr, plane[j] - ctr));
if (dist > max_dist) {
n = GetNormal(plane[i] - ctr, plane[j] - ctr);
}
}
}
// Normal of the triangle (v0, v1, v2): the normalized cross product of the
// two edges from v0; the norm is clamped below by kEpsilon so degenerate
// triangles do not divide by zero.
inline vec3<float> FaceNormal(vec3<float> v0, vec3<float> v1, vec3<float> v2) {
  const vec3<float> edge0 = v1 - v0;
  const vec3<float> edge1 = v2 - v0;
  vec3<float> normal = cross(edge0, edge1);
  return normal / std::fmaxf(norm(normal), kEpsilon);
}
@@ -262,9 +166,8 @@ inline float FaceArea(const std::vector<vec3<float>>& tri) {
return norm(n) / 2.0;
}
// The normal of a box plane defined by the verts in `plane` such that it
// points toward the centroid of the box given by `center`.
//
// The normal of a box plane defined by the verts in `plane` with
// the centroid of the box given by `center`.
// Args
// plane: vec3 coordinates of the vertices of the plane
// center: vec3 coordinates of the center of the box from
@@ -277,22 +180,23 @@ inline float FaceArea(const std::vector<vec3<float>>& tri) {
inline vec3<float> PlaneNormalDirection(
const std::vector<vec3<float>>& plane,
const vec3<float>& center) {
// The plane's center & normal
const vec3<float> plane_center = PlaneCenter(plane);
vec3<float> n = PlaneNormal(plane);
// Only need the first 3 verts of the plane
const vec3<float> v0 = plane[0];
const vec3<float> v1 = plane[1];
const vec3<float> v2 = plane[2];
// We project the center on the plane defined by (v0, v1, v2, v3)
// We can write center = plane_center + a * e0 + b * e1 + c * n
// We project the center on the plane defined by (v0, v1, v2)
// We can write center = v0 + a * e0 + b * e1 + c * n
// We know that <e0, n> = 0 and <e1, n> = 0 and
// <a, b> is the dot product between a and b.
// This means we can solve for c as:
// c = <center - plane_center - a * e0 - b * e1, n>
// = <center - plane_center, n>
const float c = dot((center - plane_center), n);
// c = <center - v0 - a * e0 - b * e1, n> = <center - v0, n>
vec3<float> n = FaceNormal(v0, v1, v2);
const float c = dot((center - v0), n);
// If c is negative, then we revert the direction of n such that n
// points "inside"
if (c < 0.0f) {
if (c < kEpsilon) {
n = -1.0f * n;
}
@@ -404,16 +308,16 @@ inline bool IsInside(
const std::vector<vec3<float>>& plane,
const vec3<float>& normal,
const vec3<float>& point) {
// The center of the plane
const vec3<float> plane_ctr = PlaneCenter(plane);
// Get one vert of the plane
const vec3<float> v0 = plane[0];
// Every point p can be written as p = plane_ctr + a e0 + b e1 + c n
// Every point p can be written as p = v0 + a e0 + b e1 + c n
// Solving for c:
// c = (point - plane_ctr - a * e0 - b * e1).dot(n)
// c = (point - v0 - a * e0 - b * e1).dot(n)
// We know that <e0, n> = 0 and <e1, n> = 0
// So the calculation can be simplified as:
const float c = dot((point - plane_ctr), normal);
const bool inside = c >= 0.0f;
const float c = dot((point - v0), normal);
const bool inside = c > -1.0f * kEpsilon;
return inside;
}
@@ -434,126 +338,20 @@ inline vec3<float> PlaneEdgeIntersection(
const vec3<float>& normal,
const vec3<float>& p0,
const vec3<float>& p1) {
// The center of the plane
const vec3<float> plane_ctr = PlaneCenter(plane);
// Get one vert of the plane
const vec3<float> v0 = plane[0];
// The point of intersection can be parametrized
// p = p0 + a (p1 - p0) where a in [0, 1]
// We want to find a such that p is on plane
// <p - ctr, n> = 0
vec3<float> direc = p1 - p0;
direc = direc / std::fmaxf(norm(direc), kEpsilon);
vec3<float> p = (p1 + p0) / 2.0f;
if (std::abs(dot(direc, normal)) >= dEpsilon) {
const float top = -1.0f * dot(p0 - plane_ctr, normal);
const float bot = dot(p1 - p0, normal);
const float a = top / bot;
p = p0 + a * (p1 - p0);
}
// <p - v0, n> = 0
const float top = dot(-1.0f * (p0 - v0), normal);
const float bot = dot(p1 - p0, normal);
const float a = top / bot;
const vec3<float> p = p0 + a * (p1 - p0);
return p;
}
// Compute the most distant points between two sets of vertices
//
// Args
// verts1, verts2: vec3 defining the list of vertices
//
// Returns
// v1m, v2m: vec3 vectors of the most distant points
// in verts1 and verts2 respectively
//
inline std::tuple<vec3<float>, vec3<float>> ArgMaxVerts(
const std::vector<vec3<float>>& verts1,
const std::vector<vec3<float>>& verts2) {
vec3<float> v1m = {0.0f, 0.0f, 0.0f};
vec3<float> v2m = {0.0f, 0.0f, 0.0f};
float maxdist = -1.0f;
for (const auto& v1 : verts1) {
for (const auto& v2 : verts2) {
if (norm(v1 - v2) > maxdist) {
v1m = v1;
v2m = v2;
maxdist = norm(v1 - v2);
}
}
}
return std::make_tuple(v1m, v2m);
}
// Compute a boolean indicator for whether or not two faces
// are coplanar
//
// Args
// tri1, tri2: std:vector<vec3> of the vertex coordinates of
// triangle faces
//
// Returns
// bool: whether or not the two faces are coplanar
//
inline bool IsCoplanarTriTri(
const std::vector<vec3<float>>& tri1,
const std::vector<vec3<float>>& tri2) {
// Get normal for tri 1
const vec3<float> n1 = TriNormal(tri1);
// Get normal for tri 2
const vec3<float> n2 = TriNormal(tri2);
// Check if parallel
const bool check1 = std::abs(dot(n1, n2)) > 1 - dEpsilon;
// Compute most distant points
auto argvs = ArgMaxVerts(tri1, tri2);
const auto v1m = std::get<0>(argvs);
const auto v2m = std::get<1>(argvs);
vec3<float> n12m = v1m - v2m;
n12m = n12m / std::fmaxf(norm(n12m), kEpsilon);
const bool check2 = (std::abs(dot(n12m, n1)) < dEpsilon) ||
(std::abs(dot(n12m, n2)) < dEpsilon);
return (check1 && check2);
}
// Compute a boolean indicator for whether or not a triangular and a planar
// face are coplanar
//
// Args
// tri, plane: std:vector<vec3> of the vertex coordinates of
// triangular face and planar face
// normal: the normal direction of the plane pointing "inside"
//
// Returns
// bool: whether or not the two faces are coplanar
//
inline bool IsCoplanarTriPlane(
const std::vector<vec3<float>>& tri,
const std::vector<vec3<float>>& plane,
const vec3<float>& normal) {
// Get normal for tri
const vec3<float> nt = TriNormal(tri);
// check if parallel
const bool check1 = std::abs(dot(nt, normal)) > 1 - dEpsilon;
// Compute most distant points
auto argvs = ArgMaxVerts(tri, plane);
const auto v1m = std::get<0>(argvs);
const auto v2m = std::get<1>(argvs);
vec3<float> n12m = v1m - v2m;
n12m = n12m / std::fmaxf(norm(n12m), kEpsilon);
const bool check2 = std::abs(dot(n12m, normal)) < dEpsilon;
return (check1 && check2);
}
// Triangle is clipped into a quadrilateral
// based on the intersection points with the plane.
// Then the quadrilateral is divided into two triangles.
@@ -638,14 +436,6 @@ inline face_verts ClipTriByPlane(
const vec3<float> v1 = tri[1];
const vec3<float> v2 = tri[2];
// Check coplanar
const bool iscoplanar = IsCoplanarTriPlane(tri, plane, normal);
if (iscoplanar) {
// Return input vertices
face_verts tris = {{v0, v1, v2}};
return tris;
}
// Check each of the triangle vertices to see if it is inside the plane
const bool isin0 = IsInside(plane, normal, v0);
const bool isin1 = IsInside(plane, normal, v1);
@@ -690,6 +480,35 @@ inline face_verts ClipTriByPlane(
return empty_tris;
}
// Compute a boolean indicator for whether or not two faces
// are coplanar
//
// Args
//    tri1, tri2: std:vector<vec3> of the vertex coordinates of
//       triangle faces
//
// Returns
//    bool: whether or not the two faces are coplanar
//
inline bool IsCoplanarFace(
    const std::vector<vec3<float>>& tri1,
    const std::vector<vec3<float>>& tri2) {
  // Plane of the first triangle: an anchor vertex plus its unit normal.
  const vec3<float> anchor = tri1[0];
  const vec3<float> n1 = FaceNormal(anchor, tri1[1], tri1[2]);
  // Count the vertices of tri2 whose signed distance to that plane is
  // (near) zero; all three must lie on the plane.
  int on_plane = 0;
  for (int i = 0; i < 3; ++i) {
    const float dist = std::abs(dot(tri2[i] - anchor, n1));
    if (dist < kEpsilon) {
      ++on_plane;
    }
  }
  return on_plane == 3;
}
// Get the triangles from each box which are part of the
// intersecting polyhedron by computing the intersection
// points with each of the planes.

View File

@@ -1,39 +0,0 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <torch/extension.h>
#include <tuple>
#include <vector>
#include "utils/pytorch3d_cutils.h"
// Run Marching Cubes algorithm over a batch of volume scalar fields
// with a pre-defined threshold and return a mesh composed of vertices
// and faces for the mesh.
//
// Args:
// vol: FloatTensor of shape (D, H, W) giving a volume
// scalar grids.
// isolevel: isosurface value to use as the threshoold to determine whether
// the points are within a volume.
//
// Returns:
// vertices: List of N FloatTensors of vertices
// faces: List of N LongTensors of faces
// CPU implementation
std::tuple<at::Tensor, at::Tensor> MarchingCubesCpu(
const at::Tensor& vol,
const float isolevel);
// Entry point exposed to callers; currently dispatches to the CPU
// implementation only.
inline std::tuple<at::Tensor, at::Tensor> MarchingCubes(
    const at::Tensor& vol,
    const float isolevel) {
  // Ensure a contiguous memory layout before running the CPU kernel.
  const at::Tensor vol_contig = vol.contiguous();
  return MarchingCubesCpu(vol_contig, isolevel);
}

View File

@@ -1,115 +0,0 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <torch/extension.h>
#include <algorithm>
#include <array>
#include <cstring>
#include <unordered_map>
#include <vector>
#include "marching_cubes/marching_cubes_utils.h"
// Cpu implementation for Marching Cubes
// Args:
//    vol: a Tensor of size (D, H, W) corresponding to a 3D scalar field
//    isolevel: the isosurface value to use as the threshold to determine
//    whether points are within a volume.
//
// Returns:
//    vertices: a float tensor of shape (N, 3) for positions of the mesh
//    faces: a long tensor of shape (N, 3) for indices of the face vertices
//
std::tuple<at::Tensor, at::Tensor> MarchingCubesCpu(
    const at::Tensor& vol,
    const float isolevel) {
  // volume shapes
  const int D = vol.size(0);
  const int H = vol.size(1);
  const int W = vol.size(2);
  // Create tensor accessors
  auto vol_a = vol.accessor<float, 3>();
  // vpair_to_edge maps a pair of vertex ids to its corresponding edge id.
  // NOTE(review): keying an unordered_map on std::pair requires a hash
  // specialization, presumably supplied by marching_cubes_utils.h — confirm.
  std::unordered_map<std::pair<int, int>, int64_t> vpair_to_edge;
  // edge_id_to_v maps from an edge id to a vertex position
  std::unordered_map<int64_t, Vertex> edge_id_to_v;
  // uniq_edge_id: used to remove redundant edge ids
  std::unordered_map<int64_t, int64_t> uniq_edge_id;
  std::vector<int64_t> faces; // store face indices
  std::vector<Vertex> verts; // store vertex positions
  // enumerate each cell in the 3d grid
  for (int z = 0; z < D - 1; z++) {
    for (int y = 0; y < H - 1; y++) {
      for (int x = 0; x < W - 1; x++) {
        Cube cube(x, y, z, vol_a, isolevel);
        // Cube is entirely in/out of the surface
        if (_FACE_TABLE[cube.cubeindex][0] == -1) {
          continue;
        }
        // store all boundary vertices that intersect with the edges
        std::array<Vertex, 12> interp_points;
        // triangle vertex IDs and positions
        std::vector<int64_t> tri;
        std::vector<Vertex> ps;
        // Interpolate the vertices where the surface intersects with the cube
        for (int j = 0; _FACE_TABLE[cube.cubeindex][j] != -1; j++) {
          const int e = _FACE_TABLE[cube.cubeindex][j];
          interp_points[e] = cube.VertexInterp(isolevel, e, vol_a);
          auto vpair = cube.GetVPairFromEdge(e, W, H);
          // Assign a fresh edge id equal to the map size before insertion.
          // NOTE(review): m[k] = m.size() relies on the RHS being evaluated
          // before operator[] inserts — guaranteed in C++17; confirm the
          // build's language standard.
          if (!vpair_to_edge.count(vpair)) {
            vpair_to_edge[vpair] = vpair_to_edge.size();
          }
          int64_t edge = vpair_to_edge[vpair];
          tri.push_back(edge);
          ps.push_back(interp_points[e]);
          // Check if the triangle face is degenerate. A triangle face
          // is degenerate if any of the two vertices share the same 3D
          // position. Every 3rd edge closes a candidate triangle; emit it
          // only when all three positions are distinct.
          if ((j + 1) % 3 == 0 && ps[0] != ps[1] && ps[1] != ps[2] &&
              ps[2] != ps[0]) {
            for (int k = 0; k < 3; k++) {
              // NOTE(review): narrowing int64_t -> int; edge ids are assumed
              // to stay within int range for supported volume sizes — confirm.
              int v = tri[k];
              edge_id_to_v[tri.at(k)] = ps.at(k);
              if (!uniq_edge_id.count(v)) {
                uniq_edge_id[v] = verts.size();
                verts.push_back(edge_id_to_v[v]);
              }
              faces.push_back(uniq_edge_id[v]);
            }
            tri.clear();
            ps.clear();
          } // endif
        } // endfor edge enumeration
      } // endfor x
    } // endfor y
  } // endfor z
  // Collect returning tensor
  const int n_vertices = verts.size();
  const int64_t n_faces = (int64_t)faces.size() / 3;
  auto vert_tensor = torch::zeros({n_vertices, 3}, torch::kFloat);
  auto face_tensor = torch::zeros({n_faces, 3}, torch::kInt64);
  auto vert_a = vert_tensor.accessor<float, 2>();
  for (int i = 0; i < n_vertices; i++) {
    vert_a[i][0] = verts.at(i).x;
    vert_a[i][1] = verts.at(i).y;
    vert_a[i][2] = verts.at(i).z;
  }
  auto face_a = face_tensor.accessor<int64_t, 2>();
  for (int64_t i = 0; i < n_faces; i++) {
    face_a[i][0] = faces.at(i * 3 + 0);
    face_a[i][1] = faces.at(i * 3 + 1);
    face_a[i][2] = faces.at(i * 3 + 2);
  }
  return std::make_tuple(vert_tensor, face_tensor);
}

View File

@@ -1,446 +0,0 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <torch/extension.h>
#include <cmath>
#include <cstdint>
#include <vector>
#include "ATen/core/TensorAccessor.h"
// EPS: Used to assess whether two float values are close
const float EPS = 1e-5;
// A table mapping from cubeindex to a list of face configurations.
// Each list contains at most 5 faces, where each face is represented with
// 3 consecutive numbers
// Table taken from http://paulbourke.net/geometry/polygonise/
const int _FACE_TABLE[256][16] = {
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{0, 8, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{0, 1, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{1, 8, 3, 9, 8, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{1, 2, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{0, 8, 3, 1, 2, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{9, 2, 10, 0, 2, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{2, 8, 3, 2, 10, 8, 10, 9, 8, -1, -1, -1, -1, -1, -1, -1},
{3, 11, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{0, 11, 2, 8, 11, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{1, 9, 0, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{1, 11, 2, 1, 9, 11, 9, 8, 11, -1, -1, -1, -1, -1, -1, -1},
{3, 10, 1, 11, 10, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{0, 10, 1, 0, 8, 10, 8, 11, 10, -1, -1, -1, -1, -1, -1, -1},
{3, 9, 0, 3, 11, 9, 11, 10, 9, -1, -1, -1, -1, -1, -1, -1},
{9, 8, 10, 10, 8, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{4, 7, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{4, 3, 0, 7, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{0, 1, 9, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{4, 1, 9, 4, 7, 1, 7, 3, 1, -1, -1, -1, -1, -1, -1, -1},
{1, 2, 10, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{3, 4, 7, 3, 0, 4, 1, 2, 10, -1, -1, -1, -1, -1, -1, -1},
{9, 2, 10, 9, 0, 2, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1},
{2, 10, 9, 2, 9, 7, 2, 7, 3, 7, 9, 4, -1, -1, -1, -1},
{8, 4, 7, 3, 11, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{11, 4, 7, 11, 2, 4, 2, 0, 4, -1, -1, -1, -1, -1, -1, -1},
{9, 0, 1, 8, 4, 7, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1},
{4, 7, 11, 9, 4, 11, 9, 11, 2, 9, 2, 1, -1, -1, -1, -1},
{3, 10, 1, 3, 11, 10, 7, 8, 4, -1, -1, -1, -1, -1, -1, -1},
{1, 11, 10, 1, 4, 11, 1, 0, 4, 7, 11, 4, -1, -1, -1, -1},
{4, 7, 8, 9, 0, 11, 9, 11, 10, 11, 0, 3, -1, -1, -1, -1},
{4, 7, 11, 4, 11, 9, 9, 11, 10, -1, -1, -1, -1, -1, -1, -1},
{9, 5, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{9, 5, 4, 0, 8, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{0, 5, 4, 1, 5, 0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{8, 5, 4, 8, 3, 5, 3, 1, 5, -1, -1, -1, -1, -1, -1, -1},
{1, 2, 10, 9, 5, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{3, 0, 8, 1, 2, 10, 4, 9, 5, -1, -1, -1, -1, -1, -1, -1},
{5, 2, 10, 5, 4, 2, 4, 0, 2, -1, -1, -1, -1, -1, -1, -1},
{2, 10, 5, 3, 2, 5, 3, 5, 4, 3, 4, 8, -1, -1, -1, -1},
{9, 5, 4, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{0, 11, 2, 0, 8, 11, 4, 9, 5, -1, -1, -1, -1, -1, -1, -1},
{0, 5, 4, 0, 1, 5, 2, 3, 11, -1, -1, -1, -1, -1, -1, -1},
{2, 1, 5, 2, 5, 8, 2, 8, 11, 4, 8, 5, -1, -1, -1, -1},
{10, 3, 11, 10, 1, 3, 9, 5, 4, -1, -1, -1, -1, -1, -1, -1},
{4, 9, 5, 0, 8, 1, 8, 10, 1, 8, 11, 10, -1, -1, -1, -1},
{5, 4, 0, 5, 0, 11, 5, 11, 10, 11, 0, 3, -1, -1, -1, -1},
{5, 4, 8, 5, 8, 10, 10, 8, 11, -1, -1, -1, -1, -1, -1, -1},
{9, 7, 8, 5, 7, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{9, 3, 0, 9, 5, 3, 5, 7, 3, -1, -1, -1, -1, -1, -1, -1},
{0, 7, 8, 0, 1, 7, 1, 5, 7, -1, -1, -1, -1, -1, -1, -1},
{1, 5, 3, 3, 5, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{9, 7, 8, 9, 5, 7, 10, 1, 2, -1, -1, -1, -1, -1, -1, -1},
{10, 1, 2, 9, 5, 0, 5, 3, 0, 5, 7, 3, -1, -1, -1, -1},
{8, 0, 2, 8, 2, 5, 8, 5, 7, 10, 5, 2, -1, -1, -1, -1},
{2, 10, 5, 2, 5, 3, 3, 5, 7, -1, -1, -1, -1, -1, -1, -1},
{7, 9, 5, 7, 8, 9, 3, 11, 2, -1, -1, -1, -1, -1, -1, -1},
{9, 5, 7, 9, 7, 2, 9, 2, 0, 2, 7, 11, -1, -1, -1, -1},
{2, 3, 11, 0, 1, 8, 1, 7, 8, 1, 5, 7, -1, -1, -1, -1},
{11, 2, 1, 11, 1, 7, 7, 1, 5, -1, -1, -1, -1, -1, -1, -1},
{9, 5, 8, 8, 5, 7, 10, 1, 3, 10, 3, 11, -1, -1, -1, -1},
{5, 7, 0, 5, 0, 9, 7, 11, 0, 1, 0, 10, 11, 10, 0, -1},
{11, 10, 0, 11, 0, 3, 10, 5, 0, 8, 0, 7, 5, 7, 0, -1},
{11, 10, 5, 7, 11, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{10, 6, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{0, 8, 3, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{9, 0, 1, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{1, 8, 3, 1, 9, 8, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1},
{1, 6, 5, 2, 6, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{1, 6, 5, 1, 2, 6, 3, 0, 8, -1, -1, -1, -1, -1, -1, -1},
{9, 6, 5, 9, 0, 6, 0, 2, 6, -1, -1, -1, -1, -1, -1, -1},
{5, 9, 8, 5, 8, 2, 5, 2, 6, 3, 2, 8, -1, -1, -1, -1},
{2, 3, 11, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{11, 0, 8, 11, 2, 0, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1},
{0, 1, 9, 2, 3, 11, 5, 10, 6, -1, -1, -1, -1, -1, -1, -1},
{5, 10, 6, 1, 9, 2, 9, 11, 2, 9, 8, 11, -1, -1, -1, -1},
{6, 3, 11, 6, 5, 3, 5, 1, 3, -1, -1, -1, -1, -1, -1, -1},
{0, 8, 11, 0, 11, 5, 0, 5, 1, 5, 11, 6, -1, -1, -1, -1},
{3, 11, 6, 0, 3, 6, 0, 6, 5, 0, 5, 9, -1, -1, -1, -1},
{6, 5, 9, 6, 9, 11, 11, 9, 8, -1, -1, -1, -1, -1, -1, -1},
{5, 10, 6, 4, 7, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{4, 3, 0, 4, 7, 3, 6, 5, 10, -1, -1, -1, -1, -1, -1, -1},
{1, 9, 0, 5, 10, 6, 8, 4, 7, -1, -1, -1, -1, -1, -1, -1},
{10, 6, 5, 1, 9, 7, 1, 7, 3, 7, 9, 4, -1, -1, -1, -1},
{6, 1, 2, 6, 5, 1, 4, 7, 8, -1, -1, -1, -1, -1, -1, -1},
{1, 2, 5, 5, 2, 6, 3, 0, 4, 3, 4, 7, -1, -1, -1, -1},
{8, 4, 7, 9, 0, 5, 0, 6, 5, 0, 2, 6, -1, -1, -1, -1},
{7, 3, 9, 7, 9, 4, 3, 2, 9, 5, 9, 6, 2, 6, 9, -1},
{3, 11, 2, 7, 8, 4, 10, 6, 5, -1, -1, -1, -1, -1, -1, -1},
{5, 10, 6, 4, 7, 2, 4, 2, 0, 2, 7, 11, -1, -1, -1, -1},
{0, 1, 9, 4, 7, 8, 2, 3, 11, 5, 10, 6, -1, -1, -1, -1},
{9, 2, 1, 9, 11, 2, 9, 4, 11, 7, 11, 4, 5, 10, 6, -1},
{8, 4, 7, 3, 11, 5, 3, 5, 1, 5, 11, 6, -1, -1, -1, -1},
{5, 1, 11, 5, 11, 6, 1, 0, 11, 7, 11, 4, 0, 4, 11, -1},
{0, 5, 9, 0, 6, 5, 0, 3, 6, 11, 6, 3, 8, 4, 7, -1},
{6, 5, 9, 6, 9, 11, 4, 7, 9, 7, 11, 9, -1, -1, -1, -1},
{10, 4, 9, 6, 4, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{4, 10, 6, 4, 9, 10, 0, 8, 3, -1, -1, -1, -1, -1, -1, -1},
{10, 0, 1, 10, 6, 0, 6, 4, 0, -1, -1, -1, -1, -1, -1, -1},
{8, 3, 1, 8, 1, 6, 8, 6, 4, 6, 1, 10, -1, -1, -1, -1},
{1, 4, 9, 1, 2, 4, 2, 6, 4, -1, -1, -1, -1, -1, -1, -1},
{3, 0, 8, 1, 2, 9, 2, 4, 9, 2, 6, 4, -1, -1, -1, -1},
{0, 2, 4, 4, 2, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{8, 3, 2, 8, 2, 4, 4, 2, 6, -1, -1, -1, -1, -1, -1, -1},
{10, 4, 9, 10, 6, 4, 11, 2, 3, -1, -1, -1, -1, -1, -1, -1},
{0, 8, 2, 2, 8, 11, 4, 9, 10, 4, 10, 6, -1, -1, -1, -1},
{3, 11, 2, 0, 1, 6, 0, 6, 4, 6, 1, 10, -1, -1, -1, -1},
{6, 4, 1, 6, 1, 10, 4, 8, 1, 2, 1, 11, 8, 11, 1, -1},
{9, 6, 4, 9, 3, 6, 9, 1, 3, 11, 6, 3, -1, -1, -1, -1},
{8, 11, 1, 8, 1, 0, 11, 6, 1, 9, 1, 4, 6, 4, 1, -1},
{3, 11, 6, 3, 6, 0, 0, 6, 4, -1, -1, -1, -1, -1, -1, -1},
{6, 4, 8, 11, 6, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{7, 10, 6, 7, 8, 10, 8, 9, 10, -1, -1, -1, -1, -1, -1, -1},
{0, 7, 3, 0, 10, 7, 0, 9, 10, 6, 7, 10, -1, -1, -1, -1},
{10, 6, 7, 1, 10, 7, 1, 7, 8, 1, 8, 0, -1, -1, -1, -1},
{10, 6, 7, 10, 7, 1, 1, 7, 3, -1, -1, -1, -1, -1, -1, -1},
{1, 2, 6, 1, 6, 8, 1, 8, 9, 8, 6, 7, -1, -1, -1, -1},
{2, 6, 9, 2, 9, 1, 6, 7, 9, 0, 9, 3, 7, 3, 9, -1},
{7, 8, 0, 7, 0, 6, 6, 0, 2, -1, -1, -1, -1, -1, -1, -1},
{7, 3, 2, 6, 7, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{2, 3, 11, 10, 6, 8, 10, 8, 9, 8, 6, 7, -1, -1, -1, -1},
{2, 0, 7, 2, 7, 11, 0, 9, 7, 6, 7, 10, 9, 10, 7, -1},
{1, 8, 0, 1, 7, 8, 1, 10, 7, 6, 7, 10, 2, 3, 11, -1},
{11, 2, 1, 11, 1, 7, 10, 6, 1, 6, 7, 1, -1, -1, -1, -1},
{8, 9, 6, 8, 6, 7, 9, 1, 6, 11, 6, 3, 1, 3, 6, -1},
{0, 9, 1, 11, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{7, 8, 0, 7, 0, 6, 3, 11, 0, 11, 6, 0, -1, -1, -1, -1},
{7, 11, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{7, 6, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{3, 0, 8, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{0, 1, 9, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{8, 1, 9, 8, 3, 1, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1},
{10, 1, 2, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{1, 2, 10, 3, 0, 8, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1},
{2, 9, 0, 2, 10, 9, 6, 11, 7, -1, -1, -1, -1, -1, -1, -1},
{6, 11, 7, 2, 10, 3, 10, 8, 3, 10, 9, 8, -1, -1, -1, -1},
{7, 2, 3, 6, 2, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{7, 0, 8, 7, 6, 0, 6, 2, 0, -1, -1, -1, -1, -1, -1, -1},
{2, 7, 6, 2, 3, 7, 0, 1, 9, -1, -1, -1, -1, -1, -1, -1},
{1, 6, 2, 1, 8, 6, 1, 9, 8, 8, 7, 6, -1, -1, -1, -1},
{10, 7, 6, 10, 1, 7, 1, 3, 7, -1, -1, -1, -1, -1, -1, -1},
{10, 7, 6, 1, 7, 10, 1, 8, 7, 1, 0, 8, -1, -1, -1, -1},
{0, 3, 7, 0, 7, 10, 0, 10, 9, 6, 10, 7, -1, -1, -1, -1},
{7, 6, 10, 7, 10, 8, 8, 10, 9, -1, -1, -1, -1, -1, -1, -1},
{6, 8, 4, 11, 8, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{3, 6, 11, 3, 0, 6, 0, 4, 6, -1, -1, -1, -1, -1, -1, -1},
{8, 6, 11, 8, 4, 6, 9, 0, 1, -1, -1, -1, -1, -1, -1, -1},
{9, 4, 6, 9, 6, 3, 9, 3, 1, 11, 3, 6, -1, -1, -1, -1},
{6, 8, 4, 6, 11, 8, 2, 10, 1, -1, -1, -1, -1, -1, -1, -1},
{1, 2, 10, 3, 0, 11, 0, 6, 11, 0, 4, 6, -1, -1, -1, -1},
{4, 11, 8, 4, 6, 11, 0, 2, 9, 2, 10, 9, -1, -1, -1, -1},
{10, 9, 3, 10, 3, 2, 9, 4, 3, 11, 3, 6, 4, 6, 3, -1},
{8, 2, 3, 8, 4, 2, 4, 6, 2, -1, -1, -1, -1, -1, -1, -1},
{0, 4, 2, 4, 6, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{1, 9, 0, 2, 3, 4, 2, 4, 6, 4, 3, 8, -1, -1, -1, -1},
{1, 9, 4, 1, 4, 2, 2, 4, 6, -1, -1, -1, -1, -1, -1, -1},
{8, 1, 3, 8, 6, 1, 8, 4, 6, 6, 10, 1, -1, -1, -1, -1},
{10, 1, 0, 10, 0, 6, 6, 0, 4, -1, -1, -1, -1, -1, -1, -1},
{4, 6, 3, 4, 3, 8, 6, 10, 3, 0, 3, 9, 10, 9, 3, -1},
{10, 9, 4, 6, 10, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{4, 9, 5, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{0, 8, 3, 4, 9, 5, 11, 7, 6, -1, -1, -1, -1, -1, -1, -1},
{5, 0, 1, 5, 4, 0, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1},
{11, 7, 6, 8, 3, 4, 3, 5, 4, 3, 1, 5, -1, -1, -1, -1},
{9, 5, 4, 10, 1, 2, 7, 6, 11, -1, -1, -1, -1, -1, -1, -1},
{6, 11, 7, 1, 2, 10, 0, 8, 3, 4, 9, 5, -1, -1, -1, -1},
{7, 6, 11, 5, 4, 10, 4, 2, 10, 4, 0, 2, -1, -1, -1, -1},
{3, 4, 8, 3, 5, 4, 3, 2, 5, 10, 5, 2, 11, 7, 6, -1},
{7, 2, 3, 7, 6, 2, 5, 4, 9, -1, -1, -1, -1, -1, -1, -1},
{9, 5, 4, 0, 8, 6, 0, 6, 2, 6, 8, 7, -1, -1, -1, -1},
{3, 6, 2, 3, 7, 6, 1, 5, 0, 5, 4, 0, -1, -1, -1, -1},
{6, 2, 8, 6, 8, 7, 2, 1, 8, 4, 8, 5, 1, 5, 8, -1},
{9, 5, 4, 10, 1, 6, 1, 7, 6, 1, 3, 7, -1, -1, -1, -1},
{1, 6, 10, 1, 7, 6, 1, 0, 7, 8, 7, 0, 9, 5, 4, -1},
{4, 0, 10, 4, 10, 5, 0, 3, 10, 6, 10, 7, 3, 7, 10, -1},
{7, 6, 10, 7, 10, 8, 5, 4, 10, 4, 8, 10, -1, -1, -1, -1},
{6, 9, 5, 6, 11, 9, 11, 8, 9, -1, -1, -1, -1, -1, -1, -1},
{3, 6, 11, 0, 6, 3, 0, 5, 6, 0, 9, 5, -1, -1, -1, -1},
{0, 11, 8, 0, 5, 11, 0, 1, 5, 5, 6, 11, -1, -1, -1, -1},
{6, 11, 3, 6, 3, 5, 5, 3, 1, -1, -1, -1, -1, -1, -1, -1},
{1, 2, 10, 9, 5, 11, 9, 11, 8, 11, 5, 6, -1, -1, -1, -1},
{0, 11, 3, 0, 6, 11, 0, 9, 6, 5, 6, 9, 1, 2, 10, -1},
{11, 8, 5, 11, 5, 6, 8, 0, 5, 10, 5, 2, 0, 2, 5, -1},
{6, 11, 3, 6, 3, 5, 2, 10, 3, 10, 5, 3, -1, -1, -1, -1},
{5, 8, 9, 5, 2, 8, 5, 6, 2, 3, 8, 2, -1, -1, -1, -1},
{9, 5, 6, 9, 6, 0, 0, 6, 2, -1, -1, -1, -1, -1, -1, -1},
{1, 5, 8, 1, 8, 0, 5, 6, 8, 3, 8, 2, 6, 2, 8, -1},
{1, 5, 6, 2, 1, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{1, 3, 6, 1, 6, 10, 3, 8, 6, 5, 6, 9, 8, 9, 6, -1},
{10, 1, 0, 10, 0, 6, 9, 5, 0, 5, 6, 0, -1, -1, -1, -1},
{0, 3, 8, 5, 6, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{10, 5, 6, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{11, 5, 10, 7, 5, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{11, 5, 10, 11, 7, 5, 8, 3, 0, -1, -1, -1, -1, -1, -1, -1},
{5, 11, 7, 5, 10, 11, 1, 9, 0, -1, -1, -1, -1, -1, -1, -1},
{10, 7, 5, 10, 11, 7, 9, 8, 1, 8, 3, 1, -1, -1, -1, -1},
{11, 1, 2, 11, 7, 1, 7, 5, 1, -1, -1, -1, -1, -1, -1, -1},
{0, 8, 3, 1, 2, 7, 1, 7, 5, 7, 2, 11, -1, -1, -1, -1},
{9, 7, 5, 9, 2, 7, 9, 0, 2, 2, 11, 7, -1, -1, -1, -1},
{7, 5, 2, 7, 2, 11, 5, 9, 2, 3, 2, 8, 9, 8, 2, -1},
{2, 5, 10, 2, 3, 5, 3, 7, 5, -1, -1, -1, -1, -1, -1, -1},
{8, 2, 0, 8, 5, 2, 8, 7, 5, 10, 2, 5, -1, -1, -1, -1},
{9, 0, 1, 5, 10, 3, 5, 3, 7, 3, 10, 2, -1, -1, -1, -1},
{9, 8, 2, 9, 2, 1, 8, 7, 2, 10, 2, 5, 7, 5, 2, -1},
{1, 3, 5, 3, 7, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{0, 8, 7, 0, 7, 1, 1, 7, 5, -1, -1, -1, -1, -1, -1, -1},
{9, 0, 3, 9, 3, 5, 5, 3, 7, -1, -1, -1, -1, -1, -1, -1},
{9, 8, 7, 5, 9, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{5, 8, 4, 5, 10, 8, 10, 11, 8, -1, -1, -1, -1, -1, -1, -1},
{5, 0, 4, 5, 11, 0, 5, 10, 11, 11, 3, 0, -1, -1, -1, -1},
{0, 1, 9, 8, 4, 10, 8, 10, 11, 10, 4, 5, -1, -1, -1, -1},
{10, 11, 4, 10, 4, 5, 11, 3, 4, 9, 4, 1, 3, 1, 4, -1},
{2, 5, 1, 2, 8, 5, 2, 11, 8, 4, 5, 8, -1, -1, -1, -1},
{0, 4, 11, 0, 11, 3, 4, 5, 11, 2, 11, 1, 5, 1, 11, -1},
{0, 2, 5, 0, 5, 9, 2, 11, 5, 4, 5, 8, 11, 8, 5, -1},
{9, 4, 5, 2, 11, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{2, 5, 10, 3, 5, 2, 3, 4, 5, 3, 8, 4, -1, -1, -1, -1},
{5, 10, 2, 5, 2, 4, 4, 2, 0, -1, -1, -1, -1, -1, -1, -1},
{3, 10, 2, 3, 5, 10, 3, 8, 5, 4, 5, 8, 0, 1, 9, -1},
{5, 10, 2, 5, 2, 4, 1, 9, 2, 9, 4, 2, -1, -1, -1, -1},
{8, 4, 5, 8, 5, 3, 3, 5, 1, -1, -1, -1, -1, -1, -1, -1},
{0, 4, 5, 1, 0, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{8, 4, 5, 8, 5, 3, 9, 0, 5, 0, 3, 5, -1, -1, -1, -1},
{9, 4, 5, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{4, 11, 7, 4, 9, 11, 9, 10, 11, -1, -1, -1, -1, -1, -1, -1},
{0, 8, 3, 4, 9, 7, 9, 11, 7, 9, 10, 11, -1, -1, -1, -1},
{1, 10, 11, 1, 11, 4, 1, 4, 0, 7, 4, 11, -1, -1, -1, -1},
{3, 1, 4, 3, 4, 8, 1, 10, 4, 7, 4, 11, 10, 11, 4, -1},
{4, 11, 7, 9, 11, 4, 9, 2, 11, 9, 1, 2, -1, -1, -1, -1},
{9, 7, 4, 9, 11, 7, 9, 1, 11, 2, 11, 1, 0, 8, 3, -1},
{11, 7, 4, 11, 4, 2, 2, 4, 0, -1, -1, -1, -1, -1, -1, -1},
{11, 7, 4, 11, 4, 2, 8, 3, 4, 3, 2, 4, -1, -1, -1, -1},
{2, 9, 10, 2, 7, 9, 2, 3, 7, 7, 4, 9, -1, -1, -1, -1},
{9, 10, 7, 9, 7, 4, 10, 2, 7, 8, 7, 0, 2, 0, 7, -1},
{3, 7, 10, 3, 10, 2, 7, 4, 10, 1, 10, 0, 4, 0, 10, -1},
{1, 10, 2, 8, 7, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{4, 9, 1, 4, 1, 7, 7, 1, 3, -1, -1, -1, -1, -1, -1, -1},
{4, 9, 1, 4, 1, 7, 0, 8, 1, 8, 7, 1, -1, -1, -1, -1},
{4, 0, 3, 7, 4, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{4, 8, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{9, 10, 8, 10, 11, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{3, 0, 9, 3, 9, 11, 11, 9, 10, -1, -1, -1, -1, -1, -1, -1},
{0, 1, 10, 0, 10, 8, 8, 10, 11, -1, -1, -1, -1, -1, -1, -1},
{3, 1, 10, 11, 3, 10, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{1, 2, 11, 1, 11, 9, 9, 11, 8, -1, -1, -1, -1, -1, -1, -1},
{3, 0, 9, 3, 9, 11, 1, 2, 9, 2, 11, 9, -1, -1, -1, -1},
{0, 2, 11, 8, 0, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{3, 2, 11, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{2, 3, 8, 2, 8, 10, 10, 8, 9, -1, -1, -1, -1, -1, -1, -1},
{9, 10, 2, 0, 9, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{2, 3, 8, 2, 8, 10, 0, 1, 8, 1, 10, 8, -1, -1, -1, -1},
{1, 10, 2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{1, 3, 8, 9, 1, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{0, 9, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{0, 3, 8, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},
{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1}};
// Table mapping each edge to the corresponding cube vertices.
// Row e lists the two endpoint vertex IDs (local 0-7 vertex numbering; see
// the cube diagram on struct Cube) of edge e.
const int _EDGE_TO_VERTICES[12][2] = {
    {0, 1}, // e0
    {1, 5}, // e1
    {4, 5}, // e2
    {0, 4}, // e3
    {2, 3}, // e4
    {3, 7}, // e5
    {6, 7}, // e6
    {2, 6}, // e7
    {0, 2}, // e8
    {1, 3}, // e9
    {5, 7}, // e10
    {4, 6}, // e11
};
// Table mapping from 0-7 to v0-v7 in cube.vertices.
// Cube::p stores corners in x/y/z bit order; this permutation converts loop
// index i (diagram vertex vi) into the matching index in Cube::p when the
// constructor builds the cube configuration index.
const int _INDEX_TABLE[8] = {0, 1, 5, 4, 2, 3, 7, 6};
// Data structures for the marching cubes
// A 3D point used while extracting the isosurface.
//
// Supports scaling and addition (for interpolating positions along cube
// edges) and approximate comparison with tolerance EPS (for detecting
// degenerate triangle faces).
struct Vertex {
  // Construct a vertex at (x, y, z); defaults to the origin.
  // Used when performing marching cubes in each cell.
  explicit Vertex(float x = 0.0f, float y = 0.0f, float z = 0.0f)
      : x(x), y(y), z(z) {}
  // Scale every coordinate by `s`; used for vertex interpolation.
  Vertex operator*(float s) const {
    Vertex scaled;
    scaled.x = x * s;
    scaled.y = y * s;
    scaled.z = z * s;
    return scaled;
  }
  // Component-wise sum; used for vertex interpolation.
  Vertex operator+(const Vertex& other) const {
    Vertex summed;
    summed.x = x + other.x;
    summed.y = y + other.y;
    summed.z = z + other.z;
    return summed;
  }
  // Approximate equality: true when every coordinate differs by less than
  // EPS. Used for checking degenerate triangles.
  bool operator==(const Vertex& other) const {
    const bool close_x = std::abs(x - other.x) < EPS;
    const bool close_y = std::abs(y - other.y) < EPS;
    const bool close_z = std::abs(z - other.z) < EPS;
    return close_x && close_y && close_z;
  }
  // Approximate inequality: true when any coordinate differs by EPS or more.
  bool operator!=(const Vertex& other) const {
    const bool far_x = std::abs(x - other.x) >= EPS;
    const bool far_y = std::abs(y - other.y) >= EPS;
    const bool far_z = std::abs(z - other.z) >= EPS;
    return far_x || far_y || far_z;
  }
  // Vertex position.
  float x, y, z;
};
// A single cell of the scalar-field grid with base corner at integer
// offset (x, y, z). On construction, computes the cell's marching-cubes
// configuration index from the 8 corner values.
struct Cube {
  // Edge and vertex convention:
  //            v4_______e4____________v5
  //           /|                    /|
  //          / |                   / |
  //      e7/   |               e5/   |
  //        /___|______e6_________/    |
  //     v7|    |                 |v6  |e9
  //       |    |                 |    |
  //       |    |e8               |e10 |
  //    e11|    |                 |    |
  //       |    |_________________|___ |
  //       |   / v0      e0       |   /v1
  //       |  /                   |  /
  //       | /e3                  | /e1
  //       |/_____________________|/
  //       v3         e2          v2
  //
  // Corner positions; NOTE: stored in x/y/z bit order (see constructor),
  // not in the v0-v7 diagram order. _INDEX_TABLE maps between the two.
  Vertex p[8];
  // Integer grid coordinates of the cube's base corner.
  int x, y, z;
  // Marching-cubes configuration index: bit i is set when the field value
  // at diagram-vertex vi is below the isolevel. Indexes into _FACE_TABLE.
  int cubeindex = 0;
  Cube(
      int x,
      int y,
      int z,
      const at::TensorAccessor<float, 3>& vol_a,
      const float isolevel)
      : x(x), y(y), z(z) {
    // vertex position (x, y, z) for v0-v1-v4-v5-v3-v2-v7-v6:
    // bit 0 of v offsets x, bit 1 offsets y, bit 2 offsets z.
    for (int v = 0; v < 8; v++) {
      p[v] = Vertex(x + (v & 1), y + (v >> 1 & 1), z + (v >> 2 & 1));
    }
    // Calculates cube configuration index given values of the cube vertices.
    // Vertex coordinates are integral here although stored as floats; they
    // convert implicitly to tensor indices.
    for (int i = 0; i < 8; i++) {
      const int idx = _INDEX_TABLE[i];
      Vertex v = p[idx];
      if (vol_a[v.z][v.y][v.x] < isolevel) {
        cubeindex |= (1 << i);
      }
    }
  }
  // Linearly interpolate the position where an isosurface cuts an edge
  // between two vertices, based on their scalar values.
  //
  // Args:
  //   isolevel: float value used as threshold
  //   edge: edge (ID) to interpolate
  //   vol_a: 3D scalar field
  //
  // Returns:
  //   point: interpolated vertex (an endpoint is returned unchanged when
  //   it already lies on the isolevel, or when the edge values are too
  //   close for a stable division)
  Vertex VertexInterp(
      float isolevel,
      const int edge,
      const at::TensorAccessor<float, 3>& vol_a) {
    const int v1 = _EDGE_TO_VERTICES[edge][0];
    const int v2 = _EDGE_TO_VERTICES[edge][1];
    Vertex p1 = p[v1];
    Vertex p2 = p[v2];
    float val1 = vol_a[p1.z][p1.y][p1.x];
    float val2 = vol_a[p2.z][p2.y][p2.x];
    float ratio = 1.0f;
    // Degenerate cases: snap to an endpoint instead of dividing by a
    // near-zero value difference.
    if (std::abs(isolevel - val1) < EPS) {
      return p1;
    } else if (std::abs(isolevel - val2) < EPS) {
      return p2;
    } else if (std::abs(val1 - val2) < EPS) {
      return p1;
    }
    // interpolate vertex p based on two vertices on the edge
    ratio = (isolevel - val1) / (val2 - val1);
    return p1 * (1 - ratio) + p2 * ratio;
  }
  // Get a tuple of global vertex ID from a local edge ID.
  // Global vertex ID is calculated as (x + dx) + (y + dy) * W + (z + dz) * W *
  // H
  // Args:
  //   edge: local edge ID in the cube
  //   W: width of x dimension
  //   H: height of y dimension
  //
  // Returns:
  //   a pair of global vertex ID (float coordinates convert implicitly to
  //   int; they hold integral grid positions)
  //
  std::pair<int, int> GetVPairFromEdge(const int edge, int W, int H) {
    const int v1 = _EDGE_TO_VERTICES[edge][0];
    const int v2 = _EDGE_TO_VERTICES[edge][1];
    const int v1_id = p[v1].x + p[v1].y * W + p[v1].z * W * H;
    const int v2_id = p[v2].x + p[v2].y * W + p[v2].z * W * H;
    return std::make_pair(v1_id, v2_id);
  }
};
// helper functions for hashing
// Helper functions for hashing pairs of global vertex IDs.
// NOTE(review): adding hash_combine to namespace std, and specializing
// std::hash for std::pair<int, int> (not a program-defined type), is
// technically undefined behavior per the C++ standard, though it works on
// common toolchains. Consider a named hasher passed explicitly to the
// unordered containers — changing it here would break existing
// std::unordered_map<std::pair<int, int>, ...> users, so only flagging.
namespace std {
// Taken from boost library to combine several hash functions.
// 0x9e3779b9 is the 32-bit golden-ratio constant boost uses to decorrelate
// the combined hashes.
template <class T>
inline void hash_combine(size_t& s, const T& v) {
  std::hash<T> h;
  s ^= h(v) + 0x9e3779b9 + (s << 6) + (s >> 2);
}
// Function for hashing a pair of vertices (global vertex IDs).
template <>
struct hash<std::pair<int, int>> {
  size_t operator()(const std::pair<int, int>& p) const {
    size_t res = 0;
    hash_combine(res, p.first);
    hash_combine(res, p.second);
    return res;
  }
};
} // namespace std

View File

@@ -10,9 +10,7 @@
#include <algorithm>
#include <list>
#include <queue>
#include <thread>
#include <tuple>
#include "ATen/core/TensorAccessor.h"
#include "rasterize_points/rasterization_utils.h"
#include "utils/geometry_utils.h"
#include "utils/vec2.h"
@@ -119,28 +117,54 @@ struct IsNeighbor {
int neighbor_idx;
};
namespace {
void RasterizeMeshesNaiveCpu_worker(
const int start_yi,
const int end_yi,
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
RasterizeMeshesNaiveCpu(
const torch::Tensor& face_verts,
const torch::Tensor& mesh_to_face_first_idx,
const torch::Tensor& num_faces_per_mesh,
const torch::Tensor& clipped_faces_neighbor_idx,
const std::tuple<int, int> image_size,
const float blur_radius,
const int faces_per_pixel,
const bool perspective_correct,
const bool clip_barycentric_coords,
const bool cull_backfaces,
const int32_t N,
const int H,
const int W,
const int K,
at::TensorAccessor<float, 3>& face_verts_a,
at::TensorAccessor<float, 1>& face_areas_a,
at::TensorAccessor<float, 2>& face_bboxes_a,
at::TensorAccessor<int64_t, 1>& neighbor_idx_a,
at::TensorAccessor<float, 4>& zbuf_a,
at::TensorAccessor<int64_t, 4>& face_idxs_a,
at::TensorAccessor<float, 4>& pix_dists_a,
at::TensorAccessor<float, 5>& barycentric_coords_a) {
const bool cull_backfaces) {
if (face_verts.ndimension() != 3 || face_verts.size(1) != 3 ||
face_verts.size(2) != 3) {
AT_ERROR("face_verts must have dimensions (num_faces, 3, 3)");
}
if (num_faces_per_mesh.size(0) != mesh_to_face_first_idx.size(0)) {
AT_ERROR(
"num_faces_per_mesh must have save size first dimension as mesh_to_face_first_idx");
}
const int32_t N = mesh_to_face_first_idx.size(0); // batch_size.
const int H = std::get<0>(image_size);
const int W = std::get<1>(image_size);
const int K = faces_per_pixel;
auto long_opts = num_faces_per_mesh.options().dtype(torch::kInt64);
auto float_opts = face_verts.options().dtype(torch::kFloat32);
// Initialize output tensors.
torch::Tensor face_idxs = torch::full({N, H, W, K}, -1, long_opts);
torch::Tensor zbuf = torch::full({N, H, W, K}, -1, float_opts);
torch::Tensor pix_dists = torch::full({N, H, W, K}, -1, float_opts);
torch::Tensor barycentric_coords =
torch::full({N, H, W, K, 3}, -1, float_opts);
auto face_verts_a = face_verts.accessor<float, 3>();
auto face_idxs_a = face_idxs.accessor<int64_t, 4>();
auto zbuf_a = zbuf.accessor<float, 4>();
auto pix_dists_a = pix_dists.accessor<float, 4>();
auto barycentric_coords_a = barycentric_coords.accessor<float, 5>();
auto neighbor_idx_a = clipped_faces_neighbor_idx.accessor<int64_t, 1>();
auto face_bboxes = ComputeFaceBoundingBoxes(face_verts);
auto face_bboxes_a = face_bboxes.accessor<float, 2>();
auto face_areas = ComputeFaceAreas(face_verts);
auto face_areas_a = face_areas.accessor<float, 1>();
for (int n = 0; n < N; ++n) {
// Loop through each mesh in the batch.
// Get the start index of the faces in faces_packed and the num faces
@@ -150,7 +174,7 @@ void RasterizeMeshesNaiveCpu_worker(
(face_start_idx + num_faces_per_mesh[n].item().to<int32_t>());
// Iterate through the horizontal lines of the image from top to bottom.
for (int yi = start_yi; yi < end_yi; ++yi) {
for (int yi = 0; yi < H; ++yi) {
// Reverse the order of yi so that +Y is pointing upwards in the image.
const int yidx = H - 1 - yi;
@@ -300,92 +324,6 @@ void RasterizeMeshesNaiveCpu_worker(
}
}
}
}
} // namespace
// CPU entry point for naive mesh rasterization.
//
// Validates inputs, allocates the output tensors, precomputes per-face
// bounding boxes and areas, then splits the image rows into contiguous
// chunks rasterized in parallel by RasterizeMeshesNaiveCpu_worker threads.
//
// Args:
//   face_verts: (num_faces, 3, 3) float tensor of per-face vertex positions.
//   mesh_to_face_first_idx: (N,) start index of each mesh's faces.
//   num_faces_per_mesh: (N,) number of faces for each mesh.
//   clipped_faces_neighbor_idx: (num_faces,) neighbor index per face,
//       forwarded to the worker.
//   image_size: (H, W) output image size.
//   blur_radius, faces_per_pixel, perspective_correct,
//       clip_barycentric_coords, cull_backfaces: rasterization settings,
//       forwarded to the worker.
//
// Returns:
//   Tuple (face_idxs, zbuf, barycentric_coords, pix_dists) of shape
//   (N, H, W, K) each (barycentric_coords with a trailing 3), filled with
//   -1 where fewer than K faces cover a pixel.
//
// Raises (AT_ERROR) when face_verts is not (num_faces, 3, 3) or the batch
// sizes of num_faces_per_mesh and mesh_to_face_first_idx disagree.
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
RasterizeMeshesNaiveCpu(
    const torch::Tensor& face_verts,
    const torch::Tensor& mesh_to_face_first_idx,
    const torch::Tensor& num_faces_per_mesh,
    const torch::Tensor& clipped_faces_neighbor_idx,
    const std::tuple<int, int> image_size,
    const float blur_radius,
    const int faces_per_pixel,
    const bool perspective_correct,
    const bool clip_barycentric_coords,
    const bool cull_backfaces) {
  if (face_verts.ndimension() != 3 || face_verts.size(1) != 3 ||
      face_verts.size(2) != 3) {
    AT_ERROR("face_verts must have dimensions (num_faces, 3, 3)");
  }
  if (num_faces_per_mesh.size(0) != mesh_to_face_first_idx.size(0)) {
    // Fixed garbled message ("must have save size first dimension as").
    AT_ERROR(
        "num_faces_per_mesh must have the same first dimension size as mesh_to_face_first_idx");
  }
  const int32_t N = mesh_to_face_first_idx.size(0); // batch_size.
  const int H = std::get<0>(image_size);
  const int W = std::get<1>(image_size);
  const int K = faces_per_pixel;
  auto long_opts = num_faces_per_mesh.options().dtype(torch::kInt64);
  auto float_opts = face_verts.options().dtype(torch::kFloat32);
  // Initialize output tensors; -1 marks "no face" entries.
  torch::Tensor face_idxs = torch::full({N, H, W, K}, -1, long_opts);
  torch::Tensor zbuf = torch::full({N, H, W, K}, -1, float_opts);
  torch::Tensor pix_dists = torch::full({N, H, W, K}, -1, float_opts);
  torch::Tensor barycentric_coords =
      torch::full({N, H, W, K, 3}, -1, float_opts);
  auto face_verts_a = face_verts.accessor<float, 3>();
  auto face_idxs_a = face_idxs.accessor<int64_t, 4>();
  auto zbuf_a = zbuf.accessor<float, 4>();
  auto pix_dists_a = pix_dists.accessor<float, 4>();
  auto barycentric_coords_a = barycentric_coords.accessor<float, 5>();
  auto neighbor_idx_a = clipped_faces_neighbor_idx.accessor<int64_t, 1>();
  // Precompute per-face quantities once, shared read-only by all workers.
  auto face_bboxes = ComputeFaceBoundingBoxes(face_verts);
  auto face_bboxes_a = face_bboxes.accessor<float, 2>();
  auto face_areas = ComputeFaceAreas(face_verts);
  auto face_areas_a = face_areas.accessor<float, 1>();
  // Parallelize over image rows: each thread handles a contiguous chunk of
  // ceil(H / n_threads) rows; workers write disjoint output rows, so no
  // synchronization is needed beyond join().
  const int64_t n_threads = at::get_num_threads();
  std::vector<std::thread> threads;
  threads.reserve(n_threads);
  const int chunk_size = 1 + (H - 1) / n_threads;
  int start_yi = 0;
  for (int iThread = 0; iThread < n_threads; ++iThread) {
    const int64_t end_yi = std::min(start_yi + chunk_size, H);
    threads.emplace_back(
        RasterizeMeshesNaiveCpu_worker,
        start_yi,
        end_yi,
        mesh_to_face_first_idx,
        num_faces_per_mesh,
        blur_radius,
        perspective_correct,
        clip_barycentric_coords,
        cull_backfaces,
        N,
        H,
        W,
        K,
        std::ref(face_verts_a),
        std::ref(face_areas_a),
        std::ref(face_bboxes_a),
        std::ref(neighbor_idx_a),
        std::ref(zbuf_a),
        std::ref(face_idxs_a),
        std::ref(pix_dists_a),
        std::ref(barycentric_coords_a));
    start_yi += chunk_size;
  }
  for (auto&& thread : threads) {
    thread.join();
  }
  return std::make_tuple(face_idxs, zbuf, barycentric_coords, pix_dists);
}

View File

@@ -12,7 +12,6 @@
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <cub/cub.cuh>
#include "utils/warp_reduce.cuh"
template <unsigned int block_size>
@@ -26,19 +25,20 @@ __global__ void FarthestPointSamplingKernel(
const at::PackedTensorAccessor64<int64_t, 1, at::RestrictPtrTraits> start_idxs
// clang-format on
) {
typedef cub::BlockReduce<
cub::KeyValuePair<int64_t, float>,
block_size,
cub::BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY>
BlockReduce;
__shared__ typename BlockReduce::TempStorage temp_storage;
__shared__ int64_t selected_store;
// Get constants
const int64_t N = points.size(0);
const int64_t P = points.size(1);
const int64_t D = points.size(2);
// Create single shared memory buffer which is split and cast to different
// types: dists/dists_idx are used to save the maximum distances seen by the
// points processed by any one thread and the associated point indices.
// These values only need to be accessed by other threads in this block which
// are processing the same batch and not by other blocks.
extern __shared__ char shared_buf[];
float* dists = (float*)shared_buf; // block_size floats
int64_t* dists_idx = (int64_t*)&dists[block_size]; // block_size int64_t
// Get batch index and thread index
const int64_t batch_idx = blockIdx.x;
const size_t tid = threadIdx.x;
@@ -82,26 +82,43 @@ __global__ void FarthestPointSamplingKernel(
max_dist = (p_min_dist > max_dist) ? p_min_dist : max_dist;
}
// max_dist, max_dist_idx are now the max point and idx seen by this thread.
// Now find the index corresponding to the maximum distance seen by any
// thread. (This value is only on thread 0.)
selected =
BlockReduce(temp_storage)
.Reduce(
cub::KeyValuePair<int64_t, float>(max_dist_idx, max_dist),
cub::ArgMax(),
block_size)
.key;
// After going through all points for this thread, save the max
// point and idx seen by this thread. Each thread sees P/block_size points.
dists[tid] = max_dist;
dists_idx[tid] = max_dist_idx;
// Sync to ensure all threads in the block have updated their max point.
__syncthreads();
// Parallelized block reduction to find the max point seen by
// all the threads in this block for iteration k.
// Each block represents one batch element so we can use a divide/conquer
// approach to find the max, syncing all threads after each step.
for (int s = block_size / 2; s > 0; s >>= 1) {
if (tid < s) {
// Compare the best point seen by two threads and update the shared
// memory at the location of the first thread index with the max out
// of the two threads.
if (dists[tid] < dists[tid + s]) {
dists[tid] = dists[tid + s];
dists_idx[tid] = dists_idx[tid + s];
}
}
__syncthreads();
}
// TODO(nikhilar): As reduction proceeds, the number of “active” threads
// decreases. When tid < 32, there should only be one warp left which could
// be unrolled.
// The overall max after reducing will be saved
// at the location of tid = 0.
selected = dists_idx[0];
if (tid == 0) {
// Write the farthest point for iteration k to global memory
idxs[batch_idx][k] = selected;
selected_store = selected;
}
// Ensure `selected` in all threads equals the global maximum.
__syncthreads();
selected = selected_store;
}
}
@@ -168,8 +185,15 @@ at::Tensor FarthestPointSamplingCuda(
auto min_point_dist_a =
min_point_dist.packed_accessor64<float, 2, at::RestrictPtrTraits>();
// TempStorage for the reduction uses static shared memory only.
size_t shared_mem = 0;
// Initialize the shared memory which will be used to store the
// distance/index of the best point seen by each thread.
size_t shared_mem = threads * sizeof(float) + threads * sizeof(int64_t);
// TODO: using shared memory for min_point_dist gives an ~2x speed up
// compared to using a global (N, P) shaped tensor, however for
// larger pointclouds this may exceed the shared memory limit per block.
// If a speed up is required for smaller pointclouds, then the storage
// could be switched to shared memory if the required total shared memory is
// within the memory limit per block.
// Support a case for all powers of 2 up to MAX_THREADS_PER_BLOCK possible per
// block.

View File

@@ -61,91 +61,6 @@ class DataLoaderMapProviderBase(ReplaceableBase):
raise NotImplementedError()
@registry.register
class SimpleDataLoaderMapProvider(DataLoaderMapProviderBase):
    """
    Trivial implementation of DataLoaderMapProviderBase.
    If a dataset returns batches from get_eval_batches(), then
    they will be what the corresponding dataloader returns,
    independently of any of the fields on this class.
    Otherwise, returns shuffled batches.
    """

    # Number of samples per shuffled batch (incomplete final batches are
    # dropped — see drop_last=True below).
    batch_size: int = 1
    # Number of DataLoader worker processes (0 = load in the main process).
    num_workers: int = 0
    # If positive, caps the number of batches per epoch for the split;
    # samples are then drawn with replacement. 0 means one full pass.
    dataset_length_train: int = 0
    dataset_length_val: int = 0
    dataset_length_test: int = 0

    def get_data_loader_map(self, datasets: DatasetMap) -> DataLoaderMap:
        """
        Returns a collection of data loaders for a given collection of datasets.
        """
        return DataLoaderMap(
            train=self._make_data_loader(
                datasets.train,
                self.dataset_length_train,
            ),
            val=self._make_data_loader(
                datasets.val,
                self.dataset_length_val,
            ),
            test=self._make_data_loader(
                datasets.test,
                self.dataset_length_test,
            ),
        )

    def _make_data_loader(
        self,
        dataset: Optional[DatasetBase],
        num_batches: int,
    ) -> Optional[DataLoader[FrameData]]:
        """
        Returns the dataloader for a dataset.

        Args:
            dataset: the dataset
            num_batches: possible ceiling on number of batches per epoch

        Returns:
            A DataLoader over the dataset, or None if dataset is None.
        """
        if dataset is None:
            return None
        data_loader_kwargs = {
            "num_workers": self.num_workers,
            "collate_fn": dataset.frame_data_type.collate,
        }
        # Datasets with predefined eval batches get exactly those batches,
        # ignoring batch_size / dataset_length_* settings.
        eval_batches = dataset.get_eval_batches()
        if eval_batches is not None:
            return DataLoader(
                dataset,
                batch_sampler=eval_batches,
                **data_loader_kwargs,
            )
        if num_batches > 0:
            num_samples = self.batch_size * num_batches
        else:
            # num_samples=None lets RandomSampler draw one full epoch.
            num_samples = None
        # sample with replacement only if a custom number of samples is specified
        sampler = RandomSampler(
            dataset,
            replacement=num_samples is not None,
            num_samples=num_samples,
        )
        # drop_last=True keeps every batch exactly batch_size long.
        batch_sampler = BatchSampler(sampler, self.batch_size, drop_last=True)
        return DataLoader(
            dataset,
            batch_sampler=batch_sampler,
            **data_loader_kwargs,
        )
class DoublePoolBatchSampler(Sampler[List[int]]):
"""
Batch sampler for making random batches of a single frame
@@ -206,7 +121,7 @@ class DoublePoolBatchSampler(Sampler[List[int]]):
torch.randperm(len(self.first_indices), generator=self.generator)
for _ in range(n_copies)
]
i_first = torch.cat(raw_indices)[:num_batches]
i_first = torch.concat(raw_indices)[:num_batches]
else:
i_first = torch.randperm(len(self.first_indices), generator=self.generator)
first_indices = [self.first_indices[i] for i in i_first]

View File

@@ -15,11 +15,10 @@ from pytorch3d.renderer.cameras import CamerasBase
from .blender_dataset_map_provider import BlenderDatasetMapProvider # noqa
from .data_loader_map_provider import DataLoaderMap, DataLoaderMapProviderBase
from .dataset_map_provider import DatasetMap, DatasetMapProviderBase
from .dataset_map_provider import DatasetMap, DatasetMapProviderBase, Task
from .json_index_dataset_map_provider import JsonIndexDatasetMapProvider # noqa
from .json_index_dataset_map_provider_v2 import JsonIndexDatasetMapProviderV2 # noqa
from .llff_dataset_map_provider import LlffDatasetMapProvider # noqa
from .rendered_mesh_dataset_map_provider import RenderedMeshDatasetMapProvider # noqa
class DataSourceBase(ReplaceableBase):
@@ -68,6 +67,9 @@ class ImplicitronDataSource(DataSourceBase): # pyre-ignore[13]
dataloaders = self.data_loader_map_provider.get_data_loader_map(datasets)
return datasets, dataloaders
def get_task(self) -> Task:
    # Forward the query to the configured dataset map provider, which knows
    # whether its data is single- or multi-sequence.
    return self.dataset_map_provider.get_task()
@property
def all_train_cameras(self) -> Optional[CamerasBase]:
if self._all_train_cameras_cache is None: # pyre-ignore[16]

View File

@@ -7,6 +7,7 @@
import logging
import os
from dataclasses import dataclass
from enum import Enum
from typing import Iterator, Optional
from iopath.common.file_io import PathManager
@@ -52,6 +53,11 @@ class DatasetMap:
yield self.test
class Task(Enum):
    """
    The type of a dataset's reconstruction task. The enum values match the
    task strings used in configs (e.g. `Task(self.task_str)` elsewhere in
    this codebase).
    """

    SINGLE_SEQUENCE = "singlesequence"
    MULTI_SEQUENCE = "multisequence"
class DatasetMapProviderBase(ReplaceableBase):
"""
Base class for a provider of training / validation and testing
@@ -65,6 +71,9 @@ class DatasetMapProviderBase(ReplaceableBase):
"""
raise NotImplementedError()
def get_task(self) -> Task:
    # Subclasses must report which Task their datasets correspond to.
    raise NotImplementedError()
def get_all_train_cameras(self) -> Optional[CamerasBase]:
"""
If the data is all for a single scene, returns a list

View File

@@ -24,7 +24,7 @@ from typing import (
Sequence,
Tuple,
Type,
TYPE_CHECKING,
TypedDict,
Union,
)
@@ -36,7 +36,6 @@ from pytorch3d.io import IO
from pytorch3d.renderer.camera_utils import join_cameras_as_batch
from pytorch3d.renderer.cameras import CamerasBase, PerspectiveCameras
from pytorch3d.structures.pointclouds import Pointclouds
from tqdm import tqdm
from . import types
from .dataset_base import DatasetBase, FrameData
@@ -46,15 +45,9 @@ from .utils import is_known_frame_scalar
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from typing import TypedDict
class FrameAnnotsEntry(TypedDict):
subset: Optional[str]
frame_annotation: types.FrameAnnotation
else:
FrameAnnotsEntry = dict
class FrameAnnotsEntry(TypedDict):
subset: Optional[str]
frame_annotation: types.FrameAnnotation
@registry.register
@@ -119,11 +112,6 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase):
eval_batches: A list of batches that form the evaluation set;
list of batch-sized lists of indices corresponding to __getitem__
of this class, thus it can be used directly as a batch sampler.
eval_batch_index:
( Optional[List[List[Union[Tuple[str, int, str], Tuple[str, int]]]] )
A list of batches of frames described as (sequence_name, frame_idx)
that can form the evaluation set, `eval_batches` will be set from this.
"""
frame_annotations_type: ClassVar[
@@ -159,7 +147,6 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase):
seed: int = 0
sort_frames: bool = False
eval_batches: Any = None
eval_batch_index: Any = None
# frame_annots: List[FrameAnnotsEntry] = field(init=False)
# seq_annots: Dict[str, types.SequenceAnnotation] = field(init=False)
@@ -172,22 +159,8 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase):
self._sort_frames()
self._load_subset_lists()
self._filter_db() # also computes sequence indices
self._extract_and_set_eval_batches()
logger.info(str(self))
def _extract_and_set_eval_batches(self):
"""
Sets eval_batches based on input eval_batch_index.
"""
if self.eval_batch_index is not None:
if self.eval_batches is not None:
raise ValueError(
"Cannot define both eval_batch_index and eval_batches."
)
self.eval_batches = self.seq_frame_index_to_dataset_index(
self.eval_batch_index
)
def is_filtered(self):
"""
Returns `True` in case the dataset has been filtered and thus some frame annotations
@@ -339,10 +312,9 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase):
"""
Returns the cameras corresponding to all the known frames.
"""
logger.info("Loading all train cameras.")
cameras = []
# pyre-ignore[16]
for frame_idx, frame_annot in enumerate(tqdm(self.frame_annots)):
for frame_idx, frame_annot in enumerate(self.frame_annots):
frame_type = self._get_frame_type(frame_annot)
if frame_type is None:
raise ValueError("subsets not loaded")
@@ -414,26 +386,15 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase):
)
if self.load_point_clouds and point_cloud is not None:
pcl_path = self._fix_point_cloud_path(point_cloud.path)
frame_data.sequence_point_cloud_path = pcl_path = os.path.join(
self.dataset_root, point_cloud.path
)
frame_data.sequence_point_cloud = _load_pointcloud(
self._local_path(pcl_path), max_points=self.max_points
)
frame_data.sequence_point_cloud_path = pcl_path
return frame_data
def _fix_point_cloud_path(self, path: str) -> str:
"""
Fix up a point cloud path from the dataset.
Some files in Co3Dv2 have an accidental absolute path stored.
"""
unwanted_prefix = (
"/large_experiments/p3/replay/datasets/co3d/co3d45k_220512/export_v23/"
)
if path.startswith(unwanted_prefix):
path = path[len(unwanted_prefix) :]
return os.path.join(self.dataset_root, path)
def _load_crop_fg_probability(
self, entry: types.FrameAnnotation
) -> Tuple[

View File

@@ -9,7 +9,7 @@ import json
import os
from typing import Dict, List, Optional, Tuple, Type
from omegaconf import DictConfig
from omegaconf import DictConfig, open_dict
from pytorch3d.implicitron.tools.config import (
expand_args_fields,
registry,
@@ -17,7 +17,12 @@ from pytorch3d.implicitron.tools.config import (
)
from pytorch3d.renderer.cameras import CamerasBase
from .dataset_map_provider import DatasetMap, DatasetMapProviderBase, PathManagerFactory
from .dataset_map_provider import (
DatasetMap,
DatasetMapProviderBase,
PathManagerFactory,
Task,
)
from .json_index_dataset import JsonIndexDataset
from .utils import (
@@ -52,7 +57,6 @@ _CO3D_DATASET_ROOT: str = os.getenv("CO3D_DATASET_ROOT", "")
_NEED_CONTROL: Tuple[str, ...] = (
"dataset_root",
"eval_batches",
"eval_batch_index",
"n_frames_per_sequence",
"path_manager",
"pick_sequence",
@@ -113,8 +117,9 @@ class JsonIndexDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13]
Called by get_default_args(JsonIndexDatasetMapProvider) to
not expose certain fields of each dataset class.
"""
for key in _NEED_CONTROL:
del args[key]
with open_dict(args):
for key in _NEED_CONTROL:
del args[key]
def create_dataset(self):
"""
@@ -154,7 +159,7 @@ class JsonIndexDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13]
# This maps the common names of the dataset subsets ("train"/"val"/"test")
# to the names of the subsets in the CO3D dataset.
set_names_mapping = _get_co3d_set_names_mapping(
self.task_str,
self.get_task(),
self.test_on_train,
self.only_test_set,
)
@@ -179,7 +184,7 @@ class JsonIndexDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13]
eval_batch_index = json.load(f)
restrict_sequence_name = self.restrict_sequence_name
if self.task_str == "singlesequence":
if self.get_task() == Task.SINGLE_SEQUENCE:
if (
self.test_restrict_sequence_id is None
or self.test_restrict_sequence_id < 0
@@ -207,10 +212,6 @@ class JsonIndexDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13]
]
# overwrite the restrict_sequence_name
restrict_sequence_name = [eval_sequence_name]
if len(restrict_sequence_name) > 0:
eval_batch_index = [
b for b in eval_batch_index if b[0][0] in restrict_sequence_name
]
dataset_type: Type[JsonIndexDataset] = registry.get(
JsonIndexDataset, self.dataset_class_type
@@ -238,9 +239,15 @@ class JsonIndexDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13]
n_frames_per_sequence=-1,
subsets=set_names_mapping["test"],
pick_sequence=restrict_sequence_name,
eval_batch_index=eval_batch_index,
**common_kwargs,
)
if len(restrict_sequence_name) > 0:
eval_batch_index = [
b for b in eval_batch_index if b[0][0] in restrict_sequence_name
]
test_dataset.eval_batches = test_dataset.seq_frame_index_to_dataset_index(
eval_batch_index
)
dataset_map = DatasetMap(
train=train_dataset, val=val_dataset, test=test_dataset
)
@@ -261,11 +268,12 @@ class JsonIndexDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13]
# pyre-ignore[16]
return self.dataset_map
def get_all_train_cameras(self) -> Optional[CamerasBase]:
if self.task_str == "multisequence":
return None
def get_task(self) -> Task:
    # `task_str` must be one of the Task enum values ("singlesequence" /
    # "multisequence"); any other string raises ValueError here.
    return Task(self.task_str)
assert self.task_str == "singlesequence"
def get_all_train_cameras(self) -> Optional[CamerasBase]:
if Task(self.task_str) == Task.MULTI_SEQUENCE:
return None
# pyre-ignore[16]
train_dataset = self.dataset_map.train
@@ -274,7 +282,7 @@ class JsonIndexDatasetMapProvider(DatasetMapProviderBase): # pyre-ignore [13]
def _get_co3d_set_names_mapping(
task_str: str,
task: Task,
test_on_train: bool,
only_test: bool,
) -> Dict[str, List[str]]:
@@ -288,7 +296,7 @@ def _get_co3d_set_names_mapping(
- val (if not test_on_train)
- test (if not test_on_train)
"""
single_seq = task_str == "singlesequence"
single_seq = task == Task.SINGLE_SEQUENCE
if only_test:
set_names_mapping = {}

View File

@@ -5,22 +5,17 @@
# LICENSE file in the root directory of this source tree.
import copy
import json
import logging
import os
import warnings
from collections import defaultdict
from typing import Dict, List, Optional, Tuple, Type, Union
from typing import Dict, List, Optional, Type
import numpy as np
from iopath.common.file_io import PathManager
from omegaconf import DictConfig
from pytorch3d.implicitron.dataset.dataset_map_provider import (
DatasetMap,
DatasetMapProviderBase,
PathManagerFactory,
Task,
)
from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset
from pytorch3d.implicitron.tools.config import (
@@ -34,19 +29,6 @@ from pytorch3d.renderer.cameras import CamerasBase
_CO3DV2_DATASET_ROOT: str = os.getenv("CO3DV2_DATASET_ROOT", "")
# _NEED_CONTROL is a list of those elements of JsonIndexDataset which
# are not directly specified for it in the config but come from the
# DatasetMapProvider.
_NEED_CONTROL: Tuple[str, ...] = (
"dataset_root",
"eval_batches",
"eval_batch_index",
"path_manager",
"subsets",
"frame_annotations_file",
"sequence_annotations_file",
"subset_lists_file",
)
logger = logging.getLogger(__name__)
@@ -157,9 +139,6 @@ class JsonIndexDatasetMapProviderV2(DatasetMapProviderBase): # pyre-ignore [13]
only_test_set: Load only the test set. Incompatible with `test_on_train`.
load_eval_batches: Load the file containing eval batches pointing to the
test dataset.
n_known_frames_for_test: Add a certain number of known frames to each
eval batch. Useful for evaluating models that require
source views as input (e.g. NeRF-WCE / PixelNeRF).
dataset_args: Specifies additional arguments to the
JsonIndexDataset constructor call.
path_manager_factory: (Optional) An object that generates an instance of
@@ -175,8 +154,6 @@ class JsonIndexDatasetMapProviderV2(DatasetMapProviderBase): # pyre-ignore [13]
only_test_set: bool = False
load_eval_batches: bool = True
n_known_frames_for_test: int = 0
dataset_class_type: str = "JsonIndexDataset"
dataset: JsonIndexDataset
@@ -199,20 +176,6 @@ class JsonIndexDatasetMapProviderV2(DatasetMapProviderBase): # pyre-ignore [13]
path_manager = self.path_manager_factory.get()
if path_manager is not None:
path_managed_frame_file = path_manager.get_local_path(frame_file)
else:
path_managed_frame_file = frame_file
if not os.path.isfile(path_managed_frame_file):
# The frame_file does not exist.
# Most probably the user has not specified the root folder.
raise ValueError(
f"Looking for frame annotations in {path_managed_frame_file}."
+ " Please specify a correct dataset_root folder."
+ " Note: By default the root folder is taken from the"
+ " CO3DV2_DATASET_ROOT environment variable."
)
# setup the common dataset arguments
common_dataset_kwargs = getattr(self, f"dataset_{self.dataset_class_type}_args")
common_dataset_kwargs = {
@@ -274,18 +237,6 @@ class JsonIndexDatasetMapProviderV2(DatasetMapProviderBase): # pyre-ignore [13]
val_dataset = dataset.subset_from_frame_index(subset_mapping["val"])
logger.info(f"Val dataset: {str(val_dataset)}")
logger.debug("Extracting test dataset.")
if (self.n_known_frames_for_test > 0) and self.load_eval_batches:
# extend the test subset mapping and the dataset with additional
# known views from the train dataset
(
eval_batch_index,
subset_mapping["test"],
) = self._extend_test_data_with_known_views(
subset_mapping,
eval_batch_index,
)
test_dataset = dataset.subset_from_frame_index(subset_mapping["test"])
logger.info(f"Test dataset: {str(test_dataset)}")
if self.load_eval_batches:
@@ -318,15 +269,6 @@ class JsonIndexDatasetMapProviderV2(DatasetMapProviderBase): # pyre-ignore [13]
train=train_dataset, val=val_dataset, test=test_dataset
)
@classmethod
def dataset_tweak_args(cls, type, args: DictConfig) -> None:
"""
Called by get_default_args(JsonIndexDatasetMapProviderV2) to
not expose certain fields of each dataset class.
"""
for key in _NEED_CONTROL:
del args[key]
def create_dataset(self):
# The dataset object is created inside `self.get_dataset_map`
pass
@@ -357,6 +299,12 @@ class JsonIndexDatasetMapProviderV2(DatasetMapProviderBase): # pyre-ignore [13]
)
return category_to_subset_name_list
def get_task(self) -> Task:  # TODO: we plan to get rid of tasks
    # The subset name's prefix encodes the task: "manyview_*" subsets are
    # single-sequence, "fewview_*" subsets are multi-sequence. Any other
    # prefix raises KeyError.
    return {
        "manyview": Task.SINGLE_SEQUENCE,
        "fewview": Task.MULTI_SEQUENCE,
    }[self.subset_name.split("_")[0]]
def get_all_train_cameras(self) -> Optional[CamerasBase]:
# pyre-ignore[16]
train_dataset = self.dataset_map.train
@@ -384,71 +332,26 @@ class JsonIndexDatasetMapProviderV2(DatasetMapProviderBase): # pyre-ignore [13]
return data
def _get_available_subset_names(self):
return get_available_subset_names(
self.dataset_root,
self.category,
path_manager=self.path_manager_factory.get(),
)
def _extend_test_data_with_known_views(
self,
subset_mapping: Dict[str, List[Union[Tuple[str, int], Tuple[str, int, str]]]],
eval_batch_index: List[List[Union[Tuple[str, int, str], Tuple[str, int]]]],
):
# convert the train subset mapping to a dict:
# sequence_to_train_frames: {sequence_name: frame_index}
sequence_to_train_frames = defaultdict(list)
for frame_entry in subset_mapping["train"]:
sequence_name = frame_entry[0]
sequence_to_train_frames[sequence_name].append(frame_entry)
sequence_to_train_frames = dict(sequence_to_train_frames)
test_subset_mapping_set = {tuple(s) for s in subset_mapping["test"]}
# extend the eval batches / subset mapping with the additional examples
eval_batch_index_out = copy.deepcopy(eval_batch_index)
generator = np.random.default_rng(seed=0)
for batch in eval_batch_index_out:
sequence_name = batch[0][0]
sequence_known_entries = sequence_to_train_frames[sequence_name]
idx_to_add = generator.permutation(len(sequence_known_entries))[
: self.n_known_frames_for_test
]
entries_to_add = [sequence_known_entries[a] for a in idx_to_add]
assert all(e in subset_mapping["train"] for e in entries_to_add)
# extend the eval batch with the known views
batch.extend(entries_to_add)
# also add these new entries to the test subset mapping
test_subset_mapping_set.update(tuple(e) for e in entries_to_add)
return eval_batch_index_out, list(test_subset_mapping_set)
path_manager = self.path_manager_factory.get()
if path_manager is not None:
dataset_root = path_manager.get_local_path(self.dataset_root)
else:
dataset_root = self.dataset_root
return get_available_subset_names(dataset_root, self.category)
def get_available_subset_names(
dataset_root: str,
category: str,
path_manager: Optional[PathManager] = None,
) -> List[str]:
def get_available_subset_names(dataset_root: str, category: str) -> List[str]:
"""
Get the available subset names for a given category folder inside a root dataset
folder `dataset_root`.
"""
category_dir = os.path.join(dataset_root, category)
category_dir_exists = (
(path_manager is not None) and path_manager.isdir(category_dir)
) or os.path.isdir(category_dir)
if not category_dir_exists:
if not os.path.isdir(category_dir):
raise ValueError(
f"Looking for dataset files in {category_dir}. "
+ "Please specify a correct dataset_root folder."
)
set_list_dir = os.path.join(category_dir, "set_lists")
set_list_jsons = (os.listdir if path_manager is None else path_manager.ls)(
set_list_dir
)
set_list_jsons = os.listdir(os.path.join(category_dir, "set_lists"))
return [
json_file.replace("set_lists_", "").replace(".json", "")
for json_file in set_list_jsons

View File

@@ -32,21 +32,17 @@ class LlffDatasetMapProvider(SingleSceneDatasetMapProviderBase):
and test datasets, and this many random training frames are added to
each test batch. If not set, test batches each contain just a single
testing frame.
downscale_factor: determines image sizes.
"""
downscale_factor: int = 4
def _load_data(self) -> None:
path_manager = self.path_manager_factory.get()
images, poses, _ = load_llff_data(
self.base_dir, factor=self.downscale_factor, path_manager=path_manager
self.base_dir, factor=8, path_manager=path_manager
)
hwf = poses[0, :3, -1]
poses = poses[:, :3, :4]
llffhold = 8
i_test = np.arange(images.shape[0])[::llffhold]
i_test = np.arange(images.shape[0])[::8]
i_test_index = set(i_test.tolist())
i_train = np.array(
[i for i in np.arange(images.shape[0]) if i not in i_test_index]

View File

@@ -294,7 +294,7 @@ def _local_path(path_manager, path):
def _ls(path_manager, path):
if path_manager is None:
return os.listdir(path)
return os.path.listdir(path)
return path_manager.ls(path)

View File

@@ -1,211 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from os.path import dirname, join, realpath
from typing import Optional, Tuple
import torch
from pytorch3d.implicitron.tools.config import (
expand_args_fields,
registry,
run_auto_creation,
)
from pytorch3d.io import IO
from pytorch3d.renderer import (
AmbientLights,
BlendParams,
CamerasBase,
FoVPerspectiveCameras,
HardPhongShader,
look_at_view_transform,
MeshRasterizer,
MeshRendererWithFragments,
PointLights,
RasterizationSettings,
)
from pytorch3d.structures.meshes import Meshes
from .dataset_map_provider import DatasetMap, DatasetMapProviderBase, PathManagerFactory
from .single_sequence_dataset import SingleSceneDataset
from .utils import DATASET_TYPE_KNOWN
@registry.register
class RenderedMeshDatasetMapProvider(DatasetMapProviderBase):  # pyre-ignore [13]
    """
    A simple single-scene dataset based on PyTorch3D renders of a mesh.
    Provides `num_views` renders of the mesh as train, with no val
    and test. The renders are generated from viewpoints sampled at uniformly
    distributed azimuth intervals. The elevation is kept constant so that the
    camera's vertical position coincides with the equator.

    By default, uses Keenan Crane's cow model, and the camera locations are
    set to make sense for that.

    Although the rendering used to generate this dataset will use a GPU
    if one is available, the data it produces is on the CPU just like
    the data returned by implicitron's other dataset map providers.
    This is because both datasets and models can be large, so implicitron's
    GenericModel.forward (etc) expects data on the CPU and only moves
    what it needs to the device.

    For a more detailed explanation of this code, please refer to the
    docs/tutorials/fit_textured_mesh.ipynb notebook.

    Members:
        num_views: The number of generated renders.
        data_file: The folder that contains the mesh file. By default, finds
            the cow mesh in the same repo as this code.
        azimuth_range: number of degrees on each side of the start position to
            take samples
        resolution: the common height and width of the output images.
        use_point_light: whether to use a particular point light as opposed
            to ambient white.
    """

    # Configurable fields, processed by the implicitron config system.
    num_views: int = 40
    data_file: Optional[str] = None
    azimuth_range: float = 180
    resolution: int = 128
    use_point_light: bool = True
    path_manager_factory: PathManagerFactory
    path_manager_factory_class_type: str = "PathManagerFactory"

    def get_dataset_map(self) -> DatasetMap:
        # The renders are produced once, in __post_init__; only a train
        # split is provided, with no val/test data.
        # pyre-ignore[16]
        return DatasetMap(train=self.train_dataset, val=None, test=None)

    def get_all_train_cameras(self) -> CamerasBase:
        # Cameras for all the (train-only) renders, already moved to CPU
        # in __post_init__.
        # pyre-ignore[16]
        return self.poses

    def __post_init__(self) -> None:
        super().__init__()
        run_auto_creation(self)
        # Render on GPU when available; all outputs are moved to CPU below.
        if torch.cuda.is_available():
            device = torch.device("cuda:0")
        else:
            device = torch.device("cpu")
        if self.data_file is None:
            # Default to the cow mesh shipped with this repository's tutorials.
            data_file = join(
                dirname(dirname(dirname(dirname(realpath(__file__))))),
                "docs",
                "tutorials",
                "data",
                "cow_mesh",
                "cow.obj",
            )
        else:
            data_file = self.data_file
        io = IO(path_manager=self.path_manager_factory.get())
        mesh = io.load_mesh(data_file, device=device)
        poses, images, masks = _generate_cow_renders(
            num_views=self.num_views,
            mesh=mesh,
            azimuth_range=self.azimuth_range,
            resolution=self.resolution,
            device=device,
            use_point_light=self.use_point_light,
        )
        # Keep everything on the CPU, as expected by implicitron's pipeline
        # (see the class docstring).
        # pyre-ignore[16]
        self.poses = poses.cpu()
        expand_args_fields(SingleSceneDataset)
        # pyre-ignore[16]
        self.train_dataset = SingleSceneDataset(  # pyre-ignore[28]
            object_name="cow",
            # (num_views, H, W, 3) renders -> list of (3, H, W) CPU images
            images=list(images.permute(0, 3, 1, 2).cpu()),
            fg_probabilities=list(masks[:, None].cpu()),
            poses=[self.poses[i] for i in range(len(poses))],
            frame_types=[DATASET_TYPE_KNOWN] * len(poses),
            eval_batches=None,
        )
@torch.no_grad()
def _generate_cow_renders(
    *,
    num_views: int,
    mesh: Meshes,
    azimuth_range: float,
    resolution: int,
    device: torch.device,
    use_point_light: bool,
) -> Tuple[CamerasBase, torch.Tensor, torch.Tensor]:
    """
    Render `num_views` images of `mesh` from cameras placed at uniformly
    spaced azimuths at constant (equatorial) elevation.

    Returns:
        cameras: A batch of `num_views` `FoVPerspectiveCameras` from which the
            images are rendered.
        images: A tensor of shape `(num_views, height, width, 3)` containing
            the rendered images.
        silhouettes: A tensor of shape `(num_views, height, width)` containing
            the rendered silhouettes.
    """
    # Scale-normalize and center the mesh in place so it fits inside a unit
    # sphere at the origin. This is not strictly necessary, but it makes the
    # fixed camera distance (dist=2.7 below) sensible for any input mesh.
    vertices = mesh.verts_packed()
    num_verts = vertices.shape[0]
    centroid = vertices.mean(0)
    extent = max((vertices - centroid).abs().max(0)[0])
    mesh.offset_verts_(-(centroid.expand(num_verts, 3)))
    mesh.scale_verts_((1.0 / float(extent)))

    # Viewing angles: constant zero elevation keeps the cameras on the
    # equator; azimuths sweep symmetrically around the start position,
    # offset by 180 degrees (the front of the cow faces -z).
    elev = torch.linspace(0, 0, num_views)
    azim = torch.linspace(-azimuth_range, azimuth_range, num_views) + 180.0

    # Either a point light placed in front of the object, or ambient white.
    if use_point_light:
        lights = PointLights(device=device, location=[[0.0, 0.0, -3.0]])
    else:
        lights = AmbientLights(device=device)

    # One FoV perspective camera per viewpoint; the camera helpers broadcast
    # the scalar distance against the per-view angle tensors.
    R, T = look_at_view_transform(dist=2.7, elev=elev, azim=azim)
    cameras = FoVPerspectiveCameras(device=device, R=R, T=T)

    # Visualization-quality rasterization settings: one face per pixel and no
    # blur. bin_size and max_faces_per_bin keep their default of None so that
    # the faster coarse-to-fine rasterization path is chosen heuristically
    # (see rasterize_meshes.py and docs/notes/renderer.md for details).
    raster_settings = RasterizationSettings(
        image_size=resolution, blur_radius=0.0, faces_per_pixel=1
    )

    # Textured hard-Phong shading over a black background: the shader
    # interpolates per-vertex uv coordinates, samples the texture image and
    # applies the Phong lighting model.
    blend_params = BlendParams(sigma=1e-4, gamma=1e-4, background_color=(0.0, 0.0, 0.0))
    renderer = MeshRendererWithFragments(
        rasterizer=MeshRasterizer(cameras=cameras, raster_settings=raster_settings),
        shader=HardPhongShader(
            device=device, cameras=cameras, lights=lights, blend_params=blend_params
        ),
    )

    # Replicate the mesh (textures included, via Meshes.extend) once per
    # viewpoint and render the whole batch in a single pass.
    batched_meshes = mesh.extend(num_views)
    target_images, fragments = renderer(batched_meshes, cameras=cameras, lights=lights)

    # A pixel is part of the silhouette iff some face was rasterized there.
    silhouette_binary = (fragments.pix_to_face[..., 0] >= 0).float()
    return cameras, target_images[..., :3], silhouette_binary

View File

@@ -9,7 +9,7 @@
# provide data for a single scene.
from dataclasses import field
from typing import Iterable, Iterator, List, Optional, Tuple
from typing import Iterable, List, Optional
import numpy as np
import torch
@@ -21,13 +21,17 @@ from pytorch3d.implicitron.tools.config import (
from pytorch3d.renderer import CamerasBase, join_cameras_as_batch, PerspectiveCameras
from .dataset_base import DatasetBase, FrameData
from .dataset_map_provider import DatasetMap, DatasetMapProviderBase, PathManagerFactory
from .dataset_map_provider import (
DatasetMap,
DatasetMapProviderBase,
PathManagerFactory,
Task,
)
from .utils import DATASET_TYPE_KNOWN, DATASET_TYPE_UNKNOWN
_SINGLE_SEQUENCE_NAME: str = "one_sequence"
@expand_args_fields
class SingleSceneDataset(DatasetBase, Configurable):
"""
A dataset from images from a single scene.
@@ -46,12 +50,6 @@ class SingleSceneDataset(DatasetBase, Configurable):
def __len__(self) -> int:
    # One frame per stored pose.
    return len(self.poses)
def sequence_frames_in_order(
    self, seq_name: str
) -> Iterator[Tuple[float, int, int]]:
    # Single-scene dataset: every frame belongs to the one sequence, so
    # `seq_name` is ignored. Yields one (0.0, i, i) triple per frame —
    # presumably (timestamp, frame_number, dataset_index) per the
    # DatasetBase contract; verify against the base class.
    for i in range(len(self)):
        yield (0.0, i, i)
def __getitem__(self, index) -> FrameData:
if index >= len(self):
raise IndexError(f"index {index} out of range {len(self)}")
@@ -117,6 +115,7 @@ class SingleSceneDatasetMapProviderBase(DatasetMapProviderBase):
def _get_dataset(
self, split_idx: int, frame_type: str, set_eval_batches: bool = False
) -> SingleSceneDataset:
expand_args_fields(SingleSceneDataset)
# pyre-ignore[16]
split = self.i_split[split_idx]
frame_types = [frame_type] * len(split)
@@ -160,6 +159,9 @@ class SingleSceneDatasetMapProviderBase(DatasetMapProviderBase):
test=self._get_dataset(2, DATASET_TYPE_UNKNOWN, True),
)
def get_task(self) -> Task:
    # A single-scene provider is by definition a single-sequence task.
    return Task.SINGLE_SEQUENCE
def get_all_train_cameras(self) -> Optional[CamerasBase]:
# pyre-ignore[16]
cameras = [self.poses[i] for i in self.i_split[0]]

View File

@@ -225,8 +225,8 @@ def _dataclass_list_from_dict_list(dlist, typeannot):
assert indices[-1] == len(all_keys_res)
keys = np.split(list(all_keys_res), indices[:-1])
all_vals_res_iter = iter(all_vals_res)
return [cls(zip(k, all_vals_res_iter)) for k in keys]
vals = np.split(list(all_vals_res), indices[:-1])
return [cls(zip(k, v)) for k, v in zip(keys, vals)]
elif not dataclasses.is_dataclass(typeannot):
return dlist

View File

@@ -7,12 +7,11 @@
import dataclasses
import os
from enum import Enum
from typing import Any, cast, Dict, List, Optional, Tuple
import lpips
import torch
from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource
from pytorch3d.implicitron.dataset.data_source import ImplicitronDataSource, Task
from pytorch3d.implicitron.dataset.json_index_dataset import JsonIndexDataset
from pytorch3d.implicitron.dataset.json_index_dataset_map_provider import (
CO3D_CATEGORIES,
@@ -28,11 +27,6 @@ from pytorch3d.implicitron.tools.utils import dataclass_to_cuda_
from tqdm import tqdm
class Task(Enum):
    """
    The type of a reconstruction task; values are the task strings used in
    configs ("singlesequence" / "multisequence").
    """

    SINGLE_SEQUENCE = "singlesequence"
    MULTI_SEQUENCE = "multisequence"
def main() -> None:
"""
Evaluates new view synthesis metrics of a simple depth-based image rendering
@@ -159,15 +153,11 @@ def evaluate_dbir_for_category(
if task == Task.SINGLE_SEQUENCE:
camera_difficulty_bin_breaks = 0.97, 0.98
multisequence_evaluation = False
else:
camera_difficulty_bin_breaks = 2.0 / 3, 5.0 / 6
multisequence_evaluation = True
category_result_flat, category_result = summarize_nvs_eval_results(
per_batch_eval_results,
camera_difficulty_bin_breaks=camera_difficulty_bin_breaks,
is_multisequence=multisequence_evaluation,
per_batch_eval_results, task, camera_difficulty_bin_breaks
)
return category_result["results"]

View File

@@ -9,11 +9,12 @@ import copy
import warnings
from collections import OrderedDict
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Sequence, Tuple, TYPE_CHECKING, Union
from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
import numpy as np
import torch
import torch.nn.functional as F
from pytorch3d.implicitron.dataset.data_source import Task
from pytorch3d.implicitron.dataset.dataset_base import FrameData
from pytorch3d.implicitron.dataset.utils import is_known_frame, is_train_frame
from pytorch3d.implicitron.models.base_model import ImplicitronRender
@@ -27,9 +28,7 @@ from pytorch3d.renderer.camera_utils import join_cameras_as_batch
from pytorch3d.renderer.cameras import CamerasBase, PerspectiveCameras
from pytorch3d.vis.plotly_vis import plot_scene
from tabulate import tabulate
if TYPE_CHECKING:
from visdom import Visdom
from visdom import Visdom
EVAL_N_SRC_VIEWS = [1, 3, 5, 7, 9]
@@ -45,7 +44,7 @@ class _Visualizer:
visdom_env: str = "eval_debug"
_viz: Optional["Visdom"] = field(init=False)
_viz: Visdom = field(init=False)
def __post_init__(self):
self._viz = vis_utils.get_visdom_connection()
@@ -53,8 +52,6 @@ class _Visualizer:
def show_rgb(
self, loss_value: float, metric_name: str, loss_mask_now: torch.Tensor
):
if self._viz is None:
return
self._viz.images(
torch.cat(
(
@@ -72,10 +69,7 @@ class _Visualizer:
def show_depth(
self, depth_loss: float, name_postfix: str, loss_mask_now: torch.Tensor
):
if self._viz is None:
return
viz = self._viz
viz.images(
self._viz.images(
torch.cat(
(
make_depth_image(self.depth_render, loss_mask_now),
@@ -87,13 +81,13 @@ class _Visualizer:
win="depth_abs" + name_postfix,
opts={"title": f"depth_abs_{name_postfix}_{depth_loss:1.2f}"},
)
viz.images(
self._viz.images(
loss_mask_now,
env=self.visdom_env,
win="depth_abs" + name_postfix + "_mask",
opts={"title": f"depth_abs_{name_postfix}_{depth_loss:1.2f}_mask"},
)
viz.images(
self._viz.images(
self.depth_mask,
env=self.visdom_env,
win="depth_abs" + name_postfix + "_maskd",
@@ -133,7 +127,7 @@ class _Visualizer:
pointcloud_max_points=10000,
pointcloud_marker_size=1,
)
viz.plotlyplot(
self._viz.plotlyplot(
plotlyplot,
env=self.visdom_env,
win=f"pcl{name_postfix}",
@@ -249,30 +243,10 @@ def eval_batch(
if frame_data.depth_map is None or frame_data.depth_map.sum() <= 0:
warnings.warn("Empty or missing depth map in evaluation!")
if frame_data.mask_crop is None:
warnings.warn("mask_crop is None, assuming the whole image is valid.")
if frame_data.fg_probability is None:
warnings.warn("fg_probability is None, assuming the whole image is fg.")
# threshold the masks to make ground truth binary masks
mask_fg = (
frame_data.fg_probability >= mask_thr
if frame_data.fg_probability is not None
# pyre-ignore [16]
else torch.ones_like(frame_data.image_rgb[:, :1, ...]).bool()
)
mask_crop = (
frame_data.mask_crop
if frame_data.mask_crop is not None
else torch.ones_like(mask_fg)
)
# unmasked g.t. image
image_rgb = frame_data.image_rgb
# fg-masked g.t. image
mask_fg, mask_crop = [
(getattr(frame_data, k) >= mask_thr) for k in ("fg_probability", "mask_crop")
]
image_rgb_masked = mask_background(
# pyre-fixme[6]: Expected `Tensor` for 1st param but got
# `Optional[torch.Tensor]`.
@@ -292,6 +266,7 @@ def eval_batch(
# pyre-fixme[6]: Expected `Tensor` for 4th param but got
# `Optional[torch.Tensor]`.
depth_map=frame_data.depth_map,
# pyre-fixme[16]: `Optional` has no attribute `__getitem__`.
depth_mask=frame_data.depth_mask[:1],
visdom_env=visualize_visdom_env,
)
@@ -304,7 +279,7 @@ def eval_batch(
mask=mask_crop,
)
for loss_fg_mask, name_postfix in zip((mask_crop, mask_fg), ("_masked", "_fg")):
for loss_fg_mask, name_postfix in zip((mask_crop, mask_fg), ("", "_fg")):
loss_mask_now = mask_crop * loss_fg_mask
@@ -323,7 +298,7 @@ def eval_batch(
results[metric_name].item(), metric_name, loss_mask_now
)
if name_postfix == "_fg" and frame_data.depth_map is not None:
if name_postfix == "_fg":
# only record depth metrics for the foreground
_, abs_ = eval_depth(
cloned_render["depth_render"],
@@ -339,28 +314,16 @@ def eval_batch(
if visualize:
visualizer.show_depth(abs_.mean().item(), name_postfix, loss_mask_now)
if break_after_visualising:
breakpoint() # noqa: B601
import pdb # noqa: B602
# add the rgb metrics between the render and the unmasked image
for rgb_metric_name, rgb_metric_fun in zip(
("psnr_full_image", "rgb_l1_full_image"), (calc_psnr, rgb_l1)
):
results[rgb_metric_name] = rgb_metric_fun(
image_render,
image_rgb,
mask=mask_crop,
)
pdb.set_trace()
if lpips_model is not None:
for gt_image_type in ("_full_image", "_masked"):
im1, im2 = [
2.0 * im.clamp(0.0, 1.0) - 1.0 # pyre-ignore[16]
for im in (
image_rgb_masked if gt_image_type == "_masked" else image_rgb,
cloned_render["image_render"],
)
]
results["lpips" + gt_image_type] = lpips_model.forward(im1, im2).item()
im1, im2 = [
2.0 * im.clamp(0.0, 1.0) - 1.0
for im in (image_rgb_masked, cloned_render["image_render"])
]
results["lpips"] = lpips_model.forward(im1, im2).item()
# convert all metrics to floats
results = {k: float(v) for k, v in results.items()}
@@ -457,16 +420,16 @@ def _get_camera_difficulty_bin_edges(camera_difficulty_bin_breaks: Tuple[float,
def summarize_nvs_eval_results(
per_batch_eval_results: List[Dict[str, Any]],
is_multisequence: bool,
camera_difficulty_bin_breaks: Tuple[float, float],
task: Task,
camera_difficulty_bin_breaks: Tuple[float, float] = (0.97, 0.98),
):
"""
Compile the per-batch evaluation results `per_batch_eval_results` into
a set of aggregate metrics. The produced metrics depend on is_multisequence.
a set of aggregate metrics. The produced metrics depend on the task.
Args:
per_batch_eval_results: Metrics of each per-batch evaluation.
is_multisequence: Whether to evaluate as a multisequence task
task: The type of the new-view synthesis task.
camera_difficulty_bin_breaks: edge hard-medium and medium-easy
@@ -476,9 +439,14 @@ def summarize_nvs_eval_results(
"""
n_batches = len(per_batch_eval_results)
eval_sets: List[Optional[str]] = []
eval_sets = [None]
if is_multisequence:
if task == Task.SINGLE_SEQUENCE:
eval_sets = [None]
# assert n_batches==100
elif task == Task.MULTI_SEQUENCE:
eval_sets = ["train", "test"]
# assert n_batches==1000
else:
raise ValueError(task)
batch_sizes = torch.tensor(
[r["meta"]["batch_size"] for r in per_batch_eval_results]
).long()
@@ -498,9 +466,11 @@ def summarize_nvs_eval_results(
# add per set averages
for SET in eval_sets:
if SET is None:
assert task == Task.SINGLE_SEQUENCE
ok_set = torch.ones(n_batches, dtype=torch.bool)
set_name = "test"
else:
assert task == Task.MULTI_SEQUENCE
ok_set = is_train == int(SET == "train")
set_name = SET
@@ -525,7 +495,7 @@ def summarize_nvs_eval_results(
}
)
if is_multisequence:
if task == Task.MULTI_SEQUENCE:
# split based on n_src_views
n_src_views = batch_sizes - 1
for n_src in EVAL_N_SRC_VIEWS:

View File

@@ -1,165 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import copy
import json
import logging
import os
from typing import Any, Dict, List, Optional, Tuple
import lpips
import torch
import tqdm
from pytorch3d.implicitron.dataset import utils as ds_utils
from pytorch3d.implicitron.evaluation import evaluate_new_view_synthesis as evaluate
from pytorch3d.implicitron.models.base_model import EvaluationMode, ImplicitronModelBase
from pytorch3d.implicitron.tools.config import (
registry,
ReplaceableBase,
run_auto_creation,
)
from pytorch3d.renderer.cameras import CamerasBase
from torch.utils.data import DataLoader
# Module-level logger named after this module, per the standard logging convention.
logger = logging.getLogger(__name__)
class EvaluatorBase(ReplaceableBase):
    """
    Pluggable base class for evaluating a trained model on given data.

    Concrete subclasses implement `run`, which produces a dict mapping
    loss/objective names to their values.
    """

    # When True, evaluation is performed as a multi-sequence task.
    is_multisequence: bool = False

    def run(
        self, model: ImplicitronModelBase, dataloader: DataLoader, **kwargs
    ) -> Dict[str, Any]:
        """
        Evaluate the results of Implicitron training; must be overridden.
        """
        raise NotImplementedError()
@registry.register
class ImplicitronEvaluator(EvaluatorBase):
    """
    Standard evaluator for Implicitron-trained models.

    Members:
        camera_difficulty_bin_breaks: low/medium values that split camera
            difficulties into [0-eps, low, medium, 1+eps].
    """

    camera_difficulty_bin_breaks: Tuple[float, ...] = 0.97, 0.98

    def __post_init__(self):
        run_auto_creation(self)

    def run(
        self,
        model: ImplicitronModelBase,
        dataloader: DataLoader,
        all_train_cameras: Optional[CamerasBase],
        device: torch.device,
        dump_to_json: bool = False,
        exp_dir: Optional[str] = None,
        epoch: Optional[int] = None,
        **kwargs,
    ) -> Dict[str, Any]:
        """
        Evaluate `model` over every batch of `dataloader` and aggregate the
        per-batch metrics. Optionally dump the aggregate results to
        exp_dir/results_test.json.

        Args:
            model: A (trained) model to evaluate.
            dataloader: A test dataloader.
            all_train_cameras: Camera instances we used for training.
            device: A torch device.
            dump_to_json: If True, will dump the results to a json file.
            exp_dir: Root experiment directory.
            epoch: Evaluation epoch (to be stored in the results dict).

        Returns:
            A dictionary of results.
        """
        perceptual_model = lpips.LPIPS(net="vgg").to(device)

        model.eval()

        logger.info("Evaluating model ...")
        batch_results = []
        for frame_data in tqdm.tqdm(dataloader):
            frame_data = frame_data.to(device)

            # Hide the unknown images so the model cannot see them at eval time.
            masked_frame_data = _get_eval_frame_data(frame_data)
            model_kwargs = {
                **masked_frame_data,
                "evaluation_mode": EvaluationMode.EVALUATION,
            }
            with torch.no_grad():
                preds = model(**model_kwargs)

            render_copy = copy.deepcopy(preds["implicitron_render"])
            # Passing None makes eval_batch fall back to the batch's known cameras.
            source_cameras = None if self.is_multisequence else all_train_cameras
            batch_results.append(
                evaluate.eval_batch(
                    frame_data,
                    render_copy,
                    bg_color="black",
                    lpips_model=perceptual_model,
                    source_cameras=source_cameras,
                )
            )

        _, category_result = evaluate.summarize_nvs_eval_results(
            batch_results,
            self.is_multisequence,
            self.camera_difficulty_bin_breaks,
        )

        results = category_result["results"]
        evaluate.pretty_print_nvs_metrics(results)
        if dump_to_json:
            _dump_to_json(epoch, exp_dir, results)
        return results
def _dump_to_json(
    epoch: Optional[int], exp_dir: Optional[str], results: List[Dict[str, Any]]
) -> None:
    """
    Write `results` to <exp_dir>/results_test.json.

    Args:
        epoch: If not None, stored in each result dict under "eval_epoch"
            (mutates `results` in place).
        exp_dir: Root experiment directory in which the json file is written.
        results: Per-category evaluation results to serialize.

    Raises:
        ValueError: If `exp_dir` is None.
    """
    # Fail fast: validate the destination before mutating `results` or
    # logging, so a misconfigured call leaves no side effects behind.
    if exp_dir is None:
        raise ValueError("Cannot save results to json without a specified save path.")
    if epoch is not None:
        for r in results:
            r["eval_epoch"] = int(epoch)
    logger.info("Evaluation results")
    with open(os.path.join(exp_dir, "results_test.json"), "w") as f:
        json.dump(results, f)
def _get_eval_frame_data(frame_data: Any) -> Any:
    """
    Return a deep copy of `frame_data` whose image-like fields are zeroed
    out for frames that are not "known", so the model cannot use the
    unknown images at evaluation time.
    """
    eval_copy = copy.deepcopy(frame_data)
    # Broadcastable (batch, 1, 1, 1) mask: nonzero for known frames only.
    known_mask = ds_utils.is_known_frame(frame_data.frame_type).type_as(
        frame_data.image_rgb
    )[:, None, None, None]
    for field_name in ("image_rgb", "depth_map", "fg_probability", "mask_crop"):
        original = getattr(eval_copy, field_name)
        if original is None:
            setattr(eval_copy, field_name, None)
        else:
            setattr(eval_copy, field_name, original.clone() * known_mask)
    return eval_copy

Some files were not shown because too many files have changed in this diff Show More