1 Commits

Author SHA1 Message Date
Jeremy Reizenstein
0eac8299d4 MKL version fix in CI (#1820)
Summary:
Fix for "undefined symbol: iJIT_NotifyEvent" build issue,

Pull Request resolved: https://github.com/facebookresearch/pytorch3d/pull/1820

Differential Revision: D58685326
2024-06-20 09:24:07 -07:00
252 changed files with 1203 additions and 1981 deletions

View File

@@ -162,6 +162,90 @@ workflows:
jobs:
# - main:
# context: DOCKERHUB_TOKEN
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda113
context: DOCKERHUB_TOKEN
cu_version: cu113
name: linux_conda_py38_cu113_pyt1120
python_version: '3.8'
pytorch_version: 1.12.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda116
context: DOCKERHUB_TOKEN
cu_version: cu116
name: linux_conda_py38_cu116_pyt1120
python_version: '3.8'
pytorch_version: 1.12.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda113
context: DOCKERHUB_TOKEN
cu_version: cu113
name: linux_conda_py38_cu113_pyt1121
python_version: '3.8'
pytorch_version: 1.12.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda116
context: DOCKERHUB_TOKEN
cu_version: cu116
name: linux_conda_py38_cu116_pyt1121
python_version: '3.8'
pytorch_version: 1.12.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda116
context: DOCKERHUB_TOKEN
cu_version: cu116
name: linux_conda_py38_cu116_pyt1130
python_version: '3.8'
pytorch_version: 1.13.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda117
context: DOCKERHUB_TOKEN
cu_version: cu117
name: linux_conda_py38_cu117_pyt1130
python_version: '3.8'
pytorch_version: 1.13.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda116
context: DOCKERHUB_TOKEN
cu_version: cu116
name: linux_conda_py38_cu116_pyt1131
python_version: '3.8'
pytorch_version: 1.13.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda117
context: DOCKERHUB_TOKEN
cu_version: cu117
name: linux_conda_py38_cu117_pyt1131
python_version: '3.8'
pytorch_version: 1.13.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda117
context: DOCKERHUB_TOKEN
cu_version: cu117
name: linux_conda_py38_cu117_pyt200
python_version: '3.8'
pytorch_version: 2.0.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda118
context: DOCKERHUB_TOKEN
cu_version: cu118
name: linux_conda_py38_cu118_pyt200
python_version: '3.8'
pytorch_version: 2.0.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda117
context: DOCKERHUB_TOKEN
cu_version: cu117
name: linux_conda_py38_cu117_pyt201
python_version: '3.8'
pytorch_version: 2.0.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda118
context: DOCKERHUB_TOKEN
cu_version: cu118
name: linux_conda_py38_cu118_pyt201
python_version: '3.8'
pytorch_version: 2.0.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda118
context: DOCKERHUB_TOKEN
@@ -247,33 +331,89 @@ workflows:
python_version: '3.8'
pytorch_version: 2.3.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda118
conda_docker_image: pytorch/conda-builder:cuda113
context: DOCKERHUB_TOKEN
cu_version: cu118
name: linux_conda_py38_cu118_pyt240
python_version: '3.8'
pytorch_version: 2.4.0
cu_version: cu113
name: linux_conda_py39_cu113_pyt1120
python_version: '3.9'
pytorch_version: 1.12.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda121
conda_docker_image: pytorch/conda-builder:cuda116
context: DOCKERHUB_TOKEN
cu_version: cu121
name: linux_conda_py38_cu121_pyt240
python_version: '3.8'
pytorch_version: 2.4.0
cu_version: cu116
name: linux_conda_py39_cu116_pyt1120
python_version: '3.9'
pytorch_version: 1.12.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda113
context: DOCKERHUB_TOKEN
cu_version: cu113
name: linux_conda_py39_cu113_pyt1121
python_version: '3.9'
pytorch_version: 1.12.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda116
context: DOCKERHUB_TOKEN
cu_version: cu116
name: linux_conda_py39_cu116_pyt1121
python_version: '3.9'
pytorch_version: 1.12.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda116
context: DOCKERHUB_TOKEN
cu_version: cu116
name: linux_conda_py39_cu116_pyt1130
python_version: '3.9'
pytorch_version: 1.13.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda117
context: DOCKERHUB_TOKEN
cu_version: cu117
name: linux_conda_py39_cu117_pyt1130
python_version: '3.9'
pytorch_version: 1.13.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda116
context: DOCKERHUB_TOKEN
cu_version: cu116
name: linux_conda_py39_cu116_pyt1131
python_version: '3.9'
pytorch_version: 1.13.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda117
context: DOCKERHUB_TOKEN
cu_version: cu117
name: linux_conda_py39_cu117_pyt1131
python_version: '3.9'
pytorch_version: 1.13.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda117
context: DOCKERHUB_TOKEN
cu_version: cu117
name: linux_conda_py39_cu117_pyt200
python_version: '3.9'
pytorch_version: 2.0.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda118
context: DOCKERHUB_TOKEN
cu_version: cu118
name: linux_conda_py38_cu118_pyt241
python_version: '3.8'
pytorch_version: 2.4.1
name: linux_conda_py39_cu118_pyt200
python_version: '3.9'
pytorch_version: 2.0.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda121
conda_docker_image: pytorch/conda-builder:cuda117
context: DOCKERHUB_TOKEN
cu_version: cu121
name: linux_conda_py38_cu121_pyt241
python_version: '3.8'
pytorch_version: 2.4.1
cu_version: cu117
name: linux_conda_py39_cu117_pyt201
python_version: '3.9'
pytorch_version: 2.0.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda118
context: DOCKERHUB_TOKEN
cu_version: cu118
name: linux_conda_py39_cu118_pyt201
python_version: '3.9'
pytorch_version: 2.0.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda118
context: DOCKERHUB_TOKEN
@@ -359,33 +499,89 @@ workflows:
python_version: '3.9'
pytorch_version: 2.3.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda118
conda_docker_image: pytorch/conda-builder:cuda113
context: DOCKERHUB_TOKEN
cu_version: cu118
name: linux_conda_py39_cu118_pyt240
python_version: '3.9'
pytorch_version: 2.4.0
cu_version: cu113
name: linux_conda_py310_cu113_pyt1120
python_version: '3.10'
pytorch_version: 1.12.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda121
conda_docker_image: pytorch/conda-builder:cuda116
context: DOCKERHUB_TOKEN
cu_version: cu121
name: linux_conda_py39_cu121_pyt240
python_version: '3.9'
pytorch_version: 2.4.0
cu_version: cu116
name: linux_conda_py310_cu116_pyt1120
python_version: '3.10'
pytorch_version: 1.12.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda113
context: DOCKERHUB_TOKEN
cu_version: cu113
name: linux_conda_py310_cu113_pyt1121
python_version: '3.10'
pytorch_version: 1.12.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda116
context: DOCKERHUB_TOKEN
cu_version: cu116
name: linux_conda_py310_cu116_pyt1121
python_version: '3.10'
pytorch_version: 1.12.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda116
context: DOCKERHUB_TOKEN
cu_version: cu116
name: linux_conda_py310_cu116_pyt1130
python_version: '3.10'
pytorch_version: 1.13.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda117
context: DOCKERHUB_TOKEN
cu_version: cu117
name: linux_conda_py310_cu117_pyt1130
python_version: '3.10'
pytorch_version: 1.13.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda116
context: DOCKERHUB_TOKEN
cu_version: cu116
name: linux_conda_py310_cu116_pyt1131
python_version: '3.10'
pytorch_version: 1.13.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda117
context: DOCKERHUB_TOKEN
cu_version: cu117
name: linux_conda_py310_cu117_pyt1131
python_version: '3.10'
pytorch_version: 1.13.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda117
context: DOCKERHUB_TOKEN
cu_version: cu117
name: linux_conda_py310_cu117_pyt200
python_version: '3.10'
pytorch_version: 2.0.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda118
context: DOCKERHUB_TOKEN
cu_version: cu118
name: linux_conda_py39_cu118_pyt241
python_version: '3.9'
pytorch_version: 2.4.1
name: linux_conda_py310_cu118_pyt200
python_version: '3.10'
pytorch_version: 2.0.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda121
conda_docker_image: pytorch/conda-builder:cuda117
context: DOCKERHUB_TOKEN
cu_version: cu121
name: linux_conda_py39_cu121_pyt241
python_version: '3.9'
pytorch_version: 2.4.1
cu_version: cu117
name: linux_conda_py310_cu117_pyt201
python_version: '3.10'
pytorch_version: 2.0.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda118
context: DOCKERHUB_TOKEN
cu_version: cu118
name: linux_conda_py310_cu118_pyt201
python_version: '3.10'
pytorch_version: 2.0.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda118
context: DOCKERHUB_TOKEN
@@ -470,34 +666,6 @@ workflows:
name: linux_conda_py310_cu121_pyt231
python_version: '3.10'
pytorch_version: 2.3.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda118
context: DOCKERHUB_TOKEN
cu_version: cu118
name: linux_conda_py310_cu118_pyt240
python_version: '3.10'
pytorch_version: 2.4.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda121
context: DOCKERHUB_TOKEN
cu_version: cu121
name: linux_conda_py310_cu121_pyt240
python_version: '3.10'
pytorch_version: 2.4.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda118
context: DOCKERHUB_TOKEN
cu_version: cu118
name: linux_conda_py310_cu118_pyt241
python_version: '3.10'
pytorch_version: 2.4.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda121
context: DOCKERHUB_TOKEN
cu_version: cu121
name: linux_conda_py310_cu121_pyt241
python_version: '3.10'
pytorch_version: 2.4.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda118
context: DOCKERHUB_TOKEN
@@ -582,34 +750,6 @@ workflows:
name: linux_conda_py311_cu121_pyt231
python_version: '3.11'
pytorch_version: 2.3.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda118
context: DOCKERHUB_TOKEN
cu_version: cu118
name: linux_conda_py311_cu118_pyt240
python_version: '3.11'
pytorch_version: 2.4.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda121
context: DOCKERHUB_TOKEN
cu_version: cu121
name: linux_conda_py311_cu121_pyt240
python_version: '3.11'
pytorch_version: 2.4.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda118
context: DOCKERHUB_TOKEN
cu_version: cu118
name: linux_conda_py311_cu118_pyt241
python_version: '3.11'
pytorch_version: 2.4.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda121
context: DOCKERHUB_TOKEN
cu_version: cu121
name: linux_conda_py311_cu121_pyt241
python_version: '3.11'
pytorch_version: 2.4.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda118
context: DOCKERHUB_TOKEN
@@ -652,34 +792,6 @@ workflows:
name: linux_conda_py312_cu121_pyt231
python_version: '3.12'
pytorch_version: 2.3.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda118
context: DOCKERHUB_TOKEN
cu_version: cu118
name: linux_conda_py312_cu118_pyt240
python_version: '3.12'
pytorch_version: 2.4.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda121
context: DOCKERHUB_TOKEN
cu_version: cu121
name: linux_conda_py312_cu121_pyt240
python_version: '3.12'
pytorch_version: 2.4.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda118
context: DOCKERHUB_TOKEN
cu_version: cu118
name: linux_conda_py312_cu118_pyt241
python_version: '3.12'
pytorch_version: 2.4.1
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda121
context: DOCKERHUB_TOKEN
cu_version: cu121
name: linux_conda_py312_cu121_pyt241
python_version: '3.12'
pytorch_version: 2.4.1
- binary_linux_conda_cuda:
name: testrun_conda_cuda_py310_cu117_pyt201
context: DOCKERHUB_TOKEN

View File

@@ -19,14 +19,18 @@ from packaging import version
# The CUDA versions which have pytorch conda packages available for linux for each
# version of pytorch.
CONDA_CUDA_VERSIONS = {
"1.12.0": ["cu113", "cu116"],
"1.12.1": ["cu113", "cu116"],
"1.13.0": ["cu116", "cu117"],
"1.13.1": ["cu116", "cu117"],
"2.0.0": ["cu117", "cu118"],
"2.0.1": ["cu117", "cu118"],
"2.1.0": ["cu118", "cu121"],
"2.1.1": ["cu118", "cu121"],
"2.1.2": ["cu118", "cu121"],
"2.2.0": ["cu118", "cu121"],
"2.2.2": ["cu118", "cu121"],
"2.3.1": ["cu118", "cu121"],
"2.4.0": ["cu118", "cu121"],
"2.4.1": ["cu118", "cu121"],
}
@@ -88,6 +92,7 @@ def workflow_pair(
upload=False,
filter_branch,
):
w = []
py = python_version.replace(".", "")
pyt = pytorch_version.replace(".", "")
@@ -126,6 +131,7 @@ def generate_base_workflow(
btype,
filter_branch=None,
):
d = {
"name": base_workflow_name,
"python_version": python_version,

View File

@@ -1,23 +0,0 @@
name: facebookresearch/pytorch3d/build_and_test
on:
pull_request:
branches:
- main
push:
branches:
- main
jobs:
binary_linux_conda_cuda:
runs-on: 4-core-ubuntu-gpu-t4
env:
PYTHON_VERSION: "3.12"
BUILD_VERSION: "${{ github.run_number }}"
PYTORCH_VERSION: "2.4.1"
CU_VERSION: "cu121"
JUST_TESTRUN: 1
steps:
- uses: actions/checkout@v4
- name: Build and run tests
run: |-
conda create --name env --yes --quiet conda-build
conda run --no-capture-output --name env python3 ./packaging/build_conda.py --use-conda-cuda

View File

@@ -8,10 +8,11 @@
The core library is written in PyTorch. Several components have underlying implementation in CUDA for improved performance. A subset of these components have CPU implementations in C++/PyTorch. It is advised to use PyTorch3D with GPU support in order to use all the features.
- Linux or macOS or Windows
- Python
- PyTorch 2.1.0, 2.1.1, 2.1.2, 2.2.0, 2.2.1, 2.2.2, 2.3.0, 2.3.1, 2.4.0 or 2.4.1.
- Python 3.8, 3.9 or 3.10
- PyTorch 1.12.0, 1.12.1, 1.13.0, 2.0.0, 2.0.1, 2.1.0, 2.1.1, 2.1.2, 2.2.0, 2.2.1, 2.2.2, 2.3.0 or 2.3.1.
- torchvision that matches the PyTorch installation. You can install them together as explained at pytorch.org to make sure of this.
- gcc & g++ ≥ 4.9
- [fvcore](https://github.com/facebookresearch/fvcore)
- [ioPath](https://github.com/facebookresearch/iopath)
- If CUDA is to be used, use a version which is supported by the corresponding pytorch version and at least version 9.2.
- If CUDA older than 11.7 is to be used and you are building from source, the CUB library must be available. We recommend version 1.10.0.
@@ -21,7 +22,7 @@ The runtime dependencies can be installed by running:
conda create -n pytorch3d python=3.9
conda activate pytorch3d
conda install pytorch=1.13.0 torchvision pytorch-cuda=11.6 -c pytorch -c nvidia
conda install -c iopath iopath
conda install -c fvcore -c iopath -c conda-forge fvcore iopath
```
For the CUB build time dependency, which you only need if you have CUDA older than 11.7, if you are using conda, you can continue with
@@ -48,7 +49,6 @@ For developing on top of PyTorch3D or contributing, you will need to run the lin
- tdqm
- jupyter
- imageio
- fvcore
- plotly
- opencv-python
@@ -59,7 +59,6 @@ conda install jupyter
pip install scikit-image matplotlib imageio plotly opencv-python
# Tests/Linting
conda install -c fvcore -c conda-forge fvcore
pip install black usort flake8 flake8-bugbear flake8-comprehensions
```
@@ -98,7 +97,7 @@ version_str="".join([
torch.version.cuda.replace(".",""),
f"_pyt{pyt_version_str}"
])
!pip install iopath
!pip install fvcore iopath
!pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html
```

View File

@@ -10,7 +10,7 @@
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
DIR=$(dirname "${DIR}")
if [[ -f "${DIR}/BUCK" ]]
if [[ -f "${DIR}/TARGETS" ]]
then
pyfmt "${DIR}"
else
@@ -36,5 +36,5 @@ then
echo "Running pyre..."
echo "To restart/kill pyre server, run 'pyre restart' or 'pyre kill' in fbcode/"
( cd ~/fbsource/fbcode; arc pyre check //vision/fair/pytorch3d/... )
( cd ~/fbsource/fbcode; pyre -l vision/fair/pytorch3d/ )
fi

View File

@@ -23,7 +23,7 @@ conda init bash
source ~/.bashrc
conda create -y -n myenv python=3.8 matplotlib ipython ipywidgets nbconvert
conda activate myenv
conda install -y -c iopath iopath
conda install -y -c fvcore -c iopath -c conda-forge fvcore iopath
conda install -y -c pytorch pytorch=1.6.0 cudatoolkit=10.1 torchvision
conda install -y -c pytorch3d-nightly pytorch3d
pip install plotly scikit-image

View File

@@ -10,7 +10,6 @@ This example demonstrates the most trivial, direct interface of the pulsar
sphere renderer. It renders and saves an image with 10 random spheres.
Output: basic.png.
"""
import logging
import math
from os import path

View File

@@ -11,7 +11,6 @@ interface for sphere renderering. It renders and saves an image with
10 random spheres.
Output: basic-pt3d.png.
"""
import logging
from os import path

View File

@@ -14,7 +14,6 @@ distorted. Gradient-based optimization is used to converge towards the
original camera parameters.
Output: cam.gif.
"""
import logging
import math
from os import path

View File

@@ -14,7 +14,6 @@ distorted. Gradient-based optimization is used to converge towards the
original camera parameters.
Output: cam-pt3d.gif
"""
import logging
from os import path

View File

@@ -18,7 +18,6 @@ This example is not available yet through the 'unified' interface,
because opacity support has not landed in PyTorch3D for general data
structures yet.
"""
import logging
import math
from os import path

View File

@@ -13,7 +13,6 @@ The scene is initialized with random spheres. Gradient-based
optimization is used to converge towards a faithful
scene representation.
"""
import logging
import math

View File

@@ -13,7 +13,6 @@ The scene is initialized with random spheres. Gradient-based
optimization is used to converge towards a faithful
scene representation.
"""
import logging
import math

View File

@@ -5,6 +5,7 @@ sphinx_rtd_theme
sphinx_markdown_tables
numpy
iopath
fvcore
https://download.pytorch.org/whl/cpu/torchvision-0.15.2%2Bcpu-cp311-cp311-linux_x86_64.whl
https://download.pytorch.org/whl/cpu/torch-2.0.1%2Bcpu-cp311-cp311-linux_x86_64.whl
omegaconf

View File

@@ -96,7 +96,7 @@
" torch.version.cuda.replace(\".\",\"\"),\n",
" f\"_pyt{pyt_version_str}\"\n",
" ])\n",
" !pip install iopath\n",
" !pip install fvcore iopath\n",
" if sys.platform.startswith(\"linux\"):\n",
" print(\"Trying to install wheel for PyTorch3D\")\n",
" !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",

View File

@@ -83,7 +83,7 @@
" torch.version.cuda.replace(\".\",\"\"),\n",
" f\"_pyt{pyt_version_str}\"\n",
" ])\n",
" !pip install iopath\n",
" !pip install fvcore iopath\n",
" if sys.platform.startswith(\"linux\"):\n",
" print(\"Trying to install wheel for PyTorch3D\")\n",
" !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",

View File

@@ -58,7 +58,7 @@
" torch.version.cuda.replace(\".\",\"\"),\n",
" f\"_pyt{pyt_version_str}\"\n",
" ])\n",
" !pip install iopath\n",
" !pip install fvcore iopath\n",
" if sys.platform.startswith(\"linux\"):\n",
" print(\"Trying to install wheel for PyTorch3D\")\n",
" !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",

View File

@@ -97,7 +97,7 @@
" torch.version.cuda.replace(\".\",\"\"),\n",
" f\"_pyt{pyt_version_str}\"\n",
" ])\n",
" !pip install iopath\n",
" !pip install fvcore iopath\n",
" if sys.platform.startswith(\"linux\"):\n",
" print(\"Trying to install wheel for PyTorch3D\")\n",
" !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",

View File

@@ -63,7 +63,7 @@
" torch.version.cuda.replace(\".\",\"\"),\n",
" f\"_pyt{pyt_version_str}\"\n",
" ])\n",
" !pip install iopath\n",
" !pip install fvcore iopath\n",
" if sys.platform.startswith(\"linux\"):\n",
" print(\"Trying to install wheel for PyTorch3D\")\n",
" !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",

View File

@@ -75,7 +75,7 @@
" torch.version.cuda.replace(\".\",\"\"),\n",
" f\"_pyt{pyt_version_str}\"\n",
" ])\n",
" !pip install iopath\n",
" !pip install fvcore iopath\n",
" if sys.platform.startswith(\"linux\"):\n",
" print(\"Trying to install wheel for PyTorch3D\")\n",
" !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",

View File

@@ -54,7 +54,7 @@
" torch.version.cuda.replace(\".\",\"\"),\n",
" f\"_pyt{pyt_version_str}\"\n",
" ])\n",
" !pip install iopath\n",
" !pip install fvcore iopath\n",
" if sys.platform.startswith(\"linux\"):\n",
" print(\"Trying to install wheel for PyTorch3D\")\n",
" !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",

View File

@@ -85,7 +85,7 @@
" torch.version.cuda.replace(\".\",\"\"),\n",
" f\"_pyt{pyt_version_str}\"\n",
" ])\n",
" !pip install iopath\n",
" !pip install fvcore iopath\n",
" if sys.platform.startswith(\"linux\"):\n",
" print(\"Trying to install wheel for PyTorch3D\")\n",
" !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",

View File

@@ -79,7 +79,7 @@
" torch.version.cuda.replace(\".\",\"\"),\n",
" f\"_pyt{pyt_version_str}\"\n",
" ])\n",
" !pip install iopath\n",
" !pip install fvcore iopath\n",
" if sys.platform.startswith(\"linux\"):\n",
" print(\"Trying to install wheel for PyTorch3D\")\n",
" !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",

View File

@@ -57,7 +57,7 @@
" torch.version.cuda.replace(\".\",\"\"),\n",
" f\"_pyt{pyt_version_str}\"\n",
" ])\n",
" !pip install iopath\n",
" !pip install fvcore iopath\n",
" if sys.platform.startswith(\"linux\"):\n",
" print(\"Trying to install wheel for PyTorch3D\")\n",
" !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",

View File

@@ -64,7 +64,7 @@
" torch.version.cuda.replace(\".\",\"\"),\n",
" f\"_pyt{pyt_version_str}\"\n",
" ])\n",
" !pip install iopath\n",
" !pip install fvcore iopath\n",
" if sys.platform.startswith(\"linux\"):\n",
" print(\"Trying to install wheel for PyTorch3D\")\n",
" !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",

View File

@@ -80,7 +80,7 @@
" torch.version.cuda.replace(\".\",\"\"),\n",
" f\"_pyt{pyt_version_str}\"\n",
" ])\n",
" !pip install iopath\n",
" !pip install fvcore iopath\n",
" if sys.platform.startswith(\"linux\"):\n",
" print(\"Trying to install wheel for PyTorch3D\")\n",
" !pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html\n",

View File

@@ -4,11 +4,10 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import argparse
import os.path
import runpy
import subprocess
from typing import List, Tuple
from typing import List
# required env vars:
# CU_VERSION: E.g. cu112
@@ -24,7 +23,7 @@ pytorch_major_minor = tuple(int(i) for i in PYTORCH_VERSION.split(".")[:2])
source_root_dir = os.environ["PWD"]
def version_constraint(version) -> str:
def version_constraint(version):
"""
Given version "11.3" returns " >=11.3,<11.4"
"""
@@ -33,7 +32,7 @@ def version_constraint(version) -> str:
return f" >={version},<{upper}"
def get_cuda_major_minor() -> Tuple[str, str]:
def get_cuda_major_minor():
if CU_VERSION == "cpu":
raise ValueError("fn only for cuda builds")
if len(CU_VERSION) != 5 or CU_VERSION[:2] != "cu":
@@ -43,10 +42,11 @@ def get_cuda_major_minor() -> Tuple[str, str]:
return major, minor
def setup_cuda(use_conda_cuda: bool) -> List[str]:
def setup_cuda():
if CU_VERSION == "cpu":
return []
return
major, minor = get_cuda_major_minor()
os.environ["CUDA_HOME"] = f"/usr/local/cuda-{major}.{minor}/"
os.environ["FORCE_CUDA"] = "1"
basic_nvcc_flags = (
@@ -75,15 +75,6 @@ def setup_cuda(use_conda_cuda: bool) -> List[str]:
if os.environ.get("JUST_TESTRUN", "0") != "1":
os.environ["NVCC_FLAGS"] = nvcc_flags
if use_conda_cuda:
os.environ["CONDA_CUDA_TOOLKIT_BUILD_CONSTRAINT1"] = "- cuda-toolkit"
os.environ["CONDA_CUDA_TOOLKIT_BUILD_CONSTRAINT2"] = (
f"- cuda-version={major}.{minor}"
)
return ["-c", f"nvidia/label/cuda-{major}.{minor}.0"]
else:
os.environ["CUDA_HOME"] = f"/usr/local/cuda-{major}.{minor}/"
return []
def setup_conda_pytorch_constraint() -> List[str]:
@@ -104,7 +95,7 @@ def setup_conda_pytorch_constraint() -> List[str]:
return ["-c", "pytorch", "-c", "nvidia"]
def setup_conda_cudatoolkit_constraint() -> None:
def setup_conda_cudatoolkit_constraint():
if CU_VERSION == "cpu":
os.environ["CONDA_CPUONLY_FEATURE"] = "- cpuonly"
os.environ["CONDA_CUDATOOLKIT_CONSTRAINT"] = ""
@@ -125,14 +116,14 @@ def setup_conda_cudatoolkit_constraint() -> None:
os.environ["CONDA_CUDATOOLKIT_CONSTRAINT"] = toolkit
def do_build(start_args: List[str]) -> None:
def do_build(start_args: List[str]):
args = start_args.copy()
test_flag = os.environ.get("TEST_FLAG")
if test_flag is not None:
args.append(test_flag)
args.extend(["-c", "bottler", "-c", "iopath", "-c", "conda-forge"])
args.extend(["-c", "bottler", "-c", "fvcore", "-c", "iopath", "-c", "conda-forge"])
args.append("--no-anaconda-upload")
args.extend(["--python", os.environ["PYTHON_VERSION"]])
args.append("packaging/pytorch3d")
@@ -141,16 +132,8 @@ def do_build(start_args: List[str]) -> None:
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Build the conda package.")
parser.add_argument(
"--use-conda-cuda",
action="store_true",
help="get cuda from conda ignoring local cuda",
)
our_args = parser.parse_args()
args = ["conda", "build"]
args += setup_cuda(use_conda_cuda=our_args.use_conda_cuda)
setup_cuda()
init_path = source_root_dir + "/pytorch3d/__init__.py"
build_version = runpy.run_path(init_path)["__version__"]

View File

@@ -26,6 +26,6 @@ version_str="".join([
torch.version.cuda.replace(".",""),
f"_pyt{pyt_version_str}"
])
!pip install iopath
!pip install fvcore iopath
!pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/{version_str}/download.html
```

View File

@@ -144,7 +144,7 @@ do
conda activate "$tag"
# shellcheck disable=SC2086
conda install -y -c pytorch $extra_channel "pytorch=$pytorch_version" "$cudatools=$CUDA_TAG"
pip install iopath
pip install fvcore iopath
echo "python version" "$python_version" "pytorch version" "$pytorch_version" "cuda version" "$cu_version" "tag" "$tag"
rm -rf dist

View File

@@ -8,13 +8,10 @@ source:
requirements:
build:
- {{ compiler('c') }} # [win]
{{ environ.get('CONDA_CUDA_TOOLKIT_BUILD_CONSTRAINT1', '') }}
{{ environ.get('CONDA_CUDA_TOOLKIT_BUILD_CONSTRAINT2', '') }}
{{ environ.get('CONDA_CUB_CONSTRAINT') }}
host:
- python
- mkl =2023 # [x86_64]
{{ environ.get('SETUPTOOLS_CONSTRAINT') }}
{{ environ.get('CONDA_PYTORCH_BUILD_CONSTRAINT') }}
{{ environ.get('CONDA_PYTORCH_MKL_CONSTRAINT') }}
@@ -25,7 +22,7 @@ requirements:
- python
- numpy >=1.11
- torchvision >=0.5
- mkl =2023 # [x86_64]
- fvcore
- iopath
{{ environ.get('CONDA_PYTORCH_CONSTRAINT') }}
{{ environ.get('CONDA_CUDATOOLKIT_CONSTRAINT') }}
@@ -51,11 +48,8 @@ test:
- imageio
- hydra-core
- accelerate
- matplotlib
- tabulate
- pandas
- sqlalchemy
commands:
#pytest .
python -m unittest discover -v -s tests -t .

View File

@@ -7,7 +7,7 @@
# pyre-unsafe
""" "
""""
This file is the entry point for launching experiments with Implicitron.
Launch Training
@@ -44,7 +44,6 @@ The outputs of the experiment are saved and logged in multiple ways:
config file.
"""
import logging
import os
import warnings
@@ -100,7 +99,7 @@ except ModuleNotFoundError:
no_accelerate = os.environ.get("PYTORCH3D_NO_ACCELERATE") is not None
class Experiment(Configurable):
class Experiment(Configurable): # pyre-ignore: 13
"""
This class is at the top level of Implicitron's config hierarchy. Its
members are high-level components necessary for training an implicit rende-
@@ -121,16 +120,12 @@ class Experiment(Configurable):
will be saved here.
"""
# pyre-fixme[13]: Attribute `data_source` is never initialized.
data_source: DataSourceBase
data_source_class_type: str = "ImplicitronDataSource"
# pyre-fixme[13]: Attribute `model_factory` is never initialized.
model_factory: ModelFactoryBase
model_factory_class_type: str = "ImplicitronModelFactory"
# pyre-fixme[13]: Attribute `optimizer_factory` is never initialized.
optimizer_factory: OptimizerFactoryBase
optimizer_factory_class_type: str = "ImplicitronOptimizerFactory"
# pyre-fixme[13]: Attribute `training_loop` is never initialized.
training_loop: TrainingLoopBase
training_loop_class_type: str = "ImplicitronTrainingLoop"

View File

@@ -26,6 +26,7 @@ logger = logging.getLogger(__name__)
class ModelFactoryBase(ReplaceableBase):
resume: bool = True # resume from the last checkpoint
def __call__(self, **kwargs) -> ImplicitronModelBase:
@@ -44,7 +45,7 @@ class ModelFactoryBase(ReplaceableBase):
@registry.register
class ImplicitronModelFactory(ModelFactoryBase):
class ImplicitronModelFactory(ModelFactoryBase): # pyre-ignore [13]
"""
A factory class that initializes an implicit rendering model.
@@ -60,7 +61,6 @@ class ImplicitronModelFactory(ModelFactoryBase):
"""
# pyre-fixme[13]: Attribute `model` is never initialized.
model: ImplicitronModelBase
model_class_type: str = "GenericModel"
resume: bool = True
@@ -115,9 +115,7 @@ class ImplicitronModelFactory(ModelFactoryBase):
"cuda:%d" % 0: "cuda:%d" % accelerator.local_process_index
}
model_state_dict = torch.load(
model_io.get_model_path(model_path),
map_location=map_location,
weights_only=True,
model_io.get_model_path(model_path), map_location=map_location
)
try:

View File

@@ -123,7 +123,6 @@ class ImplicitronOptimizerFactory(OptimizerFactoryBase):
"""
# Get the parameters to optimize
if hasattr(model, "_get_param_groups"): # use the model function
# pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
p_groups = model._get_param_groups(self.lr, wd=self.weight_decay)
else:
p_groups = [
@@ -242,7 +241,7 @@ class ImplicitronOptimizerFactory(OptimizerFactoryBase):
map_location = {
"cuda:%d" % 0: "cuda:%d" % accelerator.local_process_index
}
optimizer_state = torch.load(opt_path, map_location, weights_only=True)
optimizer_state = torch.load(opt_path, map_location)
else:
raise FileNotFoundError(f"Optimizer state {opt_path} does not exist.")
return optimizer_state

View File

@@ -30,13 +30,13 @@ from .utils import seed_all_random_engines
logger = logging.getLogger(__name__)
# pyre-fixme[13]: Attribute `evaluator` is never initialized.
class TrainingLoopBase(ReplaceableBase):
"""
Members:
evaluator: An EvaluatorBase instance, used to evaluate training results.
"""
# pyre-fixme[13]: Attribute `evaluator` is never initialized.
evaluator: Optional[EvaluatorBase]
evaluator_class_type: Optional[str] = "ImplicitronEvaluator"
@@ -161,6 +161,7 @@ class ImplicitronTrainingLoop(TrainingLoopBase):
for epoch in range(start_epoch, self.max_epochs):
# automatic new_epoch and plotting of stats at every epoch start
with stats:
# Make sure to re-seed random generators to ensure reproducibility
# even after restart.
seed_all_random_engines(seed + epoch)
@@ -394,7 +395,6 @@ class ImplicitronTrainingLoop(TrainingLoopBase):
):
prefix = f"e{stats.epoch}_it{stats.it[trainmode]}"
if hasattr(model, "visualize"):
# pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
model.visualize(
viz,
visdom_env_imgs,

View File

@@ -53,8 +53,12 @@ class TestExperiment(unittest.TestCase):
cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_class_type = (
"JsonIndexDatasetMapProvider"
)
dataset_args = cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
dataloader_args = cfg.data_source_ImplicitronDataSource_args.data_loader_map_provider_SequenceDataLoaderMapProvider_args
dataset_args = (
cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
)
dataloader_args = (
cfg.data_source_ImplicitronDataSource_args.data_loader_map_provider_SequenceDataLoaderMapProvider_args
)
dataset_args.category = "skateboard"
dataset_args.test_restrict_sequence_id = 0
dataset_args.dataset_root = "manifold://co3d/tree/extracted"
@@ -90,8 +94,12 @@ class TestExperiment(unittest.TestCase):
cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_class_type = (
"JsonIndexDatasetMapProvider"
)
dataset_args = cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
dataloader_args = cfg.data_source_ImplicitronDataSource_args.data_loader_map_provider_SequenceDataLoaderMapProvider_args
dataset_args = (
cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
)
dataloader_args = (
cfg.data_source_ImplicitronDataSource_args.data_loader_map_provider_SequenceDataLoaderMapProvider_args
)
dataset_args.category = "skateboard"
dataset_args.test_restrict_sequence_id = 0
dataset_args.dataset_root = "manifold://co3d/tree/extracted"
@@ -103,7 +111,9 @@ class TestExperiment(unittest.TestCase):
cfg.training_loop_ImplicitronTrainingLoop_args.max_epochs = 2
cfg.training_loop_ImplicitronTrainingLoop_args.store_checkpoints = False
cfg.optimizer_factory_ImplicitronOptimizerFactory_args.lr_policy = "Exponential"
cfg.optimizer_factory_ImplicitronOptimizerFactory_args.exponential_lr_step_size = 2
cfg.optimizer_factory_ImplicitronOptimizerFactory_args.exponential_lr_step_size = (
2
)
if DEBUG:
experiment.dump_cfg(cfg)

View File

@@ -81,9 +81,8 @@ class TestOptimizerFactory(unittest.TestCase):
def test_param_overrides_self_param_group_assignment(self):
pa, pb, pc = [torch.nn.Parameter(data=torch.tensor(i * 1.0)) for i in range(3)]
na, nb = (
Node(params=[pa]),
Node(params=[pb], param_groups={"self": "pb_self", "p1": "pb_param"}),
na, nb = Node(params=[pa]), Node(
params=[pb], param_groups={"self": "pb_self", "p1": "pb_param"}
)
root = Node(children=[na, nb], params=[pc], param_groups={"m1": "pb_member"})
param_groups = self._get_param_groups(root)

View File

@@ -84,9 +84,9 @@ def get_nerf_datasets(
if autodownload and any(not os.path.isfile(p) for p in (cameras_path, image_path)):
# Automatically download the data files if missing.
download_data([dataset_name], data_root=data_root)
download_data((dataset_name,), data_root=data_root)
train_data = torch.load(cameras_path, weights_only=True)
train_data = torch.load(cameras_path)
n_cameras = train_data["cameras"]["R"].shape[0]
_image_max_image_pixels = Image.MAX_IMAGE_PIXELS

View File

@@ -194,6 +194,7 @@ class Stats:
it = self.it[stat_set]
for stat in self.log_vars:
if stat not in self.stats[stat_set]:
self.stats[stat_set][stat] = AverageMeter()

View File

@@ -24,6 +24,7 @@ CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs"
@hydra.main(config_path=CONFIG_DIR, config_name="lego")
def main(cfg: DictConfig):
# Device on which to run.
if torch.cuda.is_available():
device = "cuda"
@@ -62,7 +63,7 @@ def main(cfg: DictConfig):
raise ValueError(f"Model checkpoint {checkpoint_path} does not exist!")
print(f"Loading checkpoint {checkpoint_path}.")
loaded_data = torch.load(checkpoint_path, weights_only=True)
loaded_data = torch.load(checkpoint_path)
# Do not load the cached xy grid.
# - this allows setting an arbitrary evaluation image size.
state_dict = {

View File

@@ -42,6 +42,7 @@ class TestRaysampler(unittest.TestCase):
cameras, rays = [], []
for _ in range(batch_size):
R = random_rotations(1)
T = torch.randn(1, 3)
focal_length = torch.rand(1, 2) + 0.5

View File

@@ -25,6 +25,7 @@ CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs"
@hydra.main(config_path=CONFIG_DIR, config_name="lego")
def main(cfg: DictConfig):
# Set the relevant seeds for reproducibility.
np.random.seed(cfg.seed)
torch.manual_seed(cfg.seed)
@@ -76,7 +77,7 @@ def main(cfg: DictConfig):
# Resume training if requested.
if cfg.resume and os.path.isfile(checkpoint_path):
print(f"Resuming from checkpoint {checkpoint_path}.")
loaded_data = torch.load(checkpoint_path, weights_only=True)
loaded_data = torch.load(checkpoint_path)
model.load_state_dict(loaded_data["model"])
stats = pickle.loads(loaded_data["stats"])
print(f" => resuming from epoch {stats.epoch}.")
@@ -218,6 +219,7 @@ def main(cfg: DictConfig):
# Validation
if epoch % cfg.validation_epoch_interval == 0 and epoch > 0:
# Sample a validation camera/image.
val_batch = next(val_dataloader.__iter__())
val_image, val_camera, camera_idx = val_batch[0].values()

View File

@@ -6,4 +6,4 @@
# pyre-unsafe
__version__ = "0.7.9"
__version__ = "0.7.6"

View File

@@ -17,7 +17,7 @@ Some functions which depend on PyTorch or Python versions.
def meshgrid_ij(
*A: Union[torch.Tensor, Sequence[torch.Tensor]],
*A: Union[torch.Tensor, Sequence[torch.Tensor]]
) -> Tuple[torch.Tensor, ...]: # pragma: no cover
"""
Like torch.meshgrid was before PyTorch 1.10.0, i.e. with indexing set to ij

View File

@@ -32,9 +32,7 @@ __global__ void BallQueryKernel(
at::PackedTensorAccessor64<int64_t, 3, at::RestrictPtrTraits> idxs,
at::PackedTensorAccessor64<scalar_t, 3, at::RestrictPtrTraits> dists,
const int64_t K,
const float radius,
const float radius2,
const bool skip_points_outside_cube) {
const float radius2) {
const int64_t N = p1.size(0);
const int64_t chunks_per_cloud = (1 + (p1.size(1) - 1) / blockDim.x);
const int64_t chunks_to_do = N * chunks_per_cloud;
@@ -53,19 +51,7 @@ __global__ void BallQueryKernel(
// Iterate over points in p2 until desired count is reached or
// all points have been considered
for (int64_t j = 0, count = 0; j < lengths2[n] && count < K; ++j) {
if (skip_points_outside_cube) {
bool is_within_radius = true;
// Filter when any one coordinate is already outside the radius
for (int d = 0; is_within_radius && d < D; ++d) {
scalar_t abs_diff = fabs(p1[n][i][d] - p2[n][j][d]);
is_within_radius = (abs_diff <= radius);
}
if (!is_within_radius) {
continue;
}
}
// Else, calculate the distance between the points and compare
// Calculate the distance between the points
scalar_t dist2 = 0.0;
for (int d = 0; d < D; ++d) {
scalar_t diff = p1[n][i][d] - p2[n][j][d];
@@ -91,8 +77,7 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCuda(
const at::Tensor& lengths1, // (N,)
const at::Tensor& lengths2, // (N,)
int K,
float radius,
bool skip_points_outside_cube) {
float radius) {
// Check inputs are on the same device
at::TensorArg p1_t{p1, "p1", 1}, p2_t{p2, "p2", 2},
lengths1_t{lengths1, "lengths1", 3}, lengths2_t{lengths2, "lengths2", 4};
@@ -135,9 +120,7 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCuda(
idxs.packed_accessor64<int64_t, 3, at::RestrictPtrTraits>(),
dists.packed_accessor64<float, 3, at::RestrictPtrTraits>(),
K_64,
radius,
radius2,
skip_points_outside_cube);
radius2);
}));
AT_CUDA_CHECK(cudaGetLastError());

View File

@@ -25,9 +25,6 @@
// within the radius
// radius: the radius around each point within which the neighbors need to be
// located
// skip_points_outside_cube: If true, reduce multiplications of float values
// by not explicitly calculating distances to points that fall outside the
// D-cube with side length (2*radius) centered at each point in p1.
//
// Returns:
// p1_neighbor_idx: LongTensor of shape (N, P1, K), where
@@ -49,8 +46,7 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCpu(
const at::Tensor& lengths1,
const at::Tensor& lengths2,
const int K,
const float radius,
const bool skip_points_outside_cube);
const float radius);
// CUDA implementation
std::tuple<at::Tensor, at::Tensor> BallQueryCuda(
@@ -59,8 +55,7 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCuda(
const at::Tensor& lengths1,
const at::Tensor& lengths2,
const int K,
const float radius,
const bool skip_points_outside_cube);
const float radius);
// Implementation which is exposed
// Note: the backward pass reuses the KNearestNeighborBackward kernel
@@ -70,8 +65,7 @@ inline std::tuple<at::Tensor, at::Tensor> BallQuery(
const at::Tensor& lengths1,
const at::Tensor& lengths2,
int K,
float radius,
bool skip_points_outside_cube) {
float radius) {
if (p1.is_cuda() || p2.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(p1);
@@ -82,20 +76,16 @@ inline std::tuple<at::Tensor, at::Tensor> BallQuery(
lengths1.contiguous(),
lengths2.contiguous(),
K,
radius,
skip_points_outside_cube);
radius);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(p1);
CHECK_CPU(p2);
return BallQueryCpu(
p1.contiguous(),
p2.contiguous(),
lengths1.contiguous(),
lengths2.contiguous(),
K,
radius,
skip_points_outside_cube);
radius);
}

View File

@@ -6,8 +6,8 @@
* LICENSE file in the root directory of this source tree.
*/
#include <math.h>
#include <torch/extension.h>
#include <queue>
#include <tuple>
std::tuple<at::Tensor, at::Tensor> BallQueryCpu(
@@ -16,8 +16,7 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCpu(
const at::Tensor& lengths1,
const at::Tensor& lengths2,
int K,
float radius,
bool skip_points_outside_cube) {
float radius) {
const int N = p1.size(0);
const int P1 = p1.size(1);
const int D = p1.size(2);
@@ -39,16 +38,6 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCpu(
const int64_t length2 = lengths2_a[n];
for (int64_t i = 0; i < length1; ++i) {
for (int64_t j = 0, count = 0; j < length2 && count < K; ++j) {
if (skip_points_outside_cube) {
bool is_within_radius = true;
for (int d = 0; is_within_radius && d < D; ++d) {
float abs_diff = fabs(p1_a[n][i][d] - p2_a[n][j][d]);
is_within_radius = (abs_diff <= radius);
}
if (!is_within_radius) {
continue;
}
}
float dist2 = 0;
for (int d = 0; d < D; ++d) {
float diff = p1_a[n][i][d] - p2_a[n][j][d];

View File

@@ -98,11 +98,6 @@ at::Tensor SigmoidAlphaBlendBackward(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(distances);
CHECK_CPU(pix_to_face);
CHECK_CPU(alphas);
CHECK_CPU(grad_alphas);
return SigmoidAlphaBlendBackwardCpu(
grad_alphas, alphas, distances, pix_to_face, sigma);
}

View File

@@ -28,16 +28,17 @@ __global__ void alphaCompositeCudaForwardKernel(
const at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> alphas,
const at::PackedTensorAccessor64<int64_t, 4, at::RestrictPtrTraits> points_idx) {
// clang-format on
const int64_t batch_size = result.size(0);
const int64_t C = features.size(0);
const int64_t H = points_idx.size(2);
const int64_t W = points_idx.size(3);
// Get the batch and index
const auto batch = blockIdx.x;
const int batch = blockIdx.x;
const int num_pixels = C * H * W;
const auto num_threads = gridDim.y * blockDim.x;
const auto tid = blockIdx.y * blockDim.x + threadIdx.x;
const int num_threads = gridDim.y * blockDim.x;
const int tid = blockIdx.y * blockDim.x + threadIdx.x;
// Iterate over each feature in each pixel
for (int pid = tid; pid < num_pixels; pid += num_threads) {
@@ -78,16 +79,17 @@ __global__ void alphaCompositeCudaBackwardKernel(
const at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> alphas,
const at::PackedTensorAccessor64<int64_t, 4, at::RestrictPtrTraits> points_idx) {
// clang-format on
const int64_t batch_size = points_idx.size(0);
const int64_t C = features.size(0);
const int64_t H = points_idx.size(2);
const int64_t W = points_idx.size(3);
// Get the batch and index
const auto batch = blockIdx.x;
const int batch = blockIdx.x;
const int num_pixels = C * H * W;
const auto num_threads = gridDim.y * blockDim.x;
const auto tid = blockIdx.y * blockDim.x + threadIdx.x;
const int num_threads = gridDim.y * blockDim.x;
const int tid = blockIdx.y * blockDim.x + threadIdx.x;
// Parallelize over each feature in each pixel in images of size H * W,
// for each image in the batch of size batch_size

View File

@@ -74,9 +74,6 @@ torch::Tensor alphaCompositeForward(
AT_ERROR("Not compiled with GPU support");
#endif
} else {
CHECK_CPU(features);
CHECK_CPU(alphas);
CHECK_CPU(points_idx);
return alphaCompositeCpuForward(features, alphas, points_idx);
}
}
@@ -104,11 +101,6 @@ std::tuple<torch::Tensor, torch::Tensor> alphaCompositeBackward(
AT_ERROR("Not compiled with GPU support");
#endif
} else {
CHECK_CPU(grad_outputs);
CHECK_CPU(features);
CHECK_CPU(alphas);
CHECK_CPU(points_idx);
return alphaCompositeCpuBackward(
grad_outputs, features, alphas, points_idx);
}

View File

@@ -28,16 +28,17 @@ __global__ void weightedSumNormCudaForwardKernel(
const at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> alphas,
const at::PackedTensorAccessor64<int64_t, 4, at::RestrictPtrTraits> points_idx) {
// clang-format on
const int64_t batch_size = result.size(0);
const int64_t C = features.size(0);
const int64_t H = points_idx.size(2);
const int64_t W = points_idx.size(3);
// Get the batch and index
const auto batch = blockIdx.x;
const int batch = blockIdx.x;
const int num_pixels = C * H * W;
const auto num_threads = gridDim.y * blockDim.x;
const auto tid = blockIdx.y * blockDim.x + threadIdx.x;
const int num_threads = gridDim.y * blockDim.x;
const int tid = blockIdx.y * blockDim.x + threadIdx.x;
// Parallelize over each feature in each pixel in images of size H * W,
// for each image in the batch of size batch_size
@@ -91,16 +92,17 @@ __global__ void weightedSumNormCudaBackwardKernel(
const at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> alphas,
const at::PackedTensorAccessor64<int64_t, 4, at::RestrictPtrTraits> points_idx) {
// clang-format on
const int64_t batch_size = points_idx.size(0);
const int64_t C = features.size(0);
const int64_t H = points_idx.size(2);
const int64_t W = points_idx.size(3);
// Get the batch and index
const auto batch = blockIdx.x;
const int batch = blockIdx.x;
const int num_pixels = C * W * H;
const auto num_threads = gridDim.y * blockDim.x;
const auto tid = blockIdx.y * blockDim.x + threadIdx.x;
const int num_threads = gridDim.y * blockDim.x;
const int tid = blockIdx.y * blockDim.x + threadIdx.x;
// Parallelize over each feature in each pixel in images of size H * W,
// for each image in the batch of size batch_size

View File

@@ -73,10 +73,6 @@ torch::Tensor weightedSumNormForward(
AT_ERROR("Not compiled with GPU support");
#endif
} else {
CHECK_CPU(features);
CHECK_CPU(alphas);
CHECK_CPU(points_idx);
return weightedSumNormCpuForward(features, alphas, points_idx);
}
}
@@ -104,11 +100,6 @@ std::tuple<torch::Tensor, torch::Tensor> weightedSumNormBackward(
AT_ERROR("Not compiled with GPU support");
#endif
} else {
CHECK_CPU(grad_outputs);
CHECK_CPU(features);
CHECK_CPU(alphas);
CHECK_CPU(points_idx);
return weightedSumNormCpuBackward(
grad_outputs, features, alphas, points_idx);
}

View File

@@ -26,16 +26,17 @@ __global__ void weightedSumCudaForwardKernel(
const at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> alphas,
const at::PackedTensorAccessor64<int64_t, 4, at::RestrictPtrTraits> points_idx) {
// clang-format on
const int64_t batch_size = result.size(0);
const int64_t C = features.size(0);
const int64_t H = points_idx.size(2);
const int64_t W = points_idx.size(3);
// Get the batch and index
const auto batch = blockIdx.x;
const int batch = blockIdx.x;
const int num_pixels = C * H * W;
const auto num_threads = gridDim.y * blockDim.x;
const auto tid = blockIdx.y * blockDim.x + threadIdx.x;
const int num_threads = gridDim.y * blockDim.x;
const int tid = blockIdx.y * blockDim.x + threadIdx.x;
// Parallelize over each feature in each pixel in images of size H * W,
// for each image in the batch of size batch_size
@@ -73,16 +74,17 @@ __global__ void weightedSumCudaBackwardKernel(
const at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> alphas,
const at::PackedTensorAccessor64<int64_t, 4, at::RestrictPtrTraits> points_idx) {
// clang-format on
const int64_t batch_size = points_idx.size(0);
const int64_t C = features.size(0);
const int64_t H = points_idx.size(2);
const int64_t W = points_idx.size(3);
// Get the batch and index
const auto batch = blockIdx.x;
const int batch = blockIdx.x;
const int num_pixels = C * H * W;
const auto num_threads = gridDim.y * blockDim.x;
const auto tid = blockIdx.y * blockDim.x + threadIdx.x;
const int num_threads = gridDim.y * blockDim.x;
const int tid = blockIdx.y * blockDim.x + threadIdx.x;
// Iterate over each pixel to compute the contribution to the
// gradient for the features and weights

View File

@@ -72,9 +72,6 @@ torch::Tensor weightedSumForward(
AT_ERROR("Not compiled with GPU support");
#endif
} else {
CHECK_CPU(features);
CHECK_CPU(alphas);
CHECK_CPU(points_idx);
return weightedSumCpuForward(features, alphas, points_idx);
}
}
@@ -101,11 +98,6 @@ std::tuple<torch::Tensor, torch::Tensor> weightedSumBackward(
AT_ERROR("Not compiled with GPU support");
#endif
} else {
CHECK_CPU(grad_outputs);
CHECK_CPU(features);
CHECK_CPU(alphas);
CHECK_CPU(points_idx);
return weightedSumCpuBackward(grad_outputs, features, alphas, points_idx);
}
}

View File

@@ -8,6 +8,7 @@
// clang-format off
#include "./pulsar/global.h" // Include before <torch/extension.h>.
#include <torch/extension.h>
// clang-format on
#include "./pulsar/pytorch/renderer.h"
#include "./pulsar/pytorch/tensor_util.h"
@@ -98,23 +99,21 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("marching_cubes", &MarchingCubes);
// Pulsar.
// Pulsar not enabled on AMD.
#ifdef PULSAR_LOGGING_ENABLED
c10::ShowLogInfoToStderr();
#endif
py::class_<
pulsar::pytorch::Renderer,
std::shared_ptr<pulsar::pytorch::Renderer>>(m, "PulsarRenderer")
.def(
py::init<
const uint&,
const uint&,
const uint&,
const bool&,
const bool&,
const float&,
const uint&,
const uint&>())
.def(py::init<
const uint&,
const uint&,
const uint&,
const bool&,
const bool&,
const float&,
const uint&,
const uint&>())
.def(
"__eq__",
[](const pulsar::pytorch::Renderer& a,
@@ -149,10 +148,10 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
py::arg("gamma"),
py::arg("max_depth"),
py::arg("min_depth") /* = 0.f*/,
py::arg("bg_col") /* = std::nullopt not exposed properly in
pytorch 1.1. */
py::arg(
"bg_col") /* = at::nullopt not exposed properly in pytorch 1.1. */
,
py::arg("opacity") /* = std::nullopt ... */,
py::arg("opacity") /* = at::nullopt ... */,
py::arg("percent_allowed_difference") = 0.01f,
py::arg("max_n_hits") = MAX_UINT,
py::arg("mode") = 0)

View File

@@ -60,8 +60,6 @@ std::tuple<at::Tensor, at::Tensor> FaceAreasNormalsForward(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(verts);
CHECK_CPU(faces);
return FaceAreasNormalsForwardCpu(verts, faces);
}
@@ -82,9 +80,5 @@ at::Tensor FaceAreasNormalsBackward(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(grad_areas);
CHECK_CPU(grad_normals);
CHECK_CPU(verts);
CHECK_CPU(faces);
return FaceAreasNormalsBackwardCpu(grad_areas, grad_normals, verts, faces);
}

View File

@@ -20,14 +20,14 @@ __global__ void GatherScatterCudaKernel(
const size_t V,
const size_t D,
const size_t E) {
const auto tid = threadIdx.x;
const int tid = threadIdx.x;
// Reverse the vertex order if backward.
const int v0_idx = backward ? 1 : 0;
const int v1_idx = backward ? 0 : 1;
// Edges are split evenly across the blocks.
for (auto e = blockIdx.x; e < E; e += gridDim.x) {
for (int e = blockIdx.x; e < E; e += gridDim.x) {
// Get indices of vertices which form the edge.
const int64_t v0 = edges[2 * e + v0_idx];
const int64_t v1 = edges[2 * e + v1_idx];
@@ -35,7 +35,7 @@ __global__ void GatherScatterCudaKernel(
// Split vertex features evenly across threads.
// This implementation will be quite wasteful when D<128 since there will be
// a lot of threads doing nothing.
for (auto d = tid; d < D; d += blockDim.x) {
for (int d = tid; d < D; d += blockDim.x) {
const float val = input[v1 * D + d];
float* address = output + v0 * D + d;
atomicAdd(address, val);

View File

@@ -53,7 +53,5 @@ at::Tensor GatherScatter(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(input);
CHECK_CPU(edges);
return GatherScatterCpu(input, edges, directed, backward);
}

View File

@@ -20,8 +20,8 @@ __global__ void InterpFaceAttrsForwardKernel(
const size_t P,
const size_t F,
const size_t D) {
const auto tid = threadIdx.x + blockIdx.x * blockDim.x;
const auto num_threads = blockDim.x * gridDim.x;
const int tid = threadIdx.x + blockIdx.x * blockDim.x;
const int num_threads = blockDim.x * gridDim.x;
for (int pd = tid; pd < P * D; pd += num_threads) {
const int p = pd / D;
const int d = pd % D;
@@ -93,8 +93,8 @@ __global__ void InterpFaceAttrsBackwardKernel(
const size_t P,
const size_t F,
const size_t D) {
const auto tid = threadIdx.x + blockIdx.x * blockDim.x;
const auto num_threads = blockDim.x * gridDim.x;
const int tid = threadIdx.x + blockIdx.x * blockDim.x;
const int num_threads = blockDim.x * gridDim.x;
for (int pd = tid; pd < P * D; pd += num_threads) {
const int p = pd / D;
const int d = pd % D;

View File

@@ -57,8 +57,6 @@ at::Tensor InterpFaceAttrsForward(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(face_attrs);
CHECK_CPU(barycentric_coords);
return InterpFaceAttrsForwardCpu(pix_to_face, barycentric_coords, face_attrs);
}
@@ -108,9 +106,6 @@ std::tuple<at::Tensor, at::Tensor> InterpFaceAttrsBackward(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(face_attrs);
CHECK_CPU(barycentric_coords);
CHECK_CPU(grad_pix_attrs);
return InterpFaceAttrsBackwardCpu(
pix_to_face, barycentric_coords, face_attrs, grad_pix_attrs);
}

View File

@@ -44,7 +44,5 @@ inline std::tuple<at::Tensor, at::Tensor> IoUBox3D(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(boxes1);
CHECK_CPU(boxes2);
return IoUBox3DCpu(boxes1.contiguous(), boxes2.contiguous());
}

View File

@@ -7,7 +7,10 @@
*/
#include <torch/extension.h>
#include <torch/torch.h>
#include <list>
#include <numeric>
#include <queue>
#include <tuple>
#include "iou_box3d/iou_utils.h"

View File

@@ -461,8 +461,10 @@ __device__ inline std::tuple<float3, float3> ArgMaxVerts(
__device__ inline bool IsCoplanarTriTri(
const FaceVerts& tri1,
const FaceVerts& tri2) {
const float3 tri1_ctr = FaceCenter({tri1.v0, tri1.v1, tri1.v2});
const float3 tri1_n = FaceNormal({tri1.v0, tri1.v1, tri1.v2});
const float3 tri2_ctr = FaceCenter({tri2.v0, tri2.v1, tri2.v2});
const float3 tri2_n = FaceNormal({tri2.v0, tri2.v1, tri2.v2});
// Check if parallel
@@ -498,6 +500,7 @@ __device__ inline bool IsCoplanarTriPlane(
const FaceVerts& tri,
const FaceVerts& plane,
const float3& normal) {
const float3 tri_ctr = FaceCenter({tri.v0, tri.v1, tri.v2});
const float3 nt = FaceNormal({tri.v0, tri.v1, tri.v2});
// check if parallel
@@ -725,7 +728,7 @@ __device__ inline int BoxIntersections(
}
}
// Update the face_verts_out tris
num_tris = min(MAX_TRIS, offset);
num_tris = offset;
for (int j = 0; j < num_tris; ++j) {
face_verts_out[j] = tri_verts_updated[j];
}

View File

@@ -74,8 +74,6 @@ std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdx(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(p1);
CHECK_CPU(p2);
return KNearestNeighborIdxCpu(p1, p2, lengths1, lengths2, norm, K);
}
@@ -142,8 +140,6 @@ std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackward(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(p1);
CHECK_CPU(p2);
return KNearestNeighborBackwardCpu(
p1, p2, lengths1, lengths2, idxs, norm, grad_dists);
}

View File

@@ -58,6 +58,5 @@ inline std::tuple<at::Tensor, at::Tensor, at::Tensor> MarchingCubes(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(vol);
return MarchingCubesCpu(vol.contiguous(), isolevel);
}

View File

@@ -88,8 +88,6 @@ at::Tensor PackedToPadded(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(inputs_packed);
CHECK_CPU(first_idxs);
return PackedToPaddedCpu(inputs_packed, first_idxs, max_size);
}
@@ -107,7 +105,5 @@ at::Tensor PaddedToPacked(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(inputs_padded);
CHECK_CPU(first_idxs);
return PaddedToPackedCpu(inputs_padded, first_idxs, num_inputs);
}

View File

@@ -174,8 +174,8 @@ std::tuple<at::Tensor, at::Tensor> HullHullDistanceForwardCpu(
at::Tensor idxs = at::zeros({A_N,}, as_first_idx.options());
// clang-format on
auto as_a = as.accessor<float, H1 == 1 ? 2 : 3>();
auto bs_a = bs.accessor<float, H2 == 1 ? 2 : 3>();
auto as_a = as.accessor < float, H1 == 1 ? 2 : 3 > ();
auto bs_a = bs.accessor < float, H2 == 1 ? 2 : 3 > ();
auto as_first_idx_a = as_first_idx.accessor<int64_t, 1>();
auto bs_first_idx_a = bs_first_idx.accessor<int64_t, 1>();
auto dists_a = dists.accessor<float, 1>();
@@ -230,10 +230,10 @@ std::tuple<at::Tensor, at::Tensor> HullHullDistanceBackwardCpu(
at::Tensor grad_as = at::zeros_like(as);
at::Tensor grad_bs = at::zeros_like(bs);
auto as_a = as.accessor<float, H1 == 1 ? 2 : 3>();
auto bs_a = bs.accessor<float, H2 == 1 ? 2 : 3>();
auto grad_as_a = grad_as.accessor<float, H1 == 1 ? 2 : 3>();
auto grad_bs_a = grad_bs.accessor<float, H2 == 1 ? 2 : 3>();
auto as_a = as.accessor < float, H1 == 1 ? 2 : 3 > ();
auto bs_a = bs.accessor < float, H2 == 1 ? 2 : 3 > ();
auto grad_as_a = grad_as.accessor < float, H1 == 1 ? 2 : 3 > ();
auto grad_bs_a = grad_bs.accessor < float, H2 == 1 ? 2 : 3 > ();
auto idx_bs_a = idx_bs.accessor<int64_t, 1>();
auto grad_dists_a = grad_dists.accessor<float, 1>();

View File

@@ -110,7 +110,7 @@ __global__ void DistanceForwardKernel(
__syncthreads();
// Perform reduction in shared memory.
for (auto s = blockDim.x / 2; s > 32; s >>= 1) {
for (int s = blockDim.x / 2; s > 32; s >>= 1) {
if (tid < s) {
if (min_dists[tid] > min_dists[tid + s]) {
min_dists[tid] = min_dists[tid + s];
@@ -502,8 +502,8 @@ __global__ void PointFaceArrayForwardKernel(
const float3* tris_f3 = (float3*)tris;
// Parallelize over P * S computations
const auto num_threads = gridDim.x * blockDim.x;
const auto tid = blockIdx.x * blockDim.x + threadIdx.x;
const int num_threads = gridDim.x * blockDim.x;
const int tid = blockIdx.x * blockDim.x + threadIdx.x;
for (int t_i = tid; t_i < P * T; t_i += num_threads) {
const int t = t_i / P; // segment index.
@@ -576,8 +576,8 @@ __global__ void PointFaceArrayBackwardKernel(
const float3* tris_f3 = (float3*)tris;
// Parallelize over P * S computations
const auto num_threads = gridDim.x * blockDim.x;
const auto tid = blockIdx.x * blockDim.x + threadIdx.x;
const int num_threads = gridDim.x * blockDim.x;
const int tid = blockIdx.x * blockDim.x + threadIdx.x;
for (int t_i = tid; t_i < P * T; t_i += num_threads) {
const int t = t_i / P; // triangle index.
@@ -683,8 +683,8 @@ __global__ void PointEdgeArrayForwardKernel(
float3* segms_f3 = (float3*)segms;
// Parallelize over P * S computations
const auto num_threads = gridDim.x * blockDim.x;
const auto tid = blockIdx.x * blockDim.x + threadIdx.x;
const int num_threads = gridDim.x * blockDim.x;
const int tid = blockIdx.x * blockDim.x + threadIdx.x;
for (int t_i = tid; t_i < P * S; t_i += num_threads) {
const int s = t_i / P; // segment index.
@@ -752,8 +752,8 @@ __global__ void PointEdgeArrayBackwardKernel(
float3* segms_f3 = (float3*)segms;
// Parallelize over P * S computations
const auto num_threads = gridDim.x * blockDim.x;
const auto tid = blockIdx.x * blockDim.x + threadIdx.x;
const int num_threads = gridDim.x * blockDim.x;
const int tid = blockIdx.x * blockDim.x + threadIdx.x;
for (int t_i = tid; t_i < P * S; t_i += num_threads) {
const int s = t_i / P; // segment index.

View File

@@ -88,10 +88,6 @@ std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceForward(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(points_first_idx);
CHECK_CPU(tris);
CHECK_CPU(tris_first_idx);
return PointFaceDistanceForwardCpu(
points, points_first_idx, tris, tris_first_idx, min_triangle_area);
}
@@ -147,10 +143,6 @@ std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceBackward(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(tris);
CHECK_CPU(idx_points);
CHECK_CPU(grad_dists);
return PointFaceDistanceBackwardCpu(
points, tris, idx_points, grad_dists, min_triangle_area);
}
@@ -229,10 +221,6 @@ std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceForward(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(points_first_idx);
CHECK_CPU(tris);
CHECK_CPU(tris_first_idx);
return FacePointDistanceForwardCpu(
points, points_first_idx, tris, tris_first_idx, min_triangle_area);
}
@@ -289,10 +277,6 @@ std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceBackward(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(tris);
CHECK_CPU(idx_tris);
CHECK_CPU(grad_dists);
return FacePointDistanceBackwardCpu(
points, tris, idx_tris, grad_dists, min_triangle_area);
}
@@ -362,10 +346,6 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceForward(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(points_first_idx);
CHECK_CPU(segms);
CHECK_CPU(segms_first_idx);
return PointEdgeDistanceForwardCpu(
points, points_first_idx, segms, segms_first_idx, max_points);
}
@@ -416,10 +396,6 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceBackward(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(segms);
CHECK_CPU(idx_points);
CHECK_CPU(grad_dists);
return PointEdgeDistanceBackwardCpu(points, segms, idx_points, grad_dists);
}
@@ -488,10 +464,6 @@ std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceForward(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(points_first_idx);
CHECK_CPU(segms);
CHECK_CPU(segms_first_idx);
return EdgePointDistanceForwardCpu(
points, points_first_idx, segms, segms_first_idx, max_segms);
}
@@ -542,10 +514,6 @@ std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceBackward(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(segms);
CHECK_CPU(idx_segms);
CHECK_CPU(grad_dists);
return EdgePointDistanceBackwardCpu(points, segms, idx_segms, grad_dists);
}
@@ -599,8 +567,6 @@ torch::Tensor PointFaceArrayDistanceForward(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(tris);
return PointFaceArrayDistanceForwardCpu(points, tris, min_triangle_area);
}
@@ -647,9 +613,6 @@ std::tuple<torch::Tensor, torch::Tensor> PointFaceArrayDistanceBackward(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(tris);
CHECK_CPU(grad_dists);
return PointFaceArrayDistanceBackwardCpu(
points, tris, grad_dists, min_triangle_area);
}
@@ -698,8 +661,6 @@ torch::Tensor PointEdgeArrayDistanceForward(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(segms);
return PointEdgeArrayDistanceForwardCpu(points, segms);
}
@@ -742,8 +703,5 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeArrayDistanceBackward(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(segms);
CHECK_CPU(grad_dists);
return PointEdgeArrayDistanceBackwardCpu(points, segms, grad_dists);
}

View File

@@ -104,12 +104,6 @@ inline void PointsToVolumesForward(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points_3d);
CHECK_CPU(points_features);
CHECK_CPU(volume_densities);
CHECK_CPU(volume_features);
CHECK_CPU(grid_sizes);
CHECK_CPU(mask);
PointsToVolumesForwardCpu(
points_3d,
points_features,
@@ -189,14 +183,6 @@ inline void PointsToVolumesBackward(
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points_3d);
CHECK_CPU(points_features);
CHECK_CPU(grid_sizes);
CHECK_CPU(mask);
CHECK_CPU(grad_volume_densities);
CHECK_CPU(grad_volume_features);
CHECK_CPU(grad_points_3d);
CHECK_CPU(grad_points_features);
PointsToVolumesBackwardCpu(
points_3d,
points_features,

View File

@@ -8,7 +8,9 @@
#include <torch/csrc/autograd/VariableTypeUtils.h>
#include <torch/extension.h>
#include <algorithm>
#include <cmath>
#include <thread>
#include <vector>
// In the x direction, the location {0, ..., grid_size_x - 1} correspond to

View File

@@ -59,11 +59,6 @@ getLastCudaError(const char* errorMessage, const char* file, const int line) {
#define SHARED __shared__
#define ACTIVEMASK() __activemask()
#define BALLOT(mask, val) __ballot_sync((mask), val)
/* TODO (ROCM-6.2): None of the WARP_* are used anywhere and ROCM-6.2 natively
* supports __shfl_*. Disabling until the move to ROCM-6.2.
*/
#if !defined(USE_ROCM)
/**
* Find the cumulative sum within a warp up to the current
* thread lane, with each mask thread contributing base.
@@ -120,7 +115,6 @@ INLINE DEVICE float3 WARP_SUM_FLOAT3(
ret.z = WARP_SUM(group, mask, base.z);
return ret;
}
#endif //! USE_ROCM
// Floating point.
// #define FMUL(a, b) __fmul_rn((a), (b))
@@ -148,7 +142,6 @@ INLINE DEVICE float3 WARP_SUM_FLOAT3(
#define FMA(x, y, z) __fmaf_rn((x), (y), (z))
#define I2F(a) __int2float_rn(a)
#define FRCP(x) __frcp_rn(x)
#if !defined(USE_ROCM)
__device__ static float atomicMax(float* address, float val) {
int* address_as_i = (int*)address;
int old = *address_as_i, assumed;
@@ -173,7 +166,6 @@ __device__ static float atomicMin(float* address, float val) {
} while (assumed != old);
return __int_as_float(old);
}
#endif //! USE_ROCM
#define DMAX(a, b) FMAX(a, b)
#define DMIN(a, b) FMIN(a, b)
#define DSQRT(a) sqrt(a)
@@ -417,7 +409,7 @@ __device__ static float atomicMin(float* address, float val) {
(OUT_PTR), \
(NUM_SELECTED_PTR), \
(NUM_ITEMS), \
(STREAM));
stream = (STREAM));
#define COPY_HOST_DEV(PTR_D, PTR_H, TYPE, SIZE) \
HANDLECUDA(cudaMemcpy( \

View File

@@ -15,8 +15,8 @@
#endif
#if defined(_WIN64) || defined(_WIN32)
using uint = unsigned int;
using ushort = unsigned short;
#define uint unsigned int
#define ushort unsigned short
#endif
#include "./logging.h" // <- include before torch/extension.h
@@ -36,13 +36,11 @@ using ushort = unsigned short;
#pragma nv_diag_suppress 2951
#pragma nv_diag_suppress 2967
#else
#if !defined(USE_ROCM)
#pragma diag_suppress = attribute_not_allowed
#pragma diag_suppress = 1866
#pragma diag_suppress = 2941
#pragma diag_suppress = 2951
#pragma diag_suppress = 2967
#endif //! USE_ROCM
#endif
#else // __CUDACC__
#define INLINE inline
@@ -58,9 +56,7 @@ using ushort = unsigned short;
#pragma clang diagnostic pop
#ifdef WITH_CUDA
#include <ATen/cuda/CUDAContext.h>
#if !defined(USE_ROCM)
#include <vector_functions.h>
#endif //! USE_ROCM
#else
#ifndef cudaStream_t
typedef void* cudaStream_t;

View File

@@ -357,11 +357,11 @@ void MAX_WS(
//
//
#define END_PARALLEL() \
end_parallel:; \
end_parallel :; \
}
#define END_PARALLEL_NORET() }
#define END_PARALLEL_2D() \
end_parallel:; \
end_parallel :; \
} \
}
#define END_PARALLEL_2D_NORET() \

View File

@@ -14,7 +14,7 @@
#include "./commands.h"
namespace pulsar {
IHD CamGradInfo::CamGradInfo(int x) {
IHD CamGradInfo::CamGradInfo() {
cam_pos = make_float3(0.f, 0.f, 0.f);
pixel_0_0_center = make_float3(0.f, 0.f, 0.f);
pixel_dir_x = make_float3(0.f, 0.f, 0.f);

View File

@@ -63,13 +63,18 @@ inline bool operator==(const CamInfo& a, const CamInfo& b) {
};
struct CamGradInfo {
HOST DEVICE CamGradInfo(int = 0);
HOST DEVICE CamGradInfo();
float3 cam_pos;
float3 pixel_0_0_center;
float3 pixel_dir_x;
float3 pixel_dir_y;
};
// TODO: remove once https://github.com/NVlabs/cub/issues/172 is resolved.
struct IntWrapper {
int val;
};
} // namespace pulsar
#endif

View File

@@ -24,7 +24,7 @@
// #pragma diag_suppress = 68
#include <ATen/cuda/CUDAContext.h>
// #pragma pop
#include "../gpu/commands.h"
#include "../cuda/commands.h"
#else
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Weverything"

View File

@@ -46,7 +46,6 @@ IHD float3 outer_product_sum(const float3& a) {
}
// TODO: put intrinsics here.
#if !defined(USE_ROCM)
IHD float3 operator+(const float3& a, const float3& b) {
return make_float3(a.x + b.x, a.y + b.y, a.z + b.z);
}
@@ -94,7 +93,6 @@ IHD float3 operator*(const float3& a, const float3& b) {
IHD float3 operator*(const float& a, const float3& b) {
return b * a;
}
#endif //! USE_ROCM
INLINE DEVICE float length(const float3& v) {
// TODO: benchmark what's faster.
@@ -149,6 +147,11 @@ IHD CamGradInfo operator*(const CamGradInfo& a, const float& b) {
return res;
}
IHD IntWrapper operator+(const IntWrapper& a, const IntWrapper& b) {
IntWrapper res;
res.val = a.val + b.val;
return res;
}
} // namespace pulsar
#endif

View File

@@ -155,8 +155,8 @@ void backward(
stream);
CHECKLAUNCH();
SUM_WS(
self->ids_sorted_d,
self->n_grad_contributions_d,
(IntWrapper*)(self->ids_sorted_d),
(IntWrapper*)(self->n_grad_contributions_d),
static_cast<int>(num_balls),
self->workspace_d,
self->workspace_size,

View File

@@ -52,7 +52,7 @@ HOST void construct(
self->cam.film_width = width;
self->cam.film_height = height;
self->max_num_balls = max_num_balls;
MALLOC(self->result_d, float, width * height * n_channels);
MALLOC(self->result_d, float, width* height* n_channels);
self->cam.orthogonal_projection = orthogonal_projection;
self->cam.right_handed = right_handed_system;
self->cam.background_normalization_depth = background_normalization_depth;
@@ -93,7 +93,7 @@ HOST void construct(
MALLOC(self->di_sorted_d, DrawInfo, max_num_balls);
MALLOC(self->region_flags_d, char, max_num_balls);
MALLOC(self->num_selected_d, size_t, 1);
MALLOC(self->forw_info_d, float, width * height * (3 + 2 * n_track));
MALLOC(self->forw_info_d, float, width* height * (3 + 2 * n_track));
MALLOC(self->min_max_pixels_d, IntersectInfo, 1);
MALLOC(self->grad_pos_d, float3, max_num_balls);
MALLOC(self->grad_col_d, float, max_num_balls* n_channels);

View File

@@ -255,7 +255,7 @@ GLOBAL void calc_signature(
* for every iteration through the loading loop every thread could add a
* 'hit' to the buffer.
*/
#define RENDER_BUFFER_SIZE RENDER_BLOCK_SIZE * RENDER_BLOCK_SIZE * 2
#define RENDER_BUFFER_SIZE RENDER_BLOCK_SIZE* RENDER_BLOCK_SIZE * 2
/**
* The threshold after which the spheres that are in the render buffer
* are rendered and the buffer is flushed.

View File

@@ -283,15 +283,9 @@ GLOBAL void render(
(percent_allowed_difference > 0.f &&
max_closest_possible_intersection > depth_threshold) ||
tracker.get_n_hits() >= max_n_hits;
#if defined(__CUDACC__) && defined(__HIP_PLATFORM_AMD__)
unsigned long long warp_done = __ballot(done);
int warp_done_bit_cnt = __popcll(warp_done);
#else
uint warp_done = thread_warp.ballot(done);
int warp_done_bit_cnt = POPC(warp_done);
#endif //__CUDACC__ && __HIP_PLATFORM_AMD__
if (thread_warp.thread_rank() == 0)
ATOMICADD_B(&n_pixels_done, warp_done_bit_cnt);
ATOMICADD_B(&n_pixels_done, POPC(warp_done));
// This sync is necessary to keep n_loaded until all threads are done with
// painting.
thread_block.sync();

View File

@@ -213,8 +213,8 @@ std::tuple<size_t, size_t, bool, torch::Tensor> Renderer::arg_check(
const float& gamma,
const float& max_depth,
float& min_depth,
const std::optional<torch::Tensor>& bg_col,
const std::optional<torch::Tensor>& opacity,
const c10::optional<torch::Tensor>& bg_col,
const c10::optional<torch::Tensor>& opacity,
const float& percent_allowed_difference,
const uint& max_n_hits,
const uint& mode) {
@@ -668,8 +668,8 @@ std::tuple<torch::Tensor, torch::Tensor> Renderer::forward(
const float& gamma,
const float& max_depth,
float min_depth,
const std::optional<torch::Tensor>& bg_col,
const std::optional<torch::Tensor>& opacity,
const c10::optional<torch::Tensor>& bg_col,
const c10::optional<torch::Tensor>& opacity,
const float& percent_allowed_difference,
const uint& max_n_hits,
const uint& mode) {
@@ -888,14 +888,14 @@ std::tuple<torch::Tensor, torch::Tensor> Renderer::forward(
};
std::tuple<
std::optional<torch::Tensor>,
std::optional<torch::Tensor>,
std::optional<torch::Tensor>,
std::optional<torch::Tensor>,
std::optional<torch::Tensor>,
std::optional<torch::Tensor>,
std::optional<torch::Tensor>,
std::optional<torch::Tensor>>
at::optional<torch::Tensor>,
at::optional<torch::Tensor>,
at::optional<torch::Tensor>,
at::optional<torch::Tensor>,
at::optional<torch::Tensor>,
at::optional<torch::Tensor>,
at::optional<torch::Tensor>,
at::optional<torch::Tensor>>
Renderer::backward(
const torch::Tensor& grad_im,
const torch::Tensor& image,
@@ -912,8 +912,8 @@ Renderer::backward(
const float& gamma,
const float& max_depth,
float min_depth,
const std::optional<torch::Tensor>& bg_col,
const std::optional<torch::Tensor>& opacity,
const c10::optional<torch::Tensor>& bg_col,
const c10::optional<torch::Tensor>& opacity,
const float& percent_allowed_difference,
const uint& max_n_hits,
const uint& mode,
@@ -922,7 +922,7 @@ Renderer::backward(
const bool& dif_rad,
const bool& dif_cam,
const bool& dif_opy,
const std::optional<std::pair<uint, uint>>& dbg_pos) {
const at::optional<std::pair<uint, uint>>& dbg_pos) {
this->ensure_on_device(this->device_tracker.device());
size_t batch_size;
size_t n_points;
@@ -1045,14 +1045,14 @@ Renderer::backward(
}
// Prepare the return value.
std::tuple<
std::optional<torch::Tensor>,
std::optional<torch::Tensor>,
std::optional<torch::Tensor>,
std::optional<torch::Tensor>,
std::optional<torch::Tensor>,
std::optional<torch::Tensor>,
std::optional<torch::Tensor>,
std::optional<torch::Tensor>>
at::optional<torch::Tensor>,
at::optional<torch::Tensor>,
at::optional<torch::Tensor>,
at::optional<torch::Tensor>,
at::optional<torch::Tensor>,
at::optional<torch::Tensor>,
at::optional<torch::Tensor>,
at::optional<torch::Tensor>>
ret;
if (mode == 1 || (!dif_pos && !dif_col && !dif_rad && !dif_cam && !dif_opy)) {
return ret;

View File

@@ -44,21 +44,21 @@ struct Renderer {
const float& gamma,
const float& max_depth,
float min_depth,
const std::optional<torch::Tensor>& bg_col,
const std::optional<torch::Tensor>& opacity,
const c10::optional<torch::Tensor>& bg_col,
const c10::optional<torch::Tensor>& opacity,
const float& percent_allowed_difference,
const uint& max_n_hits,
const uint& mode);
std::tuple<
std::optional<torch::Tensor>,
std::optional<torch::Tensor>,
std::optional<torch::Tensor>,
std::optional<torch::Tensor>,
std::optional<torch::Tensor>,
std::optional<torch::Tensor>,
std::optional<torch::Tensor>,
std::optional<torch::Tensor>>
at::optional<torch::Tensor>,
at::optional<torch::Tensor>,
at::optional<torch::Tensor>,
at::optional<torch::Tensor>,
at::optional<torch::Tensor>,
at::optional<torch::Tensor>,
at::optional<torch::Tensor>,
at::optional<torch::Tensor>>
backward(
const torch::Tensor& grad_im,
const torch::Tensor& image,
@@ -75,8 +75,8 @@ struct Renderer {
const float& gamma,
const float& max_depth,
float min_depth,
const std::optional<torch::Tensor>& bg_col,
const std::optional<torch::Tensor>& opacity,
const c10::optional<torch::Tensor>& bg_col,
const c10::optional<torch::Tensor>& opacity,
const float& percent_allowed_difference,
const uint& max_n_hits,
const uint& mode,
@@ -85,7 +85,7 @@ struct Renderer {
const bool& dif_rad,
const bool& dif_cam,
const bool& dif_opy,
const std::optional<std::pair<uint, uint>>& dbg_pos);
const at::optional<std::pair<uint, uint>>& dbg_pos);
// Infrastructure.
/**
@@ -115,8 +115,8 @@ struct Renderer {
const float& gamma,
const float& max_depth,
float& min_depth,
const std::optional<torch::Tensor>& bg_col,
const std::optional<torch::Tensor>& opacity,
const c10::optional<torch::Tensor>& bg_col,
const c10::optional<torch::Tensor>& opacity,
const float& percent_allowed_difference,
const uint& max_n_hits,
const uint& mode);

View File

@@ -8,7 +8,6 @@
#ifdef WITH_CUDA
#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAException.h>
#include <cuda_runtime_api.h>
#endif
#include <torch/extension.h>
@@ -34,13 +33,13 @@ torch::Tensor sphere_ids_from_result_info_nograd(
.contiguous();
if (forw_info.device().type() == c10::DeviceType::CUDA) {
#ifdef WITH_CUDA
C10_CUDA_CHECK(cudaMemcpyAsync(
cudaMemcpyAsync(
result.data_ptr(),
tmp.data_ptr(),
sizeof(uint32_t) * tmp.size(0) * tmp.size(1) * tmp.size(2) *
tmp.size(3),
cudaMemcpyDeviceToDevice,
at::cuda::getCurrentCUDAStream()));
at::cuda::getCurrentCUDAStream());
#else
throw std::runtime_error(
"Copy on CUDA device initiated but built "

View File

@@ -7,7 +7,6 @@
*/
#ifdef WITH_CUDA
#include <c10/cuda/CUDAException.h>
#include <cuda_runtime_api.h>
namespace pulsar {
@@ -18,8 +17,7 @@ void cudaDevToDev(
const void* src,
const int& size,
const cudaStream_t& stream) {
C10_CUDA_CHECK(
cudaMemcpyAsync(trg, src, size, cudaMemcpyDeviceToDevice, stream));
cudaMemcpyAsync(trg, src, size, cudaMemcpyDeviceToDevice, stream);
}
void cudaDevToHost(
@@ -27,8 +25,7 @@ void cudaDevToHost(
const void* src,
const int& size,
const cudaStream_t& stream) {
C10_CUDA_CHECK(
cudaMemcpyAsync(trg, src, size, cudaMemcpyDeviceToHost, stream));
cudaMemcpyAsync(trg, src, size, cudaMemcpyDeviceToHost, stream);
}
} // namespace pytorch

View File

@@ -6,6 +6,9 @@
* LICENSE file in the root directory of this source tree.
*/
#include "./global.h"
#include "./logging.h"
/**
* A compilation unit to provide warnings about the code and avoid
* repeated messages.

View File

@@ -25,7 +25,7 @@ class BitMask {
// Use all threads in the current block to clear all bits of this BitMask
__device__ void block_clear() {
for (auto i = threadIdx.x; i < H * W * D; i += blockDim.x) {
for (int i = threadIdx.x; i < H * W * D; i += blockDim.x) {
data[i] = 0;
}
__syncthreads();

Some files were not shown because too many files have changed in this diff Show More