From d220ee2f66659c828422cb55344973e64fc33a55 Mon Sep 17 00:00:00 2001 From: Jeremy Reizenstein Date: Tue, 10 Nov 2020 09:36:29 -0800 Subject: [PATCH] pulsar build and CI changes Summary: Changes to CI and some minor fixes now that pulsar is part of pytorch3d. Most significantly, add CUB to CI builds. Make CUB_HOME override the CUB already in cudatoolkit (important for cuda11.0 which uses cub 1.9.9 which pulsar doesn't work well with). Make imageio available for testing. Lint fixes. Fix some test verbosity. Avoid use of atomicAdd_block on older GPUs. Reviewed By: nikhilaravi, classner Differential Revision: D24773716 fbshipit-source-id: 2428356bb2e62735f2bc0c15cbe4cff35b1b24b8 --- .circleci/config.in.yml | 24 +++++++++++++----- .circleci/config.yml | 26 +++++++++++++++++--- docs/examples/pulsar_basic.py | 2 +- docs/examples/pulsar_cam_unified.py | 2 +- docs/examples/pulsar_multiview.py | 2 +- docs/examples/pulsar_optimization.py | 2 +- docs/examples/pulsar_optimization_unified.py | 2 +- packaging/build_conda.sh | 2 +- packaging/build_wheel.sh | 1 + packaging/pkg_helpers.bash | 22 +++++++++++++++++ packaging/pytorch3d/meta.yaml | 4 ++- pytorch3d/csrc/pulsar/cuda/commands.h | 2 +- setup.py | 22 +++++++++++------ tests/test_build.py | 1 - 14 files changed, 88 insertions(+), 26 deletions(-) diff --git a/.circleci/config.in.yml b/.circleci/config.in.yml index 7360aa17..00288912 100644 --- a/.circleci/config.in.yml +++ b/.circleci/config.in.yml @@ -68,14 +68,26 @@ jobs: steps: - checkout - <<: *setupcuda - - run: pip3 install --progress-bar off wheel matplotlib 'pillow<7' + - run: pip3 install --progress-bar off imageio wheel matplotlib 'pillow<7' - run: pip3 install --progress-bar off torch torchvision # - run: conda create -p ~/conda_env python=3.7 numpy # - run: conda activate ~/conda_env # - run: conda install -c pytorch pytorch torchvision - run: pip3 install --progress-bar off 'git+https://github.com/facebookresearch/fvcore' - - run: 
LD_LIBRARY_PATH=$LD_LIBARY_PATH:/usr/local/cuda-10.2/lib64 python3 setup.py build_ext --inplace + - run: + name: get cub + command: | + cd .. + wget --no-verbose https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz + tar xzf 1.10.0.tar.gz + # This expands to a directory called cub-1.10.0 + - run: + name: build + command: | + export LD_LIBRARY_PATH=$LD_LIBARY_PATH:/usr/local/cuda-10.2/lib64 + export CUB_HOME=$(realpath ../cub-1.10.0) + python3 setup.py build_ext --inplace - run: LD_LIBRARY_PATH=$LD_LIBARY_PATH:/usr/local/cuda-10.2/lib64 python -m unittest discover -v -s tests - run: python3 setup.py bdist_wheel @@ -89,7 +101,7 @@ jobs: resource_class: 2xlarge+ steps: - checkout - - run: packaging/build_wheel.sh + - run: MAX_JOBS=15 packaging/build_wheel.sh - store_artifacts: path: dist - persist_to_workspace: @@ -109,7 +121,7 @@ jobs: - checkout # This is building with cuda but no gpu present, # so we aren't running the tests. - - run: TEST_FLAG=--no-test packaging/build_conda.sh + - run: MAX_JOBS=15 TEST_FLAG=--no-test packaging/build_conda.sh - store_artifacts: path: /opt/conda/conda-bld/linux-64 - persist_to_workspace: @@ -215,9 +227,9 @@ workflows: context: DOCKERHUB_TOKEN {{workflows()}} - binary_linux_conda_cuda: - name: testrun_conda_cuda_py37_cu101_pyt14 + name: testrun_conda_cuda_py36_cu101_pyt14 context: DOCKERHUB_TOKEN - python_version: "3.7" + python_version: "3.6" pytorch_version: "1.4" cu_version: "cu101" - binary_linux_conda_cuda: diff --git a/.circleci/config.yml b/.circleci/config.yml index 1d121fa2..f1bc3b1e 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -68,14 +68,26 @@ jobs: steps: - checkout - <<: *setupcuda - - run: pip3 install --progress-bar off wheel matplotlib 'pillow<7' + - run: pip3 install --progress-bar off imageio wheel matplotlib 'pillow<7' - run: pip3 install --progress-bar off torch torchvision # - run: conda create -p ~/conda_env python=3.7 numpy # - run: conda activate ~/conda_env # - run: conda install -c pytorch 
pytorch torchvision - run: pip3 install --progress-bar off 'git+https://github.com/facebookresearch/fvcore' - - run: LD_LIBRARY_PATH=$LD_LIBARY_PATH:/usr/local/cuda-10.2/lib64 python3 setup.py build_ext --inplace + - run: + name: get cub + command: | + cd .. + wget --no-verbose https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz + tar xzf 1.10.0.tar.gz + # This expands to a directory called cub-1.10.0 + - run: + name: build + command: | + export LD_LIBRARY_PATH=$LD_LIBARY_PATH:/usr/local/cuda-10.2/lib64 + export CUB_HOME=$(realpath ../cub-1.10.0) + python3 setup.py build_ext --inplace - run: LD_LIBRARY_PATH=$LD_LIBARY_PATH:/usr/local/cuda-10.2/lib64 python -m unittest discover -v -s tests - run: python3 setup.py bdist_wheel @@ -89,7 +101,7 @@ jobs: resource_class: 2xlarge+ steps: - checkout - - run: packaging/build_wheel.sh + - run: MAX_JOBS=15 packaging/build_wheel.sh - store_artifacts: path: dist - persist_to_workspace: @@ -109,7 +121,7 @@ jobs: - checkout # This is building with cuda but no gpu present, # so we aren't running the tests. - - run: TEST_FLAG=--no-test packaging/build_conda.sh + - run: MAX_JOBS=15 TEST_FLAG=--no-test packaging/build_conda.sh - store_artifacts: path: /opt/conda/conda-bld/linux-64 - persist_to_workspace: @@ -489,6 +501,12 @@ workflows: python_version: "3.6" pytorch_version: "1.4" cu_version: "cu101" + - binary_linux_conda_cuda: + name: testrun_conda_cuda_py37_cu102_pyt160 + context: DOCKERHUB_TOKEN + python_version: "3.7" + pytorch_version: '1.6.0' + cu_version: "cu102" - binary_linux_conda_cuda: name: testrun_conda_cuda_py37_cu110_pyt170 context: DOCKERHUB_TOKEN diff --git a/docs/examples/pulsar_basic.py b/docs/examples/pulsar_basic.py index 43733d6d..1cd5f8cd 100755 --- a/docs/examples/pulsar_basic.py +++ b/docs/examples/pulsar_basic.py @@ -5,9 +5,9 @@ This example demonstrates the most trivial, direct interface of the pulsar sphere renderer. It renders and saves an image with 10 random spheres. Output: basic.png. 
""" +import logging import math from os import path -import logging import imageio import torch diff --git a/docs/examples/pulsar_cam_unified.py b/docs/examples/pulsar_cam_unified.py index 265c204c..47affc4d 100755 --- a/docs/examples/pulsar_cam_unified.py +++ b/docs/examples/pulsar_cam_unified.py @@ -9,8 +9,8 @@ distorted. Gradient-based optimization is used to converge towards the original camera parameters. Output: cam-pt3d.gif """ -from os import path import logging +from os import path import cv2 import imageio diff --git a/docs/examples/pulsar_multiview.py b/docs/examples/pulsar_multiview.py index ad487234..26889a11 100755 --- a/docs/examples/pulsar_multiview.py +++ b/docs/examples/pulsar_multiview.py @@ -13,9 +13,9 @@ This example is not available yet through the 'unified' interface, because opacity support has not landed in PyTorch3D for general data structures yet. """ +import logging import math from os import path -import logging import cv2 import imageio diff --git a/docs/examples/pulsar_optimization.py b/docs/examples/pulsar_optimization.py index 50a2ac43..5bd64424 100755 --- a/docs/examples/pulsar_optimization.py +++ b/docs/examples/pulsar_optimization.py @@ -8,8 +8,8 @@ The scene is initialized with random spheres. Gradient-based optimization is used to converge towards a faithful scene representation. """ -import math import logging +import math import cv2 import imageio diff --git a/docs/examples/pulsar_optimization_unified.py b/docs/examples/pulsar_optimization_unified.py index 268a501e..69517e1a 100755 --- a/docs/examples/pulsar_optimization_unified.py +++ b/docs/examples/pulsar_optimization_unified.py @@ -8,8 +8,8 @@ The scene is initialized with random spheres. Gradient-based optimization is used to converge towards a faithful scene representation. 
""" -import math import logging +import math import cv2 import imageio diff --git a/packaging/build_conda.sh b/packaging/build_conda.sh index 81b5f924..ef50a95e 100755 --- a/packaging/build_conda.sh +++ b/packaging/build_conda.sh @@ -17,4 +17,4 @@ setup_conda_pytorch_constraint setup_conda_cudatoolkit_constraint setup_visual_studio_constraint # shellcheck disable=SC2086 -conda build $CONDA_CHANNEL_FLAGS ${TEST_FLAG:-} -c defaults -c conda-forge --no-anaconda-upload -c fvcore --python "$PYTHON_VERSION" packaging/pytorch3d +conda build $CONDA_CHANNEL_FLAGS ${TEST_FLAG:-} -c bottler -c defaults -c conda-forge --no-anaconda-upload -c fvcore --python "$PYTHON_VERSION" packaging/pytorch3d diff --git a/packaging/build_wheel.sh b/packaging/build_wheel.sh index 2d256baf..68c1ab3c 100755 --- a/packaging/build_wheel.sh +++ b/packaging/build_wheel.sh @@ -12,5 +12,6 @@ setup_env "$VERSION" setup_wheel_python pip_install numpy setup_pip_pytorch_version +download_nvidiacub_if_needed python setup.py clean IS_WHEEL=1 python setup.py bdist_wheel diff --git a/packaging/pkg_helpers.bash b/packaging/pkg_helpers.bash index da6c220e..80f58100 100644 --- a/packaging/pkg_helpers.bash +++ b/packaging/pkg_helpers.bash @@ -251,24 +251,32 @@ setup_conda_pytorch_constraint() { # Translate CUDA_VERSION into CUDA_CUDATOOLKIT_CONSTRAINT setup_conda_cudatoolkit_constraint() { export CONDA_CPUONLY_FEATURE="" + export CONDA_CUB_CONSTRAINT="" if [[ "$(uname)" == Darwin ]]; then export CONDA_CUDATOOLKIT_CONSTRAINT="" else case "$CU_VERSION" in cu110) export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.0,<11.1 # [not osx]" + # Even though cudatoolkit 11.0 provides CUB we need our own, to control the + # version, because the built-in 1.9.9 in the cudatoolkit causes problems. 
+ export CONDA_CUB_CONSTRAINT="- nvidiacub" ;; cu102) export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.2,<10.3 # [not osx]" + export CONDA_CUB_CONSTRAINT="- nvidiacub" ;; cu101) export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.1,<10.2 # [not osx]" + export CONDA_CUB_CONSTRAINT="- nvidiacub" ;; cu100) export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=10.0,<10.1 # [not osx]" + export CONDA_CUB_CONSTRAINT="- nvidiacub" ;; cu92) export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=9.2,<9.3 # [not osx]" + export CONDA_CUB_CONSTRAINT="- nvidiacub" ;; cpu) export CONDA_CUDATOOLKIT_CONSTRAINT="" @@ -292,3 +300,17 @@ setup_visual_studio_constraint() { cp packaging/$VSTOOLCHAIN_PACKAGE/conda_build_config.yaml packaging/pytorch3d/conda_build_config.yaml fi } + +download_nvidiacub_if_needed() { + case "$CU_VERSION" in + cu110|cu102|cu101|cu100|cu92) + echo "Downloading cub" + wget --no-verbose https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz + tar xzf 1.10.0.tar.gz + CUB_HOME=$(realpath ./cub-1.10.0) + export CUB_HOME + echo "CUB_HOME is now $CUB_HOME" + ;; + esac + # We don't need CUB for a cpu build or if cuda is 11.1 or higher +} diff --git a/packaging/pytorch3d/meta.yaml b/packaging/pytorch3d/meta.yaml index 0c818c45..f7f853ad 100644 --- a/packaging/pytorch3d/meta.yaml +++ b/packaging/pytorch3d/meta.yaml @@ -8,6 +8,7 @@ source: requirements: build: - {{ compiler('c') }} # [win] + {{ environ.get('CONDA_CUB_CONSTRAINT') }} host: - python @@ -31,6 +32,7 @@ build: - CUDA_HOME - FORCE_CUDA - NVCC_FLAGS + - MAX_JOBS features: {{ environ.get('CONDA_CPUONLY_FEATURE') }} @@ -41,7 +43,7 @@ test: - tests - docs requires: - - ca-certificates + - imageio commands: #pytest . 
python -m unittest discover -v -s tests diff --git a/pytorch3d/csrc/pulsar/cuda/commands.h b/pytorch3d/csrc/pulsar/cuda/commands.h index 6c5f2a35..2ea94a50 100644 --- a/pytorch3d/csrc/pulsar/cuda/commands.h +++ b/pytorch3d/csrc/pulsar/cuda/commands.h @@ -186,7 +186,7 @@ __device__ static float atomicMin(float* address, float val) { ATOMICADD(&((PTR)->x), VAL.x); \ ATOMICADD(&((PTR)->y), VAL.y); \ ATOMICADD(&((PTR)->z), VAL.z); -#if (CUDART_VERSION >= 10000) +#if (CUDART_VERSION >= 10000) && (__CUDA_ARCH__ >= 600) #define ATOMICADD_B(PTR, VAL) atomicAdd_block((PTR), (VAL)) #else #define ATOMICADD_B(PTR, VAL) ATOMICADD(PTR, VAL) diff --git a/setup.py b/setup.py index 3bfcd280..b9925893 100755 --- a/setup.py +++ b/setup.py @@ -20,12 +20,18 @@ def get_extensions(): extra_compile_args = {"cxx": ["-std=c++14"]} define_macros = [] + include_dirs = [extensions_dir] force_cuda = os.getenv("FORCE_CUDA", "0") == "1" if (torch.cuda.is_available() and CUDA_HOME is not None) or force_cuda: extension = CUDAExtension sources += source_cuda define_macros += [("WITH_CUDA", None)] + # Thrust is only used for its tuple objects. + # With CUDA 11.0 we can't use the cudatoolkit's version of cub. + # We take the risk that CUB and Thrust are incompatible, because + # we aren't using parts of Thrust which actually use CUB. + define_macros += [("THRUST_IGNORE_CUB_VERSION_CHECK", None)] cub_home = os.environ.get("CUB_HOME", None) nvcc_args = [ "-std=c++14", @@ -34,6 +40,11 @@ def get_extensions(): "-D__CUDA_NO_HALF_CONVERSIONS__", "-D__CUDA_NO_HALF2_OPERATORS__", ] + if cub_home is None: + prefix = os.environ.get("CONDA_PREFIX", None) + if prefix is not None and os.path.isdir(prefix + "/include/cub"): + cub_home = prefix + "/include" + if cub_home is None: warnings.warn( "The environment variable `CUB_HOME` was not found. " @@ -43,14 +54,13 @@ def get_extensions(): "`CUB_HOME` to the folder containing the `CMakeListst.txt` file." 
) else: - nvcc_args.insert( - 0, "-I%s" % (os.path.realpath(cub_home).replace("\\ ", " ")) - ) + include_dirs.append(os.path.realpath(cub_home).replace("\\ ", " ")) nvcc_flags_env = os.getenv("NVCC_FLAGS", "") if nvcc_flags_env != "": nvcc_args.extend(nvcc_flags_env.split(" ")) - # It's better if pytorch can do this by default .. + # This is needed for pytorch 1.6 and earlier. See e.g. + # https://github.com/facebookresearch/pytorch3d/issues/436 CC = os.environ.get("CC", None) if CC is not None: CC_arg = "-ccbin={}".format(CC) @@ -63,8 +73,6 @@ def get_extensions(): sources = [os.path.join(extensions_dir, s) for s in sources] - include_dirs = [extensions_dir] - ext_modules = [ extension( "pytorch3d._C", @@ -100,7 +108,7 @@ setup( url="https://github.com/facebookresearch/pytorch3d", description="PyTorch3D is FAIR's library of reusable components " "for deep Learning with 3D data.", - packages=find_packages(exclude=("configs", "tests")), + packages=find_packages(exclude=("configs", "tests", "tests.*")), install_requires=["torchvision>=0.4", "fvcore"], extras_require={ "all": ["matplotlib", "tqdm>4.29.0", "imageio", "ipywidgets"], diff --git a/tests/test_build.py b/tests/test_build.py index 1b30607b..e93e3db2 100644 --- a/tests/test_build.py +++ b/tests/test_build.py @@ -41,7 +41,6 @@ class TestBuild(unittest.TestCase): for extension in extensions: for i in root_dir.glob(f"**/*.{extension}"): - print(i) if str(i).endswith( "pytorch3d/transforms/external/kornia_angle_axis_to_rotation_matrix.py" ):