From 5fbdb99aecf317bfe6854c6cb3aa193534ad6f9c Mon Sep 17 00:00:00 2001
From: Jeremy Reizenstein
Date: Thu, 11 Nov 2021 02:02:26 -0800
Subject: [PATCH] builds for pytorch 1.10.0

Summary: Add builds corresponding to the new pytorch 1.10.0. We omit CUDA 11.3 testing because it fails with current hardware, and omit the main build too for the moment. Also move to the newer GPU circle CI executors.

Reviewed By: gkioxari

Differential Revision: D32335934

fbshipit-source-id: 416d92a8eecd06ef7fc742664a5f2d46f93415f8
---
 .circleci/config.in.yml          |  66 ++++++------------
 .circleci/config.yml             | 116 +++++++++++++++++++------
 .circleci/regenerate.py          |  11 +++
 packaging/linux_wheels/inside.sh |  10 +++
 packaging/pkg_helpers.bash       |  29 +++++++-
 5 files changed, 144 insertions(+), 88 deletions(-)

diff --git a/.circleci/config.in.yml b/.circleci/config.in.yml
index 607baf5d..323d36a3 100644
--- a/.circleci/config.in.yml
+++ b/.circleci/config.in.yml
@@ -18,20 +18,13 @@ setupcuda: &setupcuda
     working_directory: ~/
     command: |
       # download and install nvidia drivers, cuda, etc
-      wget --no-verbose --no-clobber -P ~/nvidia-downloads https://developer.download.nvidia.com/compute/cuda/11.2.2/local_installers/cuda_11.2.2_460.32.03_linux.run
-      sudo sh ~/nvidia-downloads/cuda_11.2.2_460.32.03_linux.run --silent
+      wget --no-verbose --no-clobber -P ~/nvidia-downloads https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.19.01_linux.run
+      sudo sh ~/nvidia-downloads/cuda_11.3.1_465.19.01_linux.run --silent
       echo "Done installing CUDA."
       pyenv versions
       nvidia-smi
       pyenv global 3.9.1
 
-gpu: &gpu
-  environment:
-    CUDA_VERSION: "10.2"
-  machine:
-    image: default
-  resource_class: gpu.medium # tesla m60
-
 binary_common: &binary_common
   parameters:
     # Edit these defaults to do a release`
@@ -54,42 +47,41 @@ binary_common: &binary_common
       description: "Wheel only: what docker image to use"
       type: string
       default: "pytorch/manylinux-cuda101"
+    conda_docker_image:
+      description: "what docker image to use for docker"
+      type: string
+      default: "pytorch/conda-cuda"
 
   environment:
     PYTHON_VERSION: << parameters.python_version >>
     BUILD_VERSION: << parameters.build_version >>
     PYTORCH_VERSION: << parameters.pytorch_version >>
     CU_VERSION: << parameters.cu_version >>
+    TESTRUN_DOCKER_IMAGE: << parameters.conda_docker_image >>
 
 jobs:
   main:
-    <<: *gpu
+    environment:
+      CUDA_VERSION: "11.3"
+    resource_class: gpu.nvidia.small.multi
     machine:
       image: ubuntu-2004:202101-01
     steps:
       - checkout
       - <<: *setupcuda
       - run: pip3 install --progress-bar off imageio wheel matplotlib 'pillow<7'
-      - run: pip3 install --progress-bar off torch torchvision
+      - run: pip3 install --progress-bar off torch==1.10.0+cu113 torchvision==0.11.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
       # - run: conda create -p ~/conda_env python=3.7 numpy
       # - run: conda activate ~/conda_env
       # - run: conda install -c pytorch pytorch torchvision
       - run: pip3 install --progress-bar off 'git+https://github.com/facebookresearch/fvcore'
       - run: pip3 install --progress-bar off 'git+https://github.com/facebookresearch/iopath'
-      - run:
-          name: get cub
-          command: |
-            cd ..
-            wget --no-verbose https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz
-            tar xzf 1.10.0.tar.gz
-            # This expands to a directory called cub-1.10.0
       - run:
           name: build
           command: |
-            export LD_LIBRARY_PATH=$LD_LIBARY_PATH:/usr/local/cuda-11.2/lib64
-            export CUB_HOME=$(realpath ../cub-1.10.0)
+            export LD_LIBRARY_PATH=$LD_LIBARY_PATH:/usr/local/cuda-11.3/lib64
             python3 setup.py build_ext --inplace
-      - run: LD_LIBRARY_PATH=$LD_LIBARY_PATH:/usr/local/cuda-11.2/lib64 python -m unittest discover -v -s tests
+      - run: LD_LIBRARY_PATH=$LD_LIBARY_PATH:/usr/local/cuda-11.3/lib64 python -m unittest discover -v -s tests
       - run: python3 setup.py bdist_wheel
 
   binary_linux_wheel:
@@ -113,7 +105,7 @@ jobs:
   binary_linux_conda:
     <<: *binary_common
     docker:
-      - image: "pytorch/conda-cuda"
+      - image: "<< parameters.conda_docker_image >>"
         auth:
           username: $DOCKERHUB_USERNAME
           password: $DOCKERHUB_TOKEN
@@ -137,7 +129,7 @@ jobs:
     <<: *binary_common
     machine:
       image: ubuntu-1604:201903-01
-    resource_class: gpu.medium
+    resource_class: gpu.nvidia.small.multi
     steps:
       - checkout
       - run:
@@ -189,9 +181,8 @@ jobs:
 
             { docker login -u="$DOCKERHUB_USERNAME" -p="$DOCKERHUB_TOKEN" ; } 2> /dev/null
 
-            DOCKER_IMAGE=pytorch/conda-cuda
-            echo Pulling docker image $DOCKER_IMAGE
-            docker pull $DOCKER_IMAGE
+            echo Pulling docker image $TESTRUN_DOCKER_IMAGE
+            docker pull $TESTRUN_DOCKER_IMAGE
       - run:
           name: Build and run tests
           no_output_timeout: 20m
@@ -200,11 +191,10 @@ jobs:
 
             cd ${HOME}/project/
 
-            DOCKER_IMAGE=pytorch/conda-cuda
             export JUST_TESTRUN=1
             VARS_TO_PASS="-e PYTHON_VERSION -e BUILD_VERSION -e PYTORCH_VERSION -e CU_VERSION -e JUST_TESTRUN"
 
-            docker run --gpus all --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${DOCKER_IMAGE} ./packaging/build_conda.sh
+            docker run --gpus all --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${TESTRUN_DOCKER_IMAGE} ./packaging/build_conda.sh
 
   binary_macos_wheel:
     <<: *binary_common
@@ -228,27 +218,15 @@ workflows:
   version: 2
   build_and_test:
     jobs:
-      - main:
-          context: DOCKERHUB_TOKEN
+      # - main:
+      #     context: DOCKERHUB_TOKEN
       {{workflows()}}
       - binary_linux_conda_cuda:
-          name: testrun_conda_cuda_py37_cu102_pyt190
-          context: DOCKERHUB_TOKEN
-          python_version: "3.7"
-          pytorch_version: '1.9.0'
-          cu_version: "cu102"
-      - binary_linux_conda_cuda:
-          name: testrun_conda_cuda_py37_cu110_pyt170
+          name: testrun_conda_cuda_py37_cu102_pyt170
           context: DOCKERHUB_TOKEN
           python_version: "3.7"
           pytorch_version: '1.7.0'
-          cu_version: "cu110"
-      - binary_linux_conda_cuda:
-          name: testrun_conda_cuda_py39_cu111_pyt181
-          context: DOCKERHUB_TOKEN
-          python_version: "3.9"
-          pytorch_version: '1.8.1'
-          cu_version: "cu111"
+          cu_version: "cu102"
       - binary_macos_wheel:
           cu_version: cpu
           name: macos_wheel_py36_cpu
diff --git a/.circleci/config.yml b/.circleci/config.yml
index 10456672..268a2961 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -18,20 +18,13 @@ setupcuda: &setupcuda
     working_directory: ~/
     command: |
       # download and install nvidia drivers, cuda, etc
-      wget --no-verbose --no-clobber -P ~/nvidia-downloads https://developer.download.nvidia.com/compute/cuda/11.2.2/local_installers/cuda_11.2.2_460.32.03_linux.run
-      sudo sh ~/nvidia-downloads/cuda_11.2.2_460.32.03_linux.run --silent
+      wget --no-verbose --no-clobber -P ~/nvidia-downloads https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.19.01_linux.run
+      sudo sh ~/nvidia-downloads/cuda_11.3.1_465.19.01_linux.run --silent
       echo "Done installing CUDA."
       pyenv versions
       nvidia-smi
       pyenv global 3.9.1
 
-gpu: &gpu
-  environment:
-    CUDA_VERSION: "10.2"
-  machine:
-    image: default
-  resource_class: gpu.medium # tesla m60
-
 binary_common: &binary_common
   parameters:
     # Edit these defaults to do a release`
@@ -54,42 +47,41 @@ binary_common: &binary_common
      description: "Wheel only: what docker image to use"
       type: string
       default: "pytorch/manylinux-cuda101"
+    conda_docker_image:
+      description: "what docker image to use for docker"
+      type: string
+      default: "pytorch/conda-cuda"
 
   environment:
     PYTHON_VERSION: << parameters.python_version >>
     BUILD_VERSION: << parameters.build_version >>
     PYTORCH_VERSION: << parameters.pytorch_version >>
     CU_VERSION: << parameters.cu_version >>
+    TESTRUN_DOCKER_IMAGE: << parameters.conda_docker_image >>
 
 jobs:
   main:
-    <<: *gpu
+    environment:
+      CUDA_VERSION: "11.3"
+    resource_class: gpu.nvidia.small.multi
     machine:
       image: ubuntu-2004:202101-01
     steps:
       - checkout
       - <<: *setupcuda
       - run: pip3 install --progress-bar off imageio wheel matplotlib 'pillow<7'
-      - run: pip3 install --progress-bar off torch torchvision
+      - run: pip3 install --progress-bar off torch==1.10.0+cu113 torchvision==0.11.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
       # - run: conda create -p ~/conda_env python=3.7 numpy
       # - run: conda activate ~/conda_env
       # - run: conda install -c pytorch pytorch torchvision
       - run: pip3 install --progress-bar off 'git+https://github.com/facebookresearch/fvcore'
       - run: pip3 install --progress-bar off 'git+https://github.com/facebookresearch/iopath'
-      - run:
-          name: get cub
-          command: |
-            cd ..
-            wget --no-verbose https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz
-            tar xzf 1.10.0.tar.gz
-            # This expands to a directory called cub-1.10.0
       - run:
           name: build
           command: |
-            export LD_LIBRARY_PATH=$LD_LIBARY_PATH:/usr/local/cuda-11.2/lib64
-            export CUB_HOME=$(realpath ../cub-1.10.0)
+            export LD_LIBRARY_PATH=$LD_LIBARY_PATH:/usr/local/cuda-11.3/lib64
             python3 setup.py build_ext --inplace
-      - run: LD_LIBRARY_PATH=$LD_LIBARY_PATH:/usr/local/cuda-11.2/lib64 python -m unittest discover -v -s tests
+      - run: LD_LIBRARY_PATH=$LD_LIBARY_PATH:/usr/local/cuda-11.3/lib64 python -m unittest discover -v -s tests
       - run: python3 setup.py bdist_wheel
 
   binary_linux_wheel:
@@ -113,7 +105,7 @@ jobs:
   binary_linux_conda:
     <<: *binary_common
     docker:
-      - image: "pytorch/conda-cuda"
+      - image: "<< parameters.conda_docker_image >>"
         auth:
           username: $DOCKERHUB_USERNAME
           password: $DOCKERHUB_TOKEN
@@ -137,7 +129,7 @@ jobs:
     <<: *binary_common
     machine:
       image: ubuntu-1604:201903-01
-    resource_class: gpu.medium
+    resource_class: gpu.nvidia.small.multi
     steps:
       - checkout
       - run:
@@ -189,9 +181,8 @@ jobs:
 
             { docker login -u="$DOCKERHUB_USERNAME" -p="$DOCKERHUB_TOKEN" ; } 2> /dev/null
 
-            DOCKER_IMAGE=pytorch/conda-cuda
-            echo Pulling docker image $DOCKER_IMAGE
-            docker pull $DOCKER_IMAGE
+            echo Pulling docker image $TESTRUN_DOCKER_IMAGE
+            docker pull $TESTRUN_DOCKER_IMAGE
       - run:
           name: Build and run tests
           no_output_timeout: 20m
@@ -200,11 +191,10 @@ jobs:
 
             cd ${HOME}/project/
 
-            DOCKER_IMAGE=pytorch/conda-cuda
             export JUST_TESTRUN=1
             VARS_TO_PASS="-e PYTHON_VERSION -e BUILD_VERSION -e PYTORCH_VERSION -e CU_VERSION -e JUST_TESTRUN"
 
-            docker run --gpus all --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${DOCKER_IMAGE} ./packaging/build_conda.sh
+            docker run --gpus all --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${TESTRUN_DOCKER_IMAGE} ./packaging/build_conda.sh
 
   binary_macos_wheel:
     <<: *binary_common
@@ -228,8 +218,8 @@ workflows:
   version: 2
   build_and_test:
     jobs:
-      - main:
-          context: DOCKERHUB_TOKEN
+      # - main:
+      #     context: DOCKERHUB_TOKEN
       - binary_linux_conda:
           context: DOCKERHUB_TOKEN
           cu_version: cu92
@@ -344,6 +334,19 @@ workflows:
           name: linux_conda_py36_cu111_pyt191
           python_version: '3.6'
           pytorch_version: 1.9.1
+      - binary_linux_conda:
+          context: DOCKERHUB_TOKEN
+          cu_version: cu102
+          name: linux_conda_py36_cu102_pyt1100
+          python_version: '3.6'
+          pytorch_version: 1.10.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda113
+          context: DOCKERHUB_TOKEN
+          cu_version: cu113
+          name: linux_conda_py36_cu113_pyt1100
+          python_version: '3.6'
+          pytorch_version: 1.10.0
       - binary_linux_conda:
           context: DOCKERHUB_TOKEN
           cu_version: cu92
@@ -458,6 +461,19 @@ workflows:
           name: linux_conda_py37_cu111_pyt191
           python_version: '3.7'
           pytorch_version: 1.9.1
+      - binary_linux_conda:
+          context: DOCKERHUB_TOKEN
+          cu_version: cu102
+          name: linux_conda_py37_cu102_pyt1100
+          python_version: '3.7'
+          pytorch_version: 1.10.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda113
+          context: DOCKERHUB_TOKEN
+          cu_version: cu113
+          name: linux_conda_py37_cu113_pyt1100
+          python_version: '3.7'
+          pytorch_version: 1.10.0
       - binary_linux_conda:
           context: DOCKERHUB_TOKEN
           cu_version: cu92
@@ -572,6 +588,19 @@ workflows:
           name: linux_conda_py38_cu111_pyt191
           python_version: '3.8'
           pytorch_version: 1.9.1
+      - binary_linux_conda:
+          context: DOCKERHUB_TOKEN
+          cu_version: cu102
+          name: linux_conda_py38_cu102_pyt1100
+          python_version: '3.8'
+          pytorch_version: 1.10.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda113
+          context: DOCKERHUB_TOKEN
+          cu_version: cu113
+          name: linux_conda_py38_cu113_pyt1100
+          python_version: '3.8'
+          pytorch_version: 1.10.0
       - binary_linux_conda:
           context: DOCKERHUB_TOKEN
           cu_version: cu101
@@ -650,24 +679,25 @@ workflows:
           name: linux_conda_py39_cu111_pyt191
           python_version: '3.9'
           pytorch_version: 1.9.1
-      - binary_linux_conda_cuda:
-          name: testrun_conda_cuda_py37_cu102_pyt190
+      - binary_linux_conda:
           context: DOCKERHUB_TOKEN
-          python_version: "3.7"
-          pytorch_version: '1.9.0'
-          cu_version: "cu102"
+          cu_version: cu102
+          name: linux_conda_py39_cu102_pyt1100
+          python_version: '3.9'
+          pytorch_version: 1.10.0
+      - binary_linux_conda:
+          conda_docker_image: pytorch/conda-builder:cuda113
+          context: DOCKERHUB_TOKEN
+          cu_version: cu113
+          name: linux_conda_py39_cu113_pyt1100
+          python_version: '3.9'
+          pytorch_version: 1.10.0
       - binary_linux_conda_cuda:
-          name: testrun_conda_cuda_py37_cu110_pyt170
+          name: testrun_conda_cuda_py37_cu102_pyt170
           context: DOCKERHUB_TOKEN
           python_version: "3.7"
           pytorch_version: '1.7.0'
-          cu_version: "cu110"
-      - binary_linux_conda_cuda:
-          name: testrun_conda_cuda_py39_cu111_pyt181
-          context: DOCKERHUB_TOKEN
-          python_version: "3.9"
-          pytorch_version: '1.8.1'
-          cu_version: "cu111"
+          cu_version: "cu102"
       - binary_macos_wheel:
           cu_version: cpu
           name: macos_wheel_py36_cpu
diff --git a/.circleci/regenerate.py b/.circleci/regenerate.py
index 446ec84c..d727c882 100755
--- a/.circleci/regenerate.py
+++ b/.circleci/regenerate.py
@@ -26,9 +26,16 @@ CONDA_CUDA_VERSIONS = {
     "1.8.1": ["cu101", "cu102", "cu111"],
     "1.9.0": ["cu102", "cu111"],
     "1.9.1": ["cu102", "cu111"],
+    "1.10.0": ["cu102", "cu113"],
 }
 
 
+def conda_docker_image_for_cuda(cuda_version):
+    if cuda_version == "cu113":
+        return "pytorch/conda-builder:cuda113"
+    return None
+
+
 def pytorch_versions_for_python(python_version):
     if python_version in ["3.6", "3.7", "3.8"]:
         return list(CONDA_CUDA_VERSIONS)
@@ -113,6 +120,10 @@ def generate_base_workflow(
         "context": "DOCKERHUB_TOKEN",
     }
 
+    conda_docker_image = conda_docker_image_for_cuda(cu_version)
+    if conda_docker_image is not None:
+        d["conda_docker_image"] = conda_docker_image
+
     if filter_branch is not None:
         d["filters"] = {"branches": {"only": filter_branch}}
 
diff --git a/packaging/linux_wheels/inside.sh b/packaging/linux_wheels/inside.sh
index 8d9d2531..b3f30096 100644
--- a/packaging/linux_wheels/inside.sh
+++ b/packaging/linux_wheels/inside.sh
@@ -58,6 +58,16 @@ do
     for cu_version in ${CONDA_CUDA_VERSIONS[$pytorch_version]}
     do
         case "$cu_version" in
+            cu113)
+                export CUDA_HOME=/usr/local/cuda-11.3/
+                export CUDA_TAG=11.3
+                export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50"
+            ;;
+            cu112)
+                export CUDA_HOME=/usr/local/cuda-11.2/
+                export CUDA_TAG=11.2
+                export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50"
+            ;;
             cu111)
                 export CUDA_HOME=/usr/local/cuda-11.1/
                 export CUDA_TAG=11.1
diff --git a/packaging/pkg_helpers.bash b/packaging/pkg_helpers.bash
index 92f50be5..6aaa5cf7 100644
--- a/packaging/pkg_helpers.bash
+++ b/packaging/pkg_helpers.bash
@@ -51,6 +51,28 @@ setup_cuda() {
 
   # Now work out the CUDA settings
   case "$CU_VERSION" in
+    cu113)
+      if [[ "$OSTYPE" == "msys" ]]; then
+        export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.3"
+      else
+        export CUDA_HOME=/usr/local/cuda-11.3/
+      fi
+      export FORCE_CUDA=1
+      # Hard-coding gencode flags is temporary situation until
+      # https://github.com/pytorch/pytorch/pull/23408 lands
+      export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50"
+      ;;
+    cu112)
+      if [[ "$OSTYPE" == "msys" ]]; then
+        export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.2"
+      else
+        export CUDA_HOME=/usr/local/cuda-11.2/
+      fi
+      export FORCE_CUDA=1
+      # Hard-coding gencode flags is temporary situation until
+      # https://github.com/pytorch/pytorch/pull/23408 lands
+      export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50"
+      ;;
     cu111)
       if [[ "$OSTYPE" == "msys" ]]; then
         export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.1"
@@ -267,9 +289,14 @@ setup_conda_cudatoolkit_constraint() {
     export CONDA_CUDATOOLKIT_CONSTRAINT=""
   else
     case "$CU_VERSION" in
+      cu113)
+        export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.3,<11.4 # [not osx]"
+        ;;
+      cu112)
+        export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.2,<11.3 # [not osx]"
+        ;;
       cu111)
         export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.1,<11.2 # [not osx]"
-        #export CONDA_CUB_CONSTRAINT="- nvidiacub"
         ;;
       cu110)
         export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.0,<11.1 # [not osx]"