Builds for PyTorch 1.10.0

Summary:
Add builds corresponding to the new PyTorch 1.10.0 release. We omit CUDA 11.3 testing because it fails on the current hardware, and also omit the main build for the moment.
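
For concreteness, the combinations this change adds (read off the generated workflow entries below) can be summarized as a small illustrative sketch; the list itself is not part of the commit:

    # Illustrative sketch (not in the commit): the conda build matrix this
    # change adds, as seen in the generated workflow entries below.
    new_builds = [
        (python, cu, "1.10.0")
        for python in ("3.6", "3.7", "3.8", "3.9")
        for cu in ("cu102", "cu113")
    ]
    # e.g. ("3.9", "cu113", "1.10.0") -> job linux_conda_py39_cu113_pyt1100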

Also move to the newer CircleCI GPU executors.

Reviewed By: gkioxari

Differential Revision: D32335934

fbshipit-source-id: 416d92a8eecd06ef7fc742664a5f2d46f93415f8
Author: Jeremy Reizenstein 2021-11-11 02:02:26 -08:00, committed by Facebook GitHub Bot
parent 1836c786fe
commit 5fbdb99aec
5 changed files with 144 additions and 88 deletions

View File

@ -18,20 +18,13 @@ setupcuda: &setupcuda
working_directory: ~/
command: |
# download and install nvidia drivers, cuda, etc
wget --no-verbose --no-clobber -P ~/nvidia-downloads https://developer.download.nvidia.com/compute/cuda/11.2.2/local_installers/cuda_11.2.2_460.32.03_linux.run
sudo sh ~/nvidia-downloads/cuda_11.2.2_460.32.03_linux.run --silent
wget --no-verbose --no-clobber -P ~/nvidia-downloads https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.19.01_linux.run
sudo sh ~/nvidia-downloads/cuda_11.3.1_465.19.01_linux.run --silent
echo "Done installing CUDA."
pyenv versions
nvidia-smi
pyenv global 3.9.1
gpu: &gpu
environment:
CUDA_VERSION: "10.2"
machine:
image: default
resource_class: gpu.medium # tesla m60
binary_common: &binary_common
parameters:
# Edit these defaults to do a release
@ -54,42 +47,41 @@ binary_common: &binary_common
description: "Wheel only: what docker image to use"
type: string
default: "pytorch/manylinux-cuda101"
conda_docker_image:
description: "what docker image to use for docker"
type: string
default: "pytorch/conda-cuda"
environment:
PYTHON_VERSION: << parameters.python_version >>
BUILD_VERSION: << parameters.build_version >>
PYTORCH_VERSION: << parameters.pytorch_version >>
CU_VERSION: << parameters.cu_version >>
TESTRUN_DOCKER_IMAGE: << parameters.conda_docker_image >>
jobs:
main:
<<: *gpu
environment:
CUDA_VERSION: "11.3"
resource_class: gpu.nvidia.small.multi
machine:
image: ubuntu-2004:202101-01
steps:
- checkout
- <<: *setupcuda
- run: pip3 install --progress-bar off imageio wheel matplotlib 'pillow<7'
- run: pip3 install --progress-bar off torch torchvision
- run: pip3 install --progress-bar off torch==1.10.0+cu113 torchvision==0.11.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
# - run: conda create -p ~/conda_env python=3.7 numpy
# - run: conda activate ~/conda_env
# - run: conda install -c pytorch pytorch torchvision
- run: pip3 install --progress-bar off 'git+https://github.com/facebookresearch/fvcore'
- run: pip3 install --progress-bar off 'git+https://github.com/facebookresearch/iopath'
- run:
name: get cub
command: |
cd ..
wget --no-verbose https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz
tar xzf 1.10.0.tar.gz
# This expands to a directory called cub-1.10.0
- run:
name: build
command: |
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2/lib64
export CUB_HOME=$(realpath ../cub-1.10.0)
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.3/lib64
python3 setup.py build_ext --inplace
- run: LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2/lib64 python -m unittest discover -v -s tests
- run: LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.3/lib64 python -m unittest discover -v -s tests
- run: python3 setup.py bdist_wheel
binary_linux_wheel:
@ -113,7 +105,7 @@ jobs:
binary_linux_conda:
<<: *binary_common
docker:
- image: "pytorch/conda-cuda"
- image: "<< parameters.conda_docker_image >>"
auth:
username: $DOCKERHUB_USERNAME
password: $DOCKERHUB_TOKEN
@ -137,7 +129,7 @@ jobs:
<<: *binary_common
machine:
image: ubuntu-1604:201903-01
resource_class: gpu.medium
resource_class: gpu.nvidia.small.multi
steps:
- checkout
- run:
@ -189,9 +181,8 @@ jobs:
{ docker login -u="$DOCKERHUB_USERNAME" -p="$DOCKERHUB_TOKEN" ; } 2> /dev/null
DOCKER_IMAGE=pytorch/conda-cuda
echo Pulling docker image $DOCKER_IMAGE
docker pull $DOCKER_IMAGE
echo Pulling docker image $TESTRUN_DOCKER_IMAGE
docker pull $TESTRUN_DOCKER_IMAGE
- run:
name: Build and run tests
no_output_timeout: 20m
@ -200,11 +191,10 @@ jobs:
cd ${HOME}/project/
DOCKER_IMAGE=pytorch/conda-cuda
export JUST_TESTRUN=1
VARS_TO_PASS="-e PYTHON_VERSION -e BUILD_VERSION -e PYTORCH_VERSION -e CU_VERSION -e JUST_TESTRUN"
docker run --gpus all --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${DOCKER_IMAGE} ./packaging/build_conda.sh
docker run --gpus all --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${TESTRUN_DOCKER_IMAGE} ./packaging/build_conda.sh
binary_macos_wheel:
<<: *binary_common
@ -228,27 +218,15 @@ workflows:
version: 2
build_and_test:
jobs:
- main:
context: DOCKERHUB_TOKEN
# - main:
# context: DOCKERHUB_TOKEN
{{workflows()}}
- binary_linux_conda_cuda:
name: testrun_conda_cuda_py37_cu102_pyt190
context: DOCKERHUB_TOKEN
python_version: "3.7"
pytorch_version: '1.9.0'
cu_version: "cu102"
- binary_linux_conda_cuda:
name: testrun_conda_cuda_py37_cu110_pyt170
name: testrun_conda_cuda_py37_cu102_pyt170
context: DOCKERHUB_TOKEN
python_version: "3.7"
pytorch_version: '1.7.0'
cu_version: "cu110"
- binary_linux_conda_cuda:
name: testrun_conda_cuda_py39_cu111_pyt181
context: DOCKERHUB_TOKEN
python_version: "3.9"
pytorch_version: '1.8.1'
cu_version: "cu111"
cu_version: "cu102"
- binary_macos_wheel:
cu_version: cpu
name: macos_wheel_py36_cpu
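
Hedged aside: the main job above now pins the torch 1.10.0+cu113 wheels. A sanity check like the following (hypothetical, not part of the commit) would confirm the interpreter sees the intended build:

    # Hypothetical sanity check, not in the commit: verify the pinned
    # torch build matches the CUDA toolkit the job installs.
    import torch

    assert torch.__version__.startswith("1.10.0")
    assert torch.version.cuda == "11.3"
    assert torch.cuda.is_available()  # only true on the GPU executor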

View File

@ -18,20 +18,13 @@ setupcuda: &setupcuda
working_directory: ~/
command: |
# download and install nvidia drivers, cuda, etc
wget --no-verbose --no-clobber -P ~/nvidia-downloads https://developer.download.nvidia.com/compute/cuda/11.2.2/local_installers/cuda_11.2.2_460.32.03_linux.run
sudo sh ~/nvidia-downloads/cuda_11.2.2_460.32.03_linux.run --silent
wget --no-verbose --no-clobber -P ~/nvidia-downloads https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.19.01_linux.run
sudo sh ~/nvidia-downloads/cuda_11.3.1_465.19.01_linux.run --silent
echo "Done installing CUDA."
pyenv versions
nvidia-smi
pyenv global 3.9.1
gpu: &gpu
environment:
CUDA_VERSION: "10.2"
machine:
image: default
resource_class: gpu.medium # tesla m60
binary_common: &binary_common
parameters:
# Edit these defaults to do a release
@ -54,42 +47,41 @@ binary_common: &binary_common
description: "Wheel only: what docker image to use"
type: string
default: "pytorch/manylinux-cuda101"
conda_docker_image:
description: "what docker image to use for docker"
type: string
default: "pytorch/conda-cuda"
environment:
PYTHON_VERSION: << parameters.python_version >>
BUILD_VERSION: << parameters.build_version >>
PYTORCH_VERSION: << parameters.pytorch_version >>
CU_VERSION: << parameters.cu_version >>
TESTRUN_DOCKER_IMAGE: << parameters.conda_docker_image >>
jobs:
main:
<<: *gpu
environment:
CUDA_VERSION: "11.3"
resource_class: gpu.nvidia.small.multi
machine:
image: ubuntu-2004:202101-01
steps:
- checkout
- <<: *setupcuda
- run: pip3 install --progress-bar off imageio wheel matplotlib 'pillow<7'
- run: pip3 install --progress-bar off torch torchvision
- run: pip3 install --progress-bar off torch==1.10.0+cu113 torchvision==0.11.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
# - run: conda create -p ~/conda_env python=3.7 numpy
# - run: conda activate ~/conda_env
# - run: conda install -c pytorch pytorch torchvision
- run: pip3 install --progress-bar off 'git+https://github.com/facebookresearch/fvcore'
- run: pip3 install --progress-bar off 'git+https://github.com/facebookresearch/iopath'
- run:
name: get cub
command: |
cd ..
wget --no-verbose https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz
tar xzf 1.10.0.tar.gz
# This expands to a directory called cub-1.10.0
- run:
name: build
command: |
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2/lib64
export CUB_HOME=$(realpath ../cub-1.10.0)
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.3/lib64
python3 setup.py build_ext --inplace
- run: LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2/lib64 python -m unittest discover -v -s tests
- run: LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.3/lib64 python -m unittest discover -v -s tests
- run: python3 setup.py bdist_wheel
binary_linux_wheel:
@ -113,7 +105,7 @@ jobs:
binary_linux_conda:
<<: *binary_common
docker:
- image: "pytorch/conda-cuda"
- image: "<< parameters.conda_docker_image >>"
auth:
username: $DOCKERHUB_USERNAME
password: $DOCKERHUB_TOKEN
@ -137,7 +129,7 @@ jobs:
<<: *binary_common
machine:
image: ubuntu-1604:201903-01
resource_class: gpu.medium
resource_class: gpu.nvidia.small.multi
steps:
- checkout
- run:
@ -189,9 +181,8 @@ jobs:
{ docker login -u="$DOCKERHUB_USERNAME" -p="$DOCKERHUB_TOKEN" ; } 2> /dev/null
DOCKER_IMAGE=pytorch/conda-cuda
echo Pulling docker image $DOCKER_IMAGE
docker pull $DOCKER_IMAGE
echo Pulling docker image $TESTRUN_DOCKER_IMAGE
docker pull $TESTRUN_DOCKER_IMAGE
- run:
name: Build and run tests
no_output_timeout: 20m
@ -200,11 +191,10 @@ jobs:
cd ${HOME}/project/
DOCKER_IMAGE=pytorch/conda-cuda
export JUST_TESTRUN=1
VARS_TO_PASS="-e PYTHON_VERSION -e BUILD_VERSION -e PYTORCH_VERSION -e CU_VERSION -e JUST_TESTRUN"
docker run --gpus all --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${DOCKER_IMAGE} ./packaging/build_conda.sh
docker run --gpus all --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${TESTRUN_DOCKER_IMAGE} ./packaging/build_conda.sh
binary_macos_wheel:
<<: *binary_common
@ -228,8 +218,8 @@ workflows:
version: 2
build_and_test:
jobs:
- main:
context: DOCKERHUB_TOKEN
# - main:
# context: DOCKERHUB_TOKEN
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu92
@ -344,6 +334,19 @@ workflows:
name: linux_conda_py36_cu111_pyt191
python_version: '3.6'
pytorch_version: 1.9.1
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu102
name: linux_conda_py36_cu102_pyt1100
python_version: '3.6'
pytorch_version: 1.10.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda113
context: DOCKERHUB_TOKEN
cu_version: cu113
name: linux_conda_py36_cu113_pyt1100
python_version: '3.6'
pytorch_version: 1.10.0
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu92
@ -458,6 +461,19 @@ workflows:
name: linux_conda_py37_cu111_pyt191
python_version: '3.7'
pytorch_version: 1.9.1
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu102
name: linux_conda_py37_cu102_pyt1100
python_version: '3.7'
pytorch_version: 1.10.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda113
context: DOCKERHUB_TOKEN
cu_version: cu113
name: linux_conda_py37_cu113_pyt1100
python_version: '3.7'
pytorch_version: 1.10.0
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu92
@ -572,6 +588,19 @@ workflows:
name: linux_conda_py38_cu111_pyt191
python_version: '3.8'
pytorch_version: 1.9.1
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu102
name: linux_conda_py38_cu102_pyt1100
python_version: '3.8'
pytorch_version: 1.10.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda113
context: DOCKERHUB_TOKEN
cu_version: cu113
name: linux_conda_py38_cu113_pyt1100
python_version: '3.8'
pytorch_version: 1.10.0
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu101
@ -650,24 +679,25 @@ workflows:
name: linux_conda_py39_cu111_pyt191
python_version: '3.9'
pytorch_version: 1.9.1
- binary_linux_conda_cuda:
name: testrun_conda_cuda_py37_cu102_pyt190
- binary_linux_conda:
context: DOCKERHUB_TOKEN
python_version: "3.7"
pytorch_version: '1.9.0'
cu_version: "cu102"
cu_version: cu102
name: linux_conda_py39_cu102_pyt1100
python_version: '3.9'
pytorch_version: 1.10.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda113
context: DOCKERHUB_TOKEN
cu_version: cu113
name: linux_conda_py39_cu113_pyt1100
python_version: '3.9'
pytorch_version: 1.10.0
- binary_linux_conda_cuda:
name: testrun_conda_cuda_py37_cu110_pyt170
name: testrun_conda_cuda_py37_cu102_pyt170
context: DOCKERHUB_TOKEN
python_version: "3.7"
pytorch_version: '1.7.0'
cu_version: "cu110"
- binary_linux_conda_cuda:
name: testrun_conda_cuda_py39_cu111_pyt181
context: DOCKERHUB_TOKEN
python_version: "3.9"
pytorch_version: '1.8.1'
cu_version: "cu111"
cu_version: "cu102"
- binary_macos_wheel:
cu_version: cpu
name: macos_wheel_py36_cpu
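
The job names above follow a fixed convention. A hedged sketch of it (hypothetical helper; the generator may build names differently):

    # Hypothetical helper mirroring the naming convention above.
    def workflow_name(python_version, cu_version, pytorch_version):
        return "linux_conda_py{}_{}_pyt{}".format(
            python_version.replace(".", ""),
            cu_version,
            pytorch_version.replace(".", ""),
        )

    assert workflow_name("3.9", "cu113", "1.10.0") == "linux_conda_py39_cu113_pyt1100"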

View File

@ -26,9 +26,16 @@ CONDA_CUDA_VERSIONS = {
"1.8.1": ["cu101", "cu102", "cu111"],
"1.9.0": ["cu102", "cu111"],
"1.9.1": ["cu102", "cu111"],
"1.10.0": ["cu102", "cu113"],
}
def conda_docker_image_for_cuda(cuda_version):
if cuda_version == "cu113":
return "pytorch/conda-builder:cuda113"
return None
def pytorch_versions_for_python(python_version):
if python_version in ["3.6", "3.7", "3.8"]:
return list(CONDA_CUDA_VERSIONS)
@ -113,6 +120,10 @@ def generate_base_workflow(
"context": "DOCKERHUB_TOKEN",
}
conda_docker_image = conda_docker_image_for_cuda(cu_version)
if conda_docker_image is not None:
d["conda_docker_image"] = conda_docker_image
if filter_branch is not None:
d["filters"] = {"branches": {"only": filter_branch}}

View File

@ -58,6 +58,16 @@ do
for cu_version in ${CONDA_CUDA_VERSIONS[$pytorch_version]}
do
case "$cu_version" in
cu113)
export CUDA_HOME=/usr/local/cuda-11.3/
export CUDA_TAG=11.3
export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50"
;;
cu112)
export CUDA_HOME=/usr/local/cuda-11.2/
export CUDA_TAG=11.2
export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50"
;;
cu111)
export CUDA_HOME=/usr/local/cuda-11.1/
export CUDA_TAG=11.1
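
The long -gencode strings in the new cu113/cu112 arms follow one pattern. A hedged Python sketch that reproduces them (hypothetical helper, not in the repo):

    # Hypothetical generator, not in the repo: reproduces the NVCC_FLAGS
    # value used by the cu113/cu112 arms above.
    def gencode_flags(sms=(35, 50, 60, 70, 75, 80, 86), ptx=50):
        flags = [f"-gencode=arch=compute_{sm},code=sm_{sm}" for sm in sms]
        # Trailing compute_{ptx} entry embeds PTX for forward compatibility.
        flags.append(f"-gencode=arch=compute_{ptx},code=compute_{ptx}")
        return " ".join(flags)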

View File

@ -51,6 +51,28 @@ setup_cuda() {
# Now work out the CUDA settings
case "$CU_VERSION" in
cu113)
if [[ "$OSTYPE" == "msys" ]]; then
export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.3"
else
export CUDA_HOME=/usr/local/cuda-11.3/
fi
export FORCE_CUDA=1
# Hard-coding gencode flags is a temporary situation until
# https://github.com/pytorch/pytorch/pull/23408 lands
export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50"
;;
cu112)
if [[ "$OSTYPE" == "msys" ]]; then
export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.2"
else
export CUDA_HOME=/usr/local/cuda-11.2/
fi
export FORCE_CUDA=1
# Hard-coding gencode flags is a temporary situation until
# https://github.com/pytorch/pytorch/pull/23408 lands
export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50"
;;
cu111)
if [[ "$OSTYPE" == "msys" ]]; then
export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.1"
@ -267,9 +289,14 @@ setup_conda_cudatoolkit_constraint() {
export CONDA_CUDATOOLKIT_CONSTRAINT=""
else
case "$CU_VERSION" in
cu113)
export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.3,<11.4 # [not osx]"
;;
cu112)
export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.2,<11.3 # [not osx]"
;;
cu111)
export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.1,<11.2 # [not osx]"
#export CONDA_CUB_CONSTRAINT="- nvidiacub"
;;
cu110)
export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.0,<11.1 # [not osx]"