Builds for PyTorch 1.10.0

Summary:
Add builds corresponding to the new PyTorch 1.10.0 release. We omit CUDA 11.3 testing because it fails on the current hardware, and also omit the main build for the moment.
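
For concreteness, the combinations this change adds (read off the generated workflow entries below) can be summarized as a small illustrative sketch; the list itself is not part of the commit:

    # Illustrative sketch (not in the commit): the conda build matrix this
    # change adds, as seen in the generated workflow entries below.
    new_builds = [
        (python, cu, "1.10.0")
        for python in ("3.6", "3.7", "3.8", "3.9")
        for cu in ("cu102", "cu113")
    ]
    # e.g. ("3.9", "cu113", "1.10.0") -> job linux_conda_py39_cu113_pyt1100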

Also move to the newer CircleCI GPU executors.

Reviewed By: gkioxari

Differential Revision: D32335934

fbshipit-source-id: 416d92a8eecd06ef7fc742664a5f2d46f93415f8
Author: Jeremy Reizenstein 2021-11-11 02:02:26 -08:00, committed by Facebook GitHub Bot
parent 1836c786fe
commit 5fbdb99aec
5 changed files with 144 additions and 88 deletions

View File

@ -18,20 +18,13 @@ setupcuda: &setupcuda
working_directory: ~/
command: |
# download and install nvidia drivers, cuda, etc
wget --no-verbose --no-clobber -P ~/nvidia-downloads https://developer.download.nvidia.com/compute/cuda/11.2.2/local_installers/cuda_11.2.2_460.32.03_linux.run
sudo sh ~/nvidia-downloads/cuda_11.2.2_460.32.03_linux.run --silent
wget --no-verbose --no-clobber -P ~/nvidia-downloads https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.19.01_linux.run
sudo sh ~/nvidia-downloads/cuda_11.3.1_465.19.01_linux.run --silent
echo "Done installing CUDA."
pyenv versions
nvidia-smi
pyenv global 3.9.1
gpu: &gpu
environment:
CUDA_VERSION: "10.2"
machine:
image: default
resource_class: gpu.medium # tesla m60
binary_common: &binary_common
parameters:
# Edit these defaults to do a release
@ -54,42 +47,41 @@ binary_common: &binary_common
description: "Wheel only: what docker image to use"
type: string
default: "pytorch/manylinux-cuda101"
conda_docker_image:
description: "what docker image to use for docker"
type: string
default: "pytorch/conda-cuda"
environment:
PYTHON_VERSION: << parameters.python_version >>
BUILD_VERSION: << parameters.build_version >>
PYTORCH_VERSION: << parameters.pytorch_version >>
CU_VERSION: << parameters.cu_version >>
TESTRUN_DOCKER_IMAGE: << parameters.conda_docker_image >>
jobs:
main:
<<: *gpu
environment:
CUDA_VERSION: "11.3"
resource_class: gpu.nvidia.small.multi
machine:
image: ubuntu-2004:202101-01
steps:
- checkout
- <<: *setupcuda
- run: pip3 install --progress-bar off imageio wheel matplotlib 'pillow<7'
- run: pip3 install --progress-bar off torch torchvision
- run: pip3 install --progress-bar off torch==1.10.0+cu113 torchvision==0.11.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
# - run: conda create -p ~/conda_env python=3.7 numpy
# - run: conda activate ~/conda_env
# - run: conda install -c pytorch pytorch torchvision
- run: pip3 install --progress-bar off 'git+https://github.com/facebookresearch/fvcore'
- run: pip3 install --progress-bar off 'git+https://github.com/facebookresearch/iopath'
- run:
name: get cub
command: |
cd ..
wget --no-verbose https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz
tar xzf 1.10.0.tar.gz
# This expands to a directory called cub-1.10.0
- run:
name: build
command: |
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2/lib64
export CUB_HOME=$(realpath ../cub-1.10.0)
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.3/lib64
python3 setup.py build_ext --inplace
- run: LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2/lib64 python -m unittest discover -v -s tests
- run: LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.3/lib64 python -m unittest discover -v -s tests
- run: python3 setup.py bdist_wheel
binary_linux_wheel:
@ -113,7 +105,7 @@ jobs:
binary_linux_conda:
<<: *binary_common
docker:
- image: "pytorch/conda-cuda"
- image: "<< parameters.conda_docker_image >>"
auth:
username: $DOCKERHUB_USERNAME
password: $DOCKERHUB_TOKEN
@ -137,7 +129,7 @@ jobs:
<<: *binary_common
machine:
image: ubuntu-1604:201903-01
resource_class: gpu.medium
resource_class: gpu.nvidia.small.multi
steps:
- checkout
- run:
@ -189,9 +181,8 @@ jobs:
{ docker login -u="$DOCKERHUB_USERNAME" -p="$DOCKERHUB_TOKEN" ; } 2> /dev/null
DOCKER_IMAGE=pytorch/conda-cuda
echo Pulling docker image $DOCKER_IMAGE
docker pull $DOCKER_IMAGE
echo Pulling docker image $TESTRUN_DOCKER_IMAGE
docker pull $TESTRUN_DOCKER_IMAGE
- run:
name: Build and run tests
no_output_timeout: 20m
@ -200,11 +191,10 @@ jobs:
cd ${HOME}/project/
DOCKER_IMAGE=pytorch/conda-cuda
export JUST_TESTRUN=1
VARS_TO_PASS="-e PYTHON_VERSION -e BUILD_VERSION -e PYTORCH_VERSION -e CU_VERSION -e JUST_TESTRUN"
docker run --gpus all --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${DOCKER_IMAGE} ./packaging/build_conda.sh
docker run --gpus all --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${TESTRUN_DOCKER_IMAGE} ./packaging/build_conda.sh
binary_macos_wheel:
<<: *binary_common
@ -228,27 +218,15 @@ workflows:
version: 2
build_and_test:
jobs:
- main:
context: DOCKERHUB_TOKEN
# - main:
# context: DOCKERHUB_TOKEN
{{workflows()}}
- binary_linux_conda_cuda:
name: testrun_conda_cuda_py37_cu102_pyt190
context: DOCKERHUB_TOKEN
python_version: "3.7"
pytorch_version: '1.9.0'
cu_version: "cu102"
- binary_linux_conda_cuda:
name: testrun_conda_cuda_py37_cu110_pyt170
name: testrun_conda_cuda_py37_cu102_pyt170
context: DOCKERHUB_TOKEN
python_version: "3.7"
pytorch_version: '1.7.0'
cu_version: "cu110"
- binary_linux_conda_cuda:
name: testrun_conda_cuda_py39_cu111_pyt181
context: DOCKERHUB_TOKEN
python_version: "3.9"
pytorch_version: '1.8.1'
cu_version: "cu111"
cu_version: "cu102"
- binary_macos_wheel:
cu_version: cpu
name: macos_wheel_py36_cpu
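
Hedged aside: the main job above now pins the torch 1.10.0+cu113 wheels. A sanity check like the following (hypothetical, not part of the commit) would confirm the interpreter sees the intended build:

    # Hypothetical sanity check, not in the commit: verify the pinned
    # torch build matches the CUDA toolkit the job installs.
    import torch

    assert torch.__version__.startswith("1.10.0")
    assert torch.version.cuda == "11.3"
    assert torch.cuda.is_available()  # only true on the GPU executor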

View File

@ -18,20 +18,13 @@ setupcuda: &setupcuda
working_directory: ~/
command: |
# download and install nvidia drivers, cuda, etc
wget --no-verbose --no-clobber -P ~/nvidia-downloads https://developer.download.nvidia.com/compute/cuda/11.2.2/local_installers/cuda_11.2.2_460.32.03_linux.run
sudo sh ~/nvidia-downloads/cuda_11.2.2_460.32.03_linux.run --silent
wget --no-verbose --no-clobber -P ~/nvidia-downloads https://developer.download.nvidia.com/compute/cuda/11.3.1/local_installers/cuda_11.3.1_465.19.01_linux.run
sudo sh ~/nvidia-downloads/cuda_11.3.1_465.19.01_linux.run --silent
echo "Done installing CUDA."
pyenv versions
nvidia-smi
pyenv global 3.9.1
gpu: &gpu
environment:
CUDA_VERSION: "10.2"
machine:
image: default
resource_class: gpu.medium # tesla m60
binary_common: &binary_common
parameters:
# Edit these defaults to do a release
@ -54,42 +47,41 @@ binary_common: &binary_common
description: "Wheel only: what docker image to use"
type: string
default: "pytorch/manylinux-cuda101"
conda_docker_image:
description: "what docker image to use for docker"
type: string
default: "pytorch/conda-cuda"
environment:
PYTHON_VERSION: << parameters.python_version >>
BUILD_VERSION: << parameters.build_version >>
PYTORCH_VERSION: << parameters.pytorch_version >>
CU_VERSION: << parameters.cu_version >>
TESTRUN_DOCKER_IMAGE: << parameters.conda_docker_image >>
jobs:
main:
<<: *gpu
environment:
CUDA_VERSION: "11.3"
resource_class: gpu.nvidia.small.multi
machine:
image: ubuntu-2004:202101-01
steps:
- checkout
- <<: *setupcuda
- run: pip3 install --progress-bar off imageio wheel matplotlib 'pillow<7'
- run: pip3 install --progress-bar off torch torchvision
- run: pip3 install --progress-bar off torch==1.10.0+cu113 torchvision==0.11.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
# - run: conda create -p ~/conda_env python=3.7 numpy
# - run: conda activate ~/conda_env
# - run: conda install -c pytorch pytorch torchvision
- run: pip3 install --progress-bar off 'git+https://github.com/facebookresearch/fvcore'
- run: pip3 install --progress-bar off 'git+https://github.com/facebookresearch/iopath'
- run:
name: get cub
command: |
cd ..
wget --no-verbose https://github.com/NVIDIA/cub/archive/1.10.0.tar.gz
tar xzf 1.10.0.tar.gz
# This expands to a directory called cub-1.10.0
- run:
name: build
command: |
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2/lib64
export CUB_HOME=$(realpath ../cub-1.10.0)
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.3/lib64
python3 setup.py build_ext --inplace
- run: LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.2/lib64 python -m unittest discover -v -s tests
- run: LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.3/lib64 python -m unittest discover -v -s tests
- run: python3 setup.py bdist_wheel
binary_linux_wheel:
@ -113,7 +105,7 @@ jobs:
binary_linux_conda:
<<: *binary_common
docker:
- image: "pytorch/conda-cuda"
- image: "<< parameters.conda_docker_image >>"
auth:
username: $DOCKERHUB_USERNAME
password: $DOCKERHUB_TOKEN
@ -137,7 +129,7 @@ jobs:
<<: *binary_common
machine:
image: ubuntu-1604:201903-01
resource_class: gpu.medium
resource_class: gpu.nvidia.small.multi
steps:
- checkout
- run:
@ -189,9 +181,8 @@ jobs:
{ docker login -u="$DOCKERHUB_USERNAME" -p="$DOCKERHUB_TOKEN" ; } 2> /dev/null
DOCKER_IMAGE=pytorch/conda-cuda
echo Pulling docker image $DOCKER_IMAGE
docker pull $DOCKER_IMAGE
echo Pulling docker image $TESTRUN_DOCKER_IMAGE
docker pull $TESTRUN_DOCKER_IMAGE
- run:
name: Build and run tests
no_output_timeout: 20m
@ -200,11 +191,10 @@ jobs:
cd ${HOME}/project/
DOCKER_IMAGE=pytorch/conda-cuda
export JUST_TESTRUN=1
VARS_TO_PASS="-e PYTHON_VERSION -e BUILD_VERSION -e PYTORCH_VERSION -e CU_VERSION -e JUST_TESTRUN"
docker run --gpus all --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${DOCKER_IMAGE} ./packaging/build_conda.sh
docker run --gpus all --ipc=host -v $(pwd):/remote -w /remote ${VARS_TO_PASS} ${TESTRUN_DOCKER_IMAGE} ./packaging/build_conda.sh
binary_macos_wheel:
<<: *binary_common
@ -228,8 +218,8 @@ workflows:
version: 2
build_and_test:
jobs:
- main:
context: DOCKERHUB_TOKEN
# - main:
# context: DOCKERHUB_TOKEN
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu92
@ -344,6 +334,19 @@ workflows:
name: linux_conda_py36_cu111_pyt191
python_version: '3.6'
pytorch_version: 1.9.1
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu102
name: linux_conda_py36_cu102_pyt1100
python_version: '3.6'
pytorch_version: 1.10.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda113
context: DOCKERHUB_TOKEN
cu_version: cu113
name: linux_conda_py36_cu113_pyt1100
python_version: '3.6'
pytorch_version: 1.10.0
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu92
@ -458,6 +461,19 @@ workflows:
name: linux_conda_py37_cu111_pyt191
python_version: '3.7'
pytorch_version: 1.9.1
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu102
name: linux_conda_py37_cu102_pyt1100
python_version: '3.7'
pytorch_version: 1.10.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda113
context: DOCKERHUB_TOKEN
cu_version: cu113
name: linux_conda_py37_cu113_pyt1100
python_version: '3.7'
pytorch_version: 1.10.0
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu92
@ -572,6 +588,19 @@ workflows:
name: linux_conda_py38_cu111_pyt191
python_version: '3.8'
pytorch_version: 1.9.1
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu102
name: linux_conda_py38_cu102_pyt1100
python_version: '3.8'
pytorch_version: 1.10.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda113
context: DOCKERHUB_TOKEN
cu_version: cu113
name: linux_conda_py38_cu113_pyt1100
python_version: '3.8'
pytorch_version: 1.10.0
- binary_linux_conda:
context: DOCKERHUB_TOKEN
cu_version: cu101
@ -650,24 +679,25 @@ workflows:
name: linux_conda_py39_cu111_pyt191
python_version: '3.9'
pytorch_version: 1.9.1
- binary_linux_conda_cuda:
name: testrun_conda_cuda_py37_cu102_pyt190
- binary_linux_conda:
context: DOCKERHUB_TOKEN
python_version: "3.7"
pytorch_version: '1.9.0'
cu_version: "cu102"
cu_version: cu102
name: linux_conda_py39_cu102_pyt1100
python_version: '3.9'
pytorch_version: 1.10.0
- binary_linux_conda:
conda_docker_image: pytorch/conda-builder:cuda113
context: DOCKERHUB_TOKEN
cu_version: cu113
name: linux_conda_py39_cu113_pyt1100
python_version: '3.9'
pytorch_version: 1.10.0
- binary_linux_conda_cuda:
name: testrun_conda_cuda_py37_cu110_pyt170
name: testrun_conda_cuda_py37_cu102_pyt170
context: DOCKERHUB_TOKEN
python_version: "3.7"
pytorch_version: '1.7.0'
cu_version: "cu110"
- binary_linux_conda_cuda:
name: testrun_conda_cuda_py39_cu111_pyt181
context: DOCKERHUB_TOKEN
python_version: "3.9"
pytorch_version: '1.8.1'
cu_version: "cu111"
cu_version: "cu102"
- binary_macos_wheel:
cu_version: cpu
name: macos_wheel_py36_cpu
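
The job names above follow a fixed convention. A hedged sketch of it (hypothetical helper; the generator may build names differently):

    # Hypothetical helper mirroring the naming convention above.
    def workflow_name(python_version, cu_version, pytorch_version):
        return "linux_conda_py{}_{}_pyt{}".format(
            python_version.replace(".", ""),
            cu_version,
            pytorch_version.replace(".", ""),
        )

    assert workflow_name("3.9", "cu113", "1.10.0") == "linux_conda_py39_cu113_pyt1100"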

View File

@ -26,9 +26,16 @@ CONDA_CUDA_VERSIONS = {
"1.8.1": ["cu101", "cu102", "cu111"],
"1.9.0": ["cu102", "cu111"],
"1.9.1": ["cu102", "cu111"],
"1.10.0": ["cu102", "cu113"],
}
def conda_docker_image_for_cuda(cuda_version):
if cuda_version == "cu113":
return "pytorch/conda-builder:cuda113"
return None
def pytorch_versions_for_python(python_version):
if python_version in ["3.6", "3.7", "3.8"]:
return list(CONDA_CUDA_VERSIONS)
@ -113,6 +120,10 @@ def generate_base_workflow(
"context": "DOCKERHUB_TOKEN",
}
conda_docker_image = conda_docker_image_for_cuda(cu_version)
if conda_docker_image is not None:
d["conda_docker_image"] = conda_docker_image
if filter_branch is not None:
d["filters"] = {"branches": {"only": filter_branch}}

View File

@ -58,6 +58,16 @@ do
for cu_version in ${CONDA_CUDA_VERSIONS[$pytorch_version]}
do
case "$cu_version" in
cu113)
export CUDA_HOME=/usr/local/cuda-11.3/
export CUDA_TAG=11.3
export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50"
;;
cu112)
export CUDA_HOME=/usr/local/cuda-11.2/
export CUDA_TAG=11.2
export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50"
;;
cu111)
export CUDA_HOME=/usr/local/cuda-11.1/
export CUDA_TAG=11.1
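
The long -gencode strings in the new cu113/cu112 arms follow one pattern. A hedged Python sketch that reproduces them (hypothetical helper, not in the repo):

    # Hypothetical generator, not in the repo: reproduces the NVCC_FLAGS
    # value used by the cu113/cu112 arms above.
    def gencode_flags(sms=(35, 50, 60, 70, 75, 80, 86), ptx=50):
        flags = [f"-gencode=arch=compute_{sm},code=sm_{sm}" for sm in sms]
        # Trailing compute_{ptx} entry embeds PTX for forward compatibility.
        flags.append(f"-gencode=arch=compute_{ptx},code=compute_{ptx}")
        return " ".join(flags)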

View File

@ -51,6 +51,28 @@ setup_cuda() {
# Now work out the CUDA settings
case "$CU_VERSION" in
cu113)
if [[ "$OSTYPE" == "msys" ]]; then
export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.3"
else
export CUDA_HOME=/usr/local/cuda-11.3/
fi
export FORCE_CUDA=1
# Hard-coding gencode flags is a temporary situation until
# https://github.com/pytorch/pytorch/pull/23408 lands
export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50"
;;
cu112)
if [[ "$OSTYPE" == "msys" ]]; then
export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.2"
else
export CUDA_HOME=/usr/local/cuda-11.2/
fi
export FORCE_CUDA=1
# Hard-coding gencode flags is a temporary situation until
# https://github.com/pytorch/pytorch/pull/23408 lands
export NVCC_FLAGS="-gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=sm_80 -gencode=arch=compute_86,code=sm_86 -gencode=arch=compute_50,code=compute_50"
;;
cu111)
if [[ "$OSTYPE" == "msys" ]]; then
export CUDA_HOME="C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.1"
@ -267,9 +289,14 @@ setup_conda_cudatoolkit_constraint() {
export CONDA_CUDATOOLKIT_CONSTRAINT=""
else
case "$CU_VERSION" in
cu113)
export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.3,<11.4 # [not osx]"
;;
cu112)
export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.2,<11.3 # [not osx]"
;;
cu111)
export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.1,<11.2 # [not osx]"
#export CONDA_CUB_CONSTRAINT="- nvidiacub"
;;
cu110)
export CONDA_CUDATOOLKIT_CONSTRAINT="- cudatoolkit >=11.0,<11.1 # [not osx]"