fbcode/vision/fair/pytorch3d/pytorch3d/transforms/rotation_conversions.py

Reviewed By: bottler
Differential Revision: D93712828
fbshipit-source-id: 3465af450104bb1e5f491e3c0ee0259698cf8ceb
fbcode/vision/fair/pytorch3d/pytorch3d/renderer/mesh/textures.py
2026-06-21 06:28:53 +08:00 · 2026-02-22 07:53:20 -08:00 · 2026-02-22 07:13:45 -08:00 · 2026-02-22 07:13:09 -08:00 · 2026-02-22 07:08:02 -08:00 · 2026-02-22 07:06:34 -08:00
135 changed files with 682 additions and 478 deletions
--- a/dev/linter.sh
+++ b/dev/linter.sh
@@ -10,7 +10,7 @@
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 DIR=$(dirname "${DIR}")

-if [[ -f "${DIR}/TARGETS" ]]
+if [[ -f "${DIR}/BUCK" ]]
 then
  pyfmt "${DIR}"
 else
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -19,7 +19,6 @@
 #
 import os
 import sys
-
 import unittest.mock as mock

 from recommonmark.parser import CommonMarkParser
--- a/projects/implicitron_trainer/experiment.py
+++ b/projects/implicitron_trainer/experiment.py
@@ -48,22 +48,18 @@ The outputs of the experiment are saved and logged in multiple ways:
 import logging
 import os
 import warnings
-
 from dataclasses import field

 import hydra
-
 import torch
 from accelerate import Accelerator
 from omegaconf import DictConfig, OmegaConf
 from packaging import version
-
 from pytorch3d.implicitron.dataset.data_source import (
    DataSourceBase,
    ImplicitronDataSource,
 )
 from pytorch3d.implicitron.models.base_model import ImplicitronModelBase
-
 from pytorch3d.implicitron.models.renderer.multipass_ea import (
    MultiPassEmissionAbsorptionRenderer,
 )
--- a/projects/implicitron_trainer/impl/model_factory.py
+++ b/projects/implicitron_trainer/impl/model_factory.py
@@ -11,7 +11,6 @@ import os
 from typing import Optional

 import torch.optim
-
 from accelerate import Accelerator
 from pytorch3d.implicitron.models.base_model import ImplicitronModelBase
 from pytorch3d.implicitron.tools import model_io
--- a/projects/implicitron_trainer/impl/optimizer_factory.py
+++ b/projects/implicitron_trainer/impl/optimizer_factory.py
@@ -14,9 +14,7 @@ from dataclasses import field
 from typing import Any, Dict, List, Optional, Tuple

 import torch.optim
-
 from accelerate import Accelerator
-
 from pytorch3d.implicitron.models.base_model import ImplicitronModelBase
 from pytorch3d.implicitron.tools import model_io
 from pytorch3d.implicitron.tools.config import (
--- a/projects/implicitron_trainer/tests/test_experiment.py
+++ b/projects/implicitron_trainer/tests/test_experiment.py
@@ -12,7 +12,6 @@ import unittest
 from pathlib import Path

 import torch
-
 from hydra import compose, initialize_config_dir
 from omegaconf import OmegaConf
 from projects.implicitron_trainer.impl.optimizer_factory import (
--- a/pytorch3d/__init__.py
+++ b/pytorch3d/__init__.py
@@ -6,4 +6,4 @@

 # pyre-unsafe

-__version__ = "0.7.8"
+__version__ = "0.7.9"
--- a/pytorch3d/csrc/ball_query/ball_query.cu
+++ b/pytorch3d/csrc/ball_query/ball_query.cu
@@ -32,7 +32,9 @@ __global__ void BallQueryKernel(
    at::PackedTensorAccessor64<int64_t, 3, at::RestrictPtrTraits> idxs,
    at::PackedTensorAccessor64<scalar_t, 3, at::RestrictPtrTraits> dists,
    const int64_t K,
-    const float radius2) {
+    const float radius,
+    const float radius2,
+    const bool skip_points_outside_cube) {
  const int64_t N = p1.size(0);
  const int64_t chunks_per_cloud = (1 + (p1.size(1) - 1) / blockDim.x);
  const int64_t chunks_to_do = N * chunks_per_cloud;
@@ -51,7 +53,19 @@ __global__ void BallQueryKernel(
    // Iterate over points in p2 until desired count is reached or
    // all points have been considered
    for (int64_t j = 0, count = 0; j < lengths2[n] && count < K; ++j) {
-      // Calculate the distance between the points
+      if (skip_points_outside_cube) {
+        bool is_within_radius = true;
+        // Filter when any one coordinate is already outside the radius
+        for (int d = 0; is_within_radius && d < D; ++d) {
+          scalar_t abs_diff = fabs(p1[n][i][d] - p2[n][j][d]);
+          is_within_radius = (abs_diff <= radius);
+        }
+        if (!is_within_radius) {
+          continue;
+        }
+      }
+
+      // Else, calculate the distance between the points and compare
      scalar_t dist2 = 0.0;
      for (int d = 0; d < D; ++d) {
        scalar_t diff = p1[n][i][d] - p2[n][j][d];
@@ -77,7 +91,8 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCuda(
    const at::Tensor& lengths1, // (N,)
    const at::Tensor& lengths2, // (N,)
    int K,
-    float radius) {
+    float radius,
+    bool skip_points_outside_cube) {
  // Check inputs are on the same device
  at::TensorArg p1_t{p1, "p1", 1}, p2_t{p2, "p2", 2},
      lengths1_t{lengths1, "lengths1", 3}, lengths2_t{lengths2, "lengths2", 4};
@@ -120,7 +135,9 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCuda(
            idxs.packed_accessor64<int64_t, 3, at::RestrictPtrTraits>(),
            dists.packed_accessor64<float, 3, at::RestrictPtrTraits>(),
            K_64,
-            radius2);
+            radius,
+            radius2,
+            skip_points_outside_cube);
      }));

  AT_CUDA_CHECK(cudaGetLastError());
--- a/pytorch3d/csrc/ball_query/ball_query.h
+++ b/pytorch3d/csrc/ball_query/ball_query.h
@@ -25,6 +25,9 @@
 //      within the radius
 //    radius: the radius around each point within which the neighbors need to be
 //      located
+//    skip_points_outside_cube: If true, reduce multiplications of float values
+//      by not explicitly calculating distances to points that fall outside the
+//      D-cube with side length (2*radius) centered at each point in p1.
 //
 // Returns:
 //    p1_neighbor_idx: LongTensor of shape (N, P1, K), where
@@ -46,7 +49,8 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCpu(
    const at::Tensor& lengths1,
    const at::Tensor& lengths2,
    const int K,
-    const float radius);
+    const float radius,
+    const bool skip_points_outside_cube);

 // CUDA implementation
 std::tuple<at::Tensor, at::Tensor> BallQueryCuda(
@@ -55,7 +59,8 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCuda(
    const at::Tensor& lengths1,
    const at::Tensor& lengths2,
    const int K,
-    const float radius);
+    const float radius,
+    const bool skip_points_outside_cube);

 // Implementation which is exposed
 // Note: the backward pass reuses the KNearestNeighborBackward kernel
@@ -65,7 +70,8 @@ inline std::tuple<at::Tensor, at::Tensor> BallQuery(
    const at::Tensor& lengths1,
    const at::Tensor& lengths2,
    int K,
-    float radius) {
+    float radius,
+    bool skip_points_outside_cube) {
  if (p1.is_cuda() || p2.is_cuda()) {
 #ifdef WITH_CUDA
    CHECK_CUDA(p1);
@@ -76,16 +82,20 @@ inline std::tuple<at::Tensor, at::Tensor> BallQuery(
        lengths1.contiguous(),
        lengths2.contiguous(),
        K,
-        radius);
+        radius,
+        skip_points_outside_cube);
 #else
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(p1);
+  CHECK_CPU(p2);
  return BallQueryCpu(
      p1.contiguous(),
      p2.contiguous(),
      lengths1.contiguous(),
      lengths2.contiguous(),
      K,
-      radius);
+      radius,
+      skip_points_outside_cube);
 }
--- a/pytorch3d/csrc/ball_query/ball_query_cpu.cpp
+++ b/pytorch3d/csrc/ball_query/ball_query_cpu.cpp
@@ -6,6 +6,7 @@
 * LICENSE file in the root directory of this source tree.
 */

+#include <math.h>
 #include <torch/extension.h>
 #include <tuple>

@@ -15,7 +16,8 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCpu(
    const at::Tensor& lengths1,
    const at::Tensor& lengths2,
    int K,
-    float radius) {
+    float radius,
+    bool skip_points_outside_cube) {
  const int N = p1.size(0);
  const int P1 = p1.size(1);
  const int D = p1.size(2);
@@ -37,6 +39,16 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCpu(
    const int64_t length2 = lengths2_a[n];
    for (int64_t i = 0; i < length1; ++i) {
      for (int64_t j = 0, count = 0; j < length2 && count < K; ++j) {
+        if (skip_points_outside_cube) {
+          bool is_within_radius = true;
+          for (int d = 0; is_within_radius && d < D; ++d) {
+            float abs_diff = fabs(p1_a[n][i][d] - p2_a[n][j][d]);
+            is_within_radius = (abs_diff <= radius);
+          }
+          if (!is_within_radius) {
+            continue;
+          }
+        }
        float dist2 = 0;
        for (int d = 0; d < D; ++d) {
          float diff = p1_a[n][i][d] - p2_a[n][j][d];
--- a/pytorch3d/csrc/blending/sigmoid_alpha_blend.h
+++ b/pytorch3d/csrc/blending/sigmoid_alpha_blend.h
@@ -98,6 +98,11 @@ at::Tensor SigmoidAlphaBlendBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(distances);
+  CHECK_CPU(pix_to_face);
+  CHECK_CPU(alphas);
+  CHECK_CPU(grad_alphas);
+
  return SigmoidAlphaBlendBackwardCpu(
      grad_alphas, alphas, distances, pix_to_face, sigma);
 }
--- a/pytorch3d/csrc/compositing/alpha_composite.h
+++ b/pytorch3d/csrc/compositing/alpha_composite.h
@@ -74,6 +74,9 @@ torch::Tensor alphaCompositeForward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
+    CHECK_CPU(features);
+    CHECK_CPU(alphas);
+    CHECK_CPU(points_idx);
    return alphaCompositeCpuForward(features, alphas, points_idx);
  }
 }
@@ -101,6 +104,11 @@ std::tuple<torch::Tensor, torch::Tensor> alphaCompositeBackward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
+    CHECK_CPU(grad_outputs);
+    CHECK_CPU(features);
+    CHECK_CPU(alphas);
+    CHECK_CPU(points_idx);
+
    return alphaCompositeCpuBackward(
        grad_outputs, features, alphas, points_idx);
  }
--- a/pytorch3d/csrc/compositing/norm_weighted_sum.h
+++ b/pytorch3d/csrc/compositing/norm_weighted_sum.h
@@ -73,6 +73,10 @@ torch::Tensor weightedSumNormForward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
+    CHECK_CPU(features);
+    CHECK_CPU(alphas);
+    CHECK_CPU(points_idx);
+
    return weightedSumNormCpuForward(features, alphas, points_idx);
  }
 }
@@ -100,6 +104,11 @@ std::tuple<torch::Tensor, torch::Tensor> weightedSumNormBackward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
+    CHECK_CPU(grad_outputs);
+    CHECK_CPU(features);
+    CHECK_CPU(alphas);
+    CHECK_CPU(points_idx);
+
    return weightedSumNormCpuBackward(
        grad_outputs, features, alphas, points_idx);
  }
--- a/pytorch3d/csrc/compositing/weighted_sum.h
+++ b/pytorch3d/csrc/compositing/weighted_sum.h
@@ -72,6 +72,9 @@ torch::Tensor weightedSumForward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
+    CHECK_CPU(features);
+    CHECK_CPU(alphas);
+    CHECK_CPU(points_idx);
    return weightedSumCpuForward(features, alphas, points_idx);
  }
 }
@@ -98,6 +101,11 @@ std::tuple<torch::Tensor, torch::Tensor> weightedSumBackward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
+    CHECK_CPU(grad_outputs);
+    CHECK_CPU(features);
+    CHECK_CPU(alphas);
+    CHECK_CPU(points_idx);
+
    return weightedSumCpuBackward(grad_outputs, features, alphas, points_idx);
  }
 }
--- a/pytorch3d/csrc/ext.cpp
+++ b/pytorch3d/csrc/ext.cpp
@@ -8,7 +8,6 @@

 // clang-format off
 #include "./pulsar/global.h" // Include before <torch/extension.h>.
-#include <torch/extension.h>
 // clang-format on
 #include "./pulsar/pytorch/renderer.h"
 #include "./pulsar/pytorch/tensor_util.h"
@@ -106,15 +105,16 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  py::class_<
      pulsar::pytorch::Renderer,
      std::shared_ptr<pulsar::pytorch::Renderer>>(m, "PulsarRenderer")
-      .def(py::init<
-           const uint&,
-           const uint&,
-           const uint&,
-           const bool&,
-           const bool&,
-           const float&,
-           const uint&,
-           const uint&>())
+      .def(
+          py::init<
+              const uint&,
+              const uint&,
+              const uint&,
+              const bool&,
+              const bool&,
+              const float&,
+              const uint&,
+              const uint&>())
      .def(
          "__eq__",
          [](const pulsar::pytorch::Renderer& a,
--- a/pytorch3d/csrc/face_areas_normals/face_areas_normals.h
+++ b/pytorch3d/csrc/face_areas_normals/face_areas_normals.h
@@ -60,6 +60,8 @@ std::tuple<at::Tensor, at::Tensor> FaceAreasNormalsForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(verts);
+  CHECK_CPU(faces);
  return FaceAreasNormalsForwardCpu(verts, faces);
 }

@@ -80,5 +82,9 @@ at::Tensor FaceAreasNormalsBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(grad_areas);
+  CHECK_CPU(grad_normals);
+  CHECK_CPU(verts);
+  CHECK_CPU(faces);
  return FaceAreasNormalsBackwardCpu(grad_areas, grad_normals, verts, faces);
 }
--- a/pytorch3d/csrc/gather_scatter/gather_scatter.h
+++ b/pytorch3d/csrc/gather_scatter/gather_scatter.h
@@ -53,5 +53,7 @@ at::Tensor GatherScatter(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(input);
+  CHECK_CPU(edges);
  return GatherScatterCpu(input, edges, directed, backward);
 }
--- a/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.h
+++ b/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.h
@@ -57,6 +57,8 @@ at::Tensor InterpFaceAttrsForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(face_attrs);
+  CHECK_CPU(barycentric_coords);
  return InterpFaceAttrsForwardCpu(pix_to_face, barycentric_coords, face_attrs);
 }

@@ -106,6 +108,9 @@ std::tuple<at::Tensor, at::Tensor> InterpFaceAttrsBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(face_attrs);
+  CHECK_CPU(barycentric_coords);
+  CHECK_CPU(grad_pix_attrs);
  return InterpFaceAttrsBackwardCpu(
      pix_to_face, barycentric_coords, face_attrs, grad_pix_attrs);
 }
--- a/pytorch3d/csrc/iou_box3d/iou_box3d.h
+++ b/pytorch3d/csrc/iou_box3d/iou_box3d.h
@@ -44,5 +44,7 @@ inline std::tuple<at::Tensor, at::Tensor> IoUBox3D(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(boxes1);
+  CHECK_CPU(boxes2);
  return IoUBox3DCpu(boxes1.contiguous(), boxes2.contiguous());
 }
--- a/pytorch3d/csrc/knn/knn.h
+++ b/pytorch3d/csrc/knn/knn.h
@@ -74,6 +74,8 @@ std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdx(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(p1);
+  CHECK_CPU(p2);
  return KNearestNeighborIdxCpu(p1, p2, lengths1, lengths2, norm, K);
 }

@@ -140,6 +142,8 @@ std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(p1);
+  CHECK_CPU(p2);
  return KNearestNeighborBackwardCpu(
      p1, p2, lengths1, lengths2, idxs, norm, grad_dists);
 }
--- a/pytorch3d/csrc/marching_cubes/marching_cubes.h
+++ b/pytorch3d/csrc/marching_cubes/marching_cubes.h
@@ -58,5 +58,6 @@ inline std::tuple<at::Tensor, at::Tensor, at::Tensor> MarchingCubes(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(vol);
  return MarchingCubesCpu(vol.contiguous(), isolevel);
 }
--- a/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.h
+++ b/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.h
@@ -88,6 +88,8 @@ at::Tensor PackedToPadded(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(inputs_packed);
+  CHECK_CPU(first_idxs);
  return PackedToPaddedCpu(inputs_packed, first_idxs, max_size);
 }

@@ -105,5 +107,7 @@ at::Tensor PaddedToPacked(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(inputs_padded);
+  CHECK_CPU(first_idxs);
  return PaddedToPackedCpu(inputs_padded, first_idxs, num_inputs);
 }
--- a/pytorch3d/csrc/point_mesh/point_mesh_cpu.cpp
+++ b/pytorch3d/csrc/point_mesh/point_mesh_cpu.cpp
@@ -174,8 +174,8 @@ std::tuple<at::Tensor, at::Tensor> HullHullDistanceForwardCpu(
  at::Tensor idxs = at::zeros({A_N,}, as_first_idx.options());
  // clang-format on

-  auto as_a = as.accessor < float, H1 == 1 ? 2 : 3 > ();
-  auto bs_a = bs.accessor < float, H2 == 1 ? 2 : 3 > ();
+  auto as_a = as.accessor<float, H1 == 1 ? 2 : 3>();
+  auto bs_a = bs.accessor<float, H2 == 1 ? 2 : 3>();
  auto as_first_idx_a = as_first_idx.accessor<int64_t, 1>();
  auto bs_first_idx_a = bs_first_idx.accessor<int64_t, 1>();
  auto dists_a = dists.accessor<float, 1>();
@@ -230,10 +230,10 @@ std::tuple<at::Tensor, at::Tensor> HullHullDistanceBackwardCpu(
  at::Tensor grad_as = at::zeros_like(as);
  at::Tensor grad_bs = at::zeros_like(bs);

-  auto as_a = as.accessor < float, H1 == 1 ? 2 : 3 > ();
-  auto bs_a = bs.accessor < float, H2 == 1 ? 2 : 3 > ();
-  auto grad_as_a = grad_as.accessor < float, H1 == 1 ? 2 : 3 > ();
-  auto grad_bs_a = grad_bs.accessor < float, H2 == 1 ? 2 : 3 > ();
+  auto as_a = as.accessor<float, H1 == 1 ? 2 : 3>();
+  auto bs_a = bs.accessor<float, H2 == 1 ? 2 : 3>();
+  auto grad_as_a = grad_as.accessor<float, H1 == 1 ? 2 : 3>();
+  auto grad_bs_a = grad_bs.accessor<float, H2 == 1 ? 2 : 3>();
  auto idx_bs_a = idx_bs.accessor<int64_t, 1>();
  auto grad_dists_a = grad_dists.accessor<float, 1>();

--- a/pytorch3d/csrc/point_mesh/point_mesh_cuda.h
+++ b/pytorch3d/csrc/point_mesh/point_mesh_cuda.h
@@ -88,6 +88,10 @@ std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(points);
+  CHECK_CPU(points_first_idx);
+  CHECK_CPU(tris);
+  CHECK_CPU(tris_first_idx);
  return PointFaceDistanceForwardCpu(
      points, points_first_idx, tris, tris_first_idx, min_triangle_area);
 }
@@ -143,6 +147,10 @@ std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(points);
+  CHECK_CPU(tris);
+  CHECK_CPU(idx_points);
+  CHECK_CPU(grad_dists);
  return PointFaceDistanceBackwardCpu(
      points, tris, idx_points, grad_dists, min_triangle_area);
 }
@@ -221,6 +229,10 @@ std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(points);
+  CHECK_CPU(points_first_idx);
+  CHECK_CPU(tris);
+  CHECK_CPU(tris_first_idx);
  return FacePointDistanceForwardCpu(
      points, points_first_idx, tris, tris_first_idx, min_triangle_area);
 }
@@ -277,6 +289,10 @@ std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(points);
+  CHECK_CPU(tris);
+  CHECK_CPU(idx_tris);
+  CHECK_CPU(grad_dists);
  return FacePointDistanceBackwardCpu(
      points, tris, idx_tris, grad_dists, min_triangle_area);
 }
@@ -346,6 +362,10 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(points);
+  CHECK_CPU(points_first_idx);
+  CHECK_CPU(segms);
+  CHECK_CPU(segms_first_idx);
  return PointEdgeDistanceForwardCpu(
      points, points_first_idx, segms, segms_first_idx, max_points);
 }
@@ -396,6 +416,10 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(points);
+  CHECK_CPU(segms);
+  CHECK_CPU(idx_points);
+  CHECK_CPU(grad_dists);
  return PointEdgeDistanceBackwardCpu(points, segms, idx_points, grad_dists);
 }

@@ -464,6 +488,10 @@ std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(points);
+  CHECK_CPU(points_first_idx);
+  CHECK_CPU(segms);
+  CHECK_CPU(segms_first_idx);
  return EdgePointDistanceForwardCpu(
      points, points_first_idx, segms, segms_first_idx, max_segms);
 }
@@ -514,6 +542,10 @@ std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(points);
+  CHECK_CPU(segms);
+  CHECK_CPU(idx_segms);
+  CHECK_CPU(grad_dists);
  return EdgePointDistanceBackwardCpu(points, segms, idx_segms, grad_dists);
 }

@@ -567,6 +599,8 @@ torch::Tensor PointFaceArrayDistanceForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(points);
+  CHECK_CPU(tris);
  return PointFaceArrayDistanceForwardCpu(points, tris, min_triangle_area);
 }

@@ -613,6 +647,9 @@ std::tuple<torch::Tensor, torch::Tensor> PointFaceArrayDistanceBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(points);
+  CHECK_CPU(tris);
+  CHECK_CPU(grad_dists);
  return PointFaceArrayDistanceBackwardCpu(
      points, tris, grad_dists, min_triangle_area);
 }
@@ -661,6 +698,8 @@ torch::Tensor PointEdgeArrayDistanceForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(points);
+  CHECK_CPU(segms);
  return PointEdgeArrayDistanceForwardCpu(points, segms);
 }

@@ -703,5 +742,8 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeArrayDistanceBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(points);
+  CHECK_CPU(segms);
+  CHECK_CPU(grad_dists);
  return PointEdgeArrayDistanceBackwardCpu(points, segms, grad_dists);
 }
--- a/pytorch3d/csrc/points_to_volumes/points_to_volumes.h
+++ b/pytorch3d/csrc/points_to_volumes/points_to_volumes.h
@@ -104,6 +104,12 @@ inline void PointsToVolumesForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(points_3d);
+  CHECK_CPU(points_features);
+  CHECK_CPU(volume_densities);
+  CHECK_CPU(volume_features);
+  CHECK_CPU(grid_sizes);
+  CHECK_CPU(mask);
  PointsToVolumesForwardCpu(
      points_3d,
      points_features,
@@ -183,6 +189,14 @@ inline void PointsToVolumesBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(points_3d);
+  CHECK_CPU(points_features);
+  CHECK_CPU(grid_sizes);
+  CHECK_CPU(mask);
+  CHECK_CPU(grad_volume_densities);
+  CHECK_CPU(grad_volume_features);
+  CHECK_CPU(grad_points_3d);
+  CHECK_CPU(grad_points_features);
  PointsToVolumesBackwardCpu(
      points_3d,
      points_features,
--- a/pytorch3d/csrc/pulsar/global.h
+++ b/pytorch3d/csrc/pulsar/global.h
@@ -15,8 +15,8 @@
 #endif

 #if defined(_WIN64) || defined(_WIN32)
-#define uint unsigned int
-#define ushort unsigned short
+using uint = unsigned int;
+using ushort = unsigned short;
 #endif

 #include "./logging.h" // <- include before torch/extension.h
--- a/pytorch3d/csrc/pulsar/gpu/commands.h
+++ b/pytorch3d/csrc/pulsar/gpu/commands.h
@@ -417,7 +417,7 @@ __device__ static float atomicMin(float* address, float val) {
      (OUT_PTR),              \
      (NUM_SELECTED_PTR),     \
      (NUM_ITEMS),            \
-      stream = (STREAM));
+      (STREAM));

 #define COPY_HOST_DEV(PTR_D, PTR_H, TYPE, SIZE) \
  HANDLECUDA(cudaMemcpy(                        \
--- a/pytorch3d/csrc/pulsar/host/commands.h
+++ b/pytorch3d/csrc/pulsar/host/commands.h
@@ -357,11 +357,11 @@ void MAX_WS(
 //
 //
 #define END_PARALLEL() \
-  end_parallel :;      \
+  end_parallel:;       \
  }
 #define END_PARALLEL_NORET() }
 #define END_PARALLEL_2D() \
-  end_parallel :;         \
+  end_parallel:;          \
  }                       \
  }
 #define END_PARALLEL_2D_NORET() \
--- a/pytorch3d/csrc/pulsar/include/camera.h
+++ b/pytorch3d/csrc/pulsar/include/camera.h
@@ -70,11 +70,6 @@ struct CamGradInfo {
  float3 pixel_dir_y;
 };

-// TODO: remove once https://github.com/NVlabs/cub/issues/172 is resolved.
-struct IntWrapper {
-  int val;
-};
-
 } // namespace pulsar

 #endif
--- a/pytorch3d/csrc/pulsar/include/math.h
+++ b/pytorch3d/csrc/pulsar/include/math.h
@@ -149,11 +149,6 @@ IHD CamGradInfo operator*(const CamGradInfo& a, const float& b) {
  return res;
 }

-IHD IntWrapper operator+(const IntWrapper& a, const IntWrapper& b) {
-  IntWrapper res;
-  res.val = a.val + b.val;
-  return res;
-}
 } // namespace pulsar

 #endif
--- a/pytorch3d/csrc/pulsar/include/renderer.backward.device.h
+++ b/pytorch3d/csrc/pulsar/include/renderer.backward.device.h
@@ -155,8 +155,8 @@ void backward(
        stream);
    CHECKLAUNCH();
    SUM_WS(
-        (IntWrapper*)(self->ids_sorted_d),
-        (IntWrapper*)(self->n_grad_contributions_d),
+        self->ids_sorted_d,
+        self->n_grad_contributions_d,
        static_cast<int>(num_balls),
        self->workspace_d,
        self->workspace_size,
--- a/pytorch3d/csrc/pulsar/include/renderer.construct.device.h
+++ b/pytorch3d/csrc/pulsar/include/renderer.construct.device.h
@@ -52,7 +52,7 @@ HOST void construct(
  self->cam.film_width = width;
  self->cam.film_height = height;
  self->max_num_balls = max_num_balls;
-  MALLOC(self->result_d, float, width* height* n_channels);
+  MALLOC(self->result_d, float, width * height * n_channels);
  self->cam.orthogonal_projection = orthogonal_projection;
  self->cam.right_handed = right_handed_system;
  self->cam.background_normalization_depth = background_normalization_depth;
@@ -93,7 +93,7 @@ HOST void construct(
  MALLOC(self->di_sorted_d, DrawInfo, max_num_balls);
  MALLOC(self->region_flags_d, char, max_num_balls);
  MALLOC(self->num_selected_d, size_t, 1);
-  MALLOC(self->forw_info_d, float, width* height * (3 + 2 * n_track));
+  MALLOC(self->forw_info_d, float, width * height * (3 + 2 * n_track));
  MALLOC(self->min_max_pixels_d, IntersectInfo, 1);
  MALLOC(self->grad_pos_d, float3, max_num_balls);
  MALLOC(self->grad_col_d, float, max_num_balls* n_channels);
--- a/pytorch3d/csrc/pulsar/include/renderer.h
+++ b/pytorch3d/csrc/pulsar/include/renderer.h
@@ -255,7 +255,7 @@ GLOBAL void calc_signature(
 * for every iteration through the loading loop every thread could add a
 * 'hit' to the buffer.
 */
-#define RENDER_BUFFER_SIZE RENDER_BLOCK_SIZE* RENDER_BLOCK_SIZE * 2
+#define RENDER_BUFFER_SIZE RENDER_BLOCK_SIZE * RENDER_BLOCK_SIZE * 2
 /**
 * The threshold after which the spheres that are in the render buffer
 * are rendered and the buffer is flushed.
--- a/pytorch3d/csrc/pulsar/warnings.cpp
+++ b/pytorch3d/csrc/pulsar/warnings.cpp
@@ -6,9 +6,6 @@
 * LICENSE file in the root directory of this source tree.
 */

-#include "./global.h"
-#include "./logging.h"
-
 /**
 * A compilation unit to provide warnings about the code and avoid
 * repeated messages.
--- a/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.h
+++ b/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.h
@@ -138,6 +138,9 @@ RasterizeMeshesNaive(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
+    CHECK_CPU(face_verts);
+    CHECK_CPU(mesh_to_face_first_idx);
+    CHECK_CPU(num_faces_per_mesh);
    return RasterizeMeshesNaiveCpu(
        face_verts,
        mesh_to_face_first_idx,
@@ -232,6 +235,11 @@ torch::Tensor RasterizeMeshesBackward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
+    CHECK_CPU(face_verts);
+    CHECK_CPU(pix_to_face);
+    CHECK_CPU(grad_zbuf);
+    CHECK_CPU(grad_bary);
+    CHECK_CPU(grad_dists);
    return RasterizeMeshesBackwardCpu(
        face_verts,
        pix_to_face,
@@ -306,6 +314,9 @@ torch::Tensor RasterizeMeshesCoarse(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
+    CHECK_CPU(face_verts);
+    CHECK_CPU(mesh_to_face_first_idx);
+    CHECK_CPU(num_faces_per_mesh);
    return RasterizeMeshesCoarseCpu(
        face_verts,
        mesh_to_face_first_idx,
@@ -423,6 +434,8 @@ RasterizeMeshesFine(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
+    CHECK_CPU(face_verts);
+    CHECK_CPU(bin_faces);
    AT_ERROR("NOT IMPLEMENTED");
  }
 }
--- a/pytorch3d/csrc/rasterize_points/rasterize_points.h
+++ b/pytorch3d/csrc/rasterize_points/rasterize_points.h
@@ -91,6 +91,10 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> RasterizePointsNaive(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
+    CHECK_CPU(points);
+    CHECK_CPU(cloud_to_packed_first_idx);
+    CHECK_CPU(num_points_per_cloud);
+    CHECK_CPU(radius);
    return RasterizePointsNaiveCpu(
        points,
        cloud_to_packed_first_idx,
@@ -166,6 +170,10 @@ torch::Tensor RasterizePointsCoarse(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
+    CHECK_CPU(points);
+    CHECK_CPU(cloud_to_packed_first_idx);
+    CHECK_CPU(num_points_per_cloud);
+    CHECK_CPU(radius);
    return RasterizePointsCoarseCpu(
        points,
        cloud_to_packed_first_idx,
@@ -232,6 +240,8 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> RasterizePointsFine(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
+    CHECK_CPU(points);
+    CHECK_CPU(bin_points);
    AT_ERROR("NOT IMPLEMENTED");
  }
 }
@@ -284,6 +294,10 @@ torch::Tensor RasterizePointsBackward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
+    CHECK_CPU(points);
+    CHECK_CPU(idxs);
+    CHECK_CPU(grad_zbuf);
+    CHECK_CPU(grad_dists);
    return RasterizePointsBackwardCpu(points, idxs, grad_zbuf, grad_dists);
  }
 }
--- a/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.cu
+++ b/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.cu
@@ -107,7 +107,8 @@ at::Tensor FarthestPointSamplingCuda(
    const at::Tensor& points, // (N, P, 3)
    const at::Tensor& lengths, // (N,)
    const at::Tensor& K, // (N,)
-    const at::Tensor& start_idxs) {
+    const at::Tensor& start_idxs,
+    const int64_t max_K_known = -1) {
  // Check inputs are on the same device
  at::TensorArg p_t{points, "points", 1}, lengths_t{lengths, "lengths", 2},
      k_t{K, "K", 3}, start_idxs_t{start_idxs, "start_idxs", 4};
@@ -129,7 +130,12 @@ at::Tensor FarthestPointSamplingCuda(

  const int64_t N = points.size(0);
  const int64_t P = points.size(1);
-  const int64_t max_K = at::max(K).item<int64_t>();
+  int64_t max_K;
+  if (max_K_known > 0) {
+    max_K = max_K_known;
+  } else {
+    max_K = at::max(K).item<int64_t>();
+  }

  // Initialize the output tensor with the sampled indices
  auto idxs = at::full({N, max_K}, -1, lengths.options());
--- a/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.h
+++ b/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.h
@@ -43,7 +43,8 @@ at::Tensor FarthestPointSamplingCuda(
    const at::Tensor& points,
    const at::Tensor& lengths,
    const at::Tensor& K,
-    const at::Tensor& start_idxs);
+    const at::Tensor& start_idxs,
+    const int64_t max_K_known = -1);

 at::Tensor FarthestPointSamplingCpu(
    const at::Tensor& points,
@@ -56,17 +57,23 @@ at::Tensor FarthestPointSampling(
    const at::Tensor& points,
    const at::Tensor& lengths,
    const at::Tensor& K,
-    const at::Tensor& start_idxs) {
+    const at::Tensor& start_idxs,
+    const int64_t max_K_known = -1) {
  if (points.is_cuda() || lengths.is_cuda() || K.is_cuda()) {
 #ifdef WITH_CUDA
    CHECK_CUDA(points);
    CHECK_CUDA(lengths);
    CHECK_CUDA(K);
    CHECK_CUDA(start_idxs);
-    return FarthestPointSamplingCuda(points, lengths, K, start_idxs);
+    return FarthestPointSamplingCuda(
+        points, lengths, K, start_idxs, max_K_known);
 #else
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(points);
+  CHECK_CPU(lengths);
+  CHECK_CPU(K);
+  CHECK_CPU(start_idxs);
  return FarthestPointSamplingCpu(points, lengths, K, start_idxs);
 }
--- a/pytorch3d/csrc/sample_pdf/sample_pdf.h
+++ b/pytorch3d/csrc/sample_pdf/sample_pdf.h
@@ -71,6 +71,8 @@ inline void SamplePdf(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
+  CHECK_CPU(weights);
+  CHECK_CPU(outputs);
  CHECK_CONTIGUOUS(outputs);
  SamplePdfCpu(bins, weights, outputs, eps);
 }
--- a/pytorch3d/csrc/utils/dispatch.cuh
+++ b/pytorch3d/csrc/utils/dispatch.cuh
@@ -99,8 +99,7 @@ namespace {
 // and increment it via template recursion until it is equal to the run-time
 // argument N.
 template <
-    template <typename, int64_t>
-    class Kernel,
+    template <typename, int64_t> class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
@@ -124,8 +123,7 @@ struct DispatchKernelHelper1D {
 // 1D dispatch: Specialization when curN == maxN
 // We need this base case to avoid infinite template recursion.
 template <
-    template <typename, int64_t>
-    class Kernel,
+    template <typename, int64_t> class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
@@ -145,8 +143,7 @@ struct DispatchKernelHelper1D<Kernel, T, minN, maxN, maxN, Args...> {
 // the run-time values of N and M, at which point we dispatch to the run
 // method of the kernel.
 template <
-    template <typename, int64_t, int64_t>
-    class Kernel,
+    template <typename, int64_t, int64_t> class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
@@ -203,8 +200,7 @@ struct DispatchKernelHelper2D {

 // 2D dispatch, specialization for curN == maxN
 template <
-    template <typename, int64_t, int64_t>
-    class Kernel,
+    template <typename, int64_t, int64_t> class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
@@ -243,8 +239,7 @@ struct DispatchKernelHelper2D<

 // 2D dispatch, specialization for curM == maxM
 template <
-    template <typename, int64_t, int64_t>
-    class Kernel,
+    template <typename, int64_t, int64_t> class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
@@ -283,8 +278,7 @@ struct DispatchKernelHelper2D<

 // 2D dispatch, specialization for curN == maxN, curM == maxM
 template <
-    template <typename, int64_t, int64_t>
-    class Kernel,
+    template <typename, int64_t, int64_t> class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
@@ -313,8 +307,7 @@ struct DispatchKernelHelper2D<

 // This is the function we expect users to call to dispatch to 1D functions
 template <
-    template <typename, int64_t>
-    class Kernel,
+    template <typename, int64_t> class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
@@ -330,8 +323,7 @@ void DispatchKernel1D(const int64_t N, Args... args) {

 // This is the function we expect users to call to dispatch to 2D functions
 template <
-    template <typename, int64_t, int64_t>
-    class Kernel,
+    template <typename, int64_t, int64_t> class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
--- a/pytorch3d/csrc/utils/pytorch3d_cutils.h
+++ b/pytorch3d/csrc/utils/pytorch3d_cutils.h
@@ -15,3 +15,7 @@
 #define CHECK_CONTIGUOUS_CUDA(x) \
  CHECK_CUDA(x);                 \
  CHECK_CONTIGUOUS(x)
+#define CHECK_CPU(x)                    \
+  TORCH_CHECK(                          \
+      x.device().type() == torch::kCPU, \
+      "Cannot use CPU implementation: " #x " not on CPU.")
--- a/pytorch3d/implicitron/dataset/dataset_base.py
+++ b/pytorch3d/implicitron/dataset/dataset_base.py
@@ -21,7 +21,6 @@ from typing import (
 )

 import torch
-
 from pytorch3d.implicitron.dataset.frame_data import FrameData
 from pytorch3d.implicitron.dataset.utils import GenericWorkaround

--- a/pytorch3d/implicitron/dataset/frame_data.py
+++ b/pytorch3d/implicitron/dataset/frame_data.py
@@ -25,7 +25,6 @@ from typing import (

 import numpy as np
 import torch
-
 from pytorch3d.implicitron.dataset import orm_types, types
 from pytorch3d.implicitron.dataset.utils import (
    adjust_camera_to_bbox_crop_,
--- a/pytorch3d/implicitron/dataset/json_index_dataset.py
+++ b/pytorch3d/implicitron/dataset/json_index_dataset.py
@@ -38,7 +38,6 @@ from pytorch3d.implicitron.dataset.utils import is_known_frame_scalar
 from pytorch3d.implicitron.tools.config import registry, ReplaceableBase
 from pytorch3d.renderer.camera_utils import join_cameras_as_batch
 from pytorch3d.renderer.cameras import CamerasBase
-
 from tqdm import tqdm


@@ -327,9 +326,9 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase):
                assert os.path.normpath(
                    # pyre-ignore[16]
                    self.frame_annots[idx]["frame_annotation"].image.path
-                ) == os.path.normpath(
-                    path
-                ), f"Inconsistent frame indices {seq_name, frame_no, path}."
+                ) == os.path.normpath(path), (
+                    f"Inconsistent frame indices {seq_name, frame_no, path}."
+                )
            return idx

        dataset_idx = [
--- a/pytorch3d/implicitron/dataset/json_index_dataset_map_provider.py
+++ b/pytorch3d/implicitron/dataset/json_index_dataset_map_provider.py
@@ -21,7 +21,6 @@ from pytorch3d.renderer.cameras import CamerasBase

 from .dataset_map_provider import DatasetMap, DatasetMapProviderBase, PathManagerFactory
 from .json_index_dataset import JsonIndexDataset
-
 from .utils import (
    DATASET_TYPE_KNOWN,
    DATASET_TYPE_TEST,
--- a/pytorch3d/implicitron/dataset/json_index_dataset_map_provider_v2.py
+++ b/pytorch3d/implicitron/dataset/json_index_dataset_map_provider_v2.py
@@ -18,7 +18,6 @@ from typing import Dict, List, Optional, Tuple, Type, Union

 import numpy as np
 from iopath.common.file_io import PathManager
-
 from omegaconf import DictConfig
 from pytorch3d.implicitron.dataset.dataset_map_provider import (
    DatasetMap,
@@ -31,7 +30,6 @@ from pytorch3d.implicitron.tools.config import (
    registry,
    run_auto_creation,
 )
-
 from pytorch3d.renderer.cameras import CamerasBase
 from tqdm import tqdm

--- a/pytorch3d/implicitron/dataset/llff_dataset_map_provider.py
+++ b/pytorch3d/implicitron/dataset/llff_dataset_map_provider.py
@@ -12,7 +12,6 @@ import torch
 from pytorch3d.implicitron.tools.config import registry

 from .load_llff import load_llff_data
-
 from .single_sequence_dataset import (
    _interpret_blender_cameras,
    SingleSceneDatasetMapProviderBase,
--- a/pytorch3d/implicitron/dataset/load_llff.py
+++ b/pytorch3d/implicitron/dataset/load_llff.py
@@ -8,7 +8,6 @@ import os
 import warnings

 import numpy as np
-
 from PIL import Image


--- a/pytorch3d/implicitron/dataset/orm_types.py
+++ b/pytorch3d/implicitron/dataset/orm_types.py
@@ -13,7 +13,6 @@ import struct
 from typing import Optional, Tuple

 import numpy as np
-
 from pytorch3d.implicitron.dataset.types import (
    DepthAnnotation,
    ImageAnnotation,
@@ -22,7 +21,6 @@ from pytorch3d.implicitron.dataset.types import (
    VideoAnnotation,
    ViewpointAnnotation,
 )
-
 from sqlalchemy import LargeBinary
 from sqlalchemy.orm import (
    composite,
--- a/pytorch3d/implicitron/dataset/sql_dataset.py
+++ b/pytorch3d/implicitron/dataset/sql_dataset.py
@@ -10,7 +10,6 @@ import hashlib
 import json
 import logging
 import os
-
 import urllib
 from dataclasses import dataclass, Field, field
 from typing import (
@@ -32,13 +31,11 @@ import pandas as pd
 import sqlalchemy as sa
 import torch
 from pytorch3d.implicitron.dataset.dataset_base import DatasetBase
-
 from pytorch3d.implicitron.dataset.frame_data import (
    FrameData,
    FrameDataBuilder,  # noqa
    FrameDataBuilderBase,
 )
-
 from pytorch3d.implicitron.tools.config import (
    registry,
    ReplaceableBase,
@@ -486,9 +483,10 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
            *self._get_pick_filters(),
            *self._get_exclude_filters(),
        ]
-        if self.pick_sequences_sql_clause:
+        if pick_sequences_sql_clause := self.pick_sequences_sql_clause:
            print("Applying the custom SQL clause.")
-            where_conditions.append(sa.text(self.pick_sequences_sql_clause))
+            # pyre-ignore[6]: TextClause is compatible with where conditions
+            where_conditions.append(sa.text(pick_sequences_sql_clause))

        def add_where(stmt):
            return stmt.where(*where_conditions) if where_conditions else stmt
@@ -508,6 +506,7 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):

            subquery = add_where(subquery).subquery()
            stmt = sa.select(subquery.c.sequence_name).where(
+                # pyre-ignore[6]: SQLAlchemy column comparison returns ColumnElement, not bool
                subquery.c.row_number <= self.limit_sequences_per_category_to
            )

@@ -636,9 +635,10 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
                    )
                )

-        if self.pick_frames_sql_clause:
+        if pick_frames_sql_clause := self.pick_frames_sql_clause:
            logger.info("Applying the custom SQL clause.")
-            pick_frames_criteria.append(sa.text(self.pick_frames_sql_clause))
+            # pyre-ignore[6]: TextClause is compatible with where conditions
+            pick_frames_criteria.append(sa.text(pick_frames_sql_clause))

        if pick_frames_criteria:
            index = self._pick_frames_by_criteria(index, pick_frames_criteria)
@@ -701,9 +701,10 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
                )
            )

-        if self.pick_frames_sql_clause:
+        if pick_frames_sql_clause := self.pick_frames_sql_clause:
            logger.info("  applying custom SQL clause")
-            where_conditions.append(sa.text(self.pick_frames_sql_clause))
+            # pyre-ignore[6]: TextClause is compatible with where conditions
+            where_conditions.append(sa.text(pick_frames_sql_clause))

        if where_conditions:
            stmt = stmt.where(*where_conditions)
@@ -755,7 +756,7 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
        if pick_sequences:
            old_len = len(eval_batches)
            eval_batches = [b for b in eval_batches if b[0][0] in pick_sequences]
-            logger.warn(
+            logger.warning(
                f"Picked eval batches by sequence/cat: {old_len} -> {len(eval_batches)}"
            )

@@ -763,7 +764,7 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
            old_len = len(eval_batches)
            exclude_sequences = set(self.exclude_sequences)
            eval_batches = [b for b in eval_batches if b[0][0] not in exclude_sequences]
-            logger.warn(
+            logger.warning(
                f"Excluded eval batches by sequence: {old_len} -> {len(eval_batches)}"
            )

--- a/pytorch3d/implicitron/dataset/sql_dataset_provider.py
+++ b/pytorch3d/implicitron/dataset/sql_dataset_provider.py
@@ -12,9 +12,7 @@ import os
 from typing import List, Optional, Tuple, Type

 import numpy as np
-
 from omegaconf import DictConfig, OmegaConf
-
 from pytorch3d.implicitron.dataset.dataset_map_provider import (
    DatasetMap,
    DatasetMapProviderBase,
--- a/pytorch3d/implicitron/dataset/train_eval_data_loader_provider.py
+++ b/pytorch3d/implicitron/dataset/train_eval_data_loader_provider.py
@@ -18,7 +18,6 @@ from pytorch3d.implicitron.dataset.dataset_base import DatasetBase
 from pytorch3d.implicitron.dataset.dataset_map_provider import DatasetMap
 from pytorch3d.implicitron.dataset.frame_data import FrameData
 from pytorch3d.implicitron.tools.config import registry, run_auto_creation
-
 from torch.utils.data import DataLoader

 logger = logging.getLogger(__name__)
--- a/pytorch3d/implicitron/dataset/utils.py
+++ b/pytorch3d/implicitron/dataset/utils.py
@@ -15,7 +15,6 @@ from typing import List, Optional, Tuple, TypeVar, Union
 import numpy as np
 import torch
 from PIL import Image
-
 from pytorch3d.io import IO
 from pytorch3d.renderer.cameras import PerspectiveCameras
 from pytorch3d.structures.pointclouds import Pointclouds
--- a/pytorch3d/implicitron/evaluation/evaluator.py
+++ b/pytorch3d/implicitron/evaluation/evaluator.py
@@ -14,7 +14,6 @@ import warnings
 from typing import Any, Dict, List, Optional, Tuple

 import torch
-
 import tqdm
 from pytorch3d.implicitron.evaluation import evaluate_new_view_synthesis as evaluate
 from pytorch3d.implicitron.models.base_model import EvaluationMode, ImplicitronModelBase
--- a/pytorch3d/implicitron/models/base_model.py
+++ b/pytorch3d/implicitron/models/base_model.py
@@ -10,7 +10,6 @@ from dataclasses import dataclass, field
 from typing import Any, Dict, List, Optional

 import torch
-
 from pytorch3d.implicitron.models.renderer.base import EvaluationMode
 from pytorch3d.implicitron.tools.config import ReplaceableBase
 from pytorch3d.renderer.cameras import CamerasBase
--- a/pytorch3d/implicitron/models/generic_model.py
+++ b/pytorch3d/implicitron/models/generic_model.py
@@ -16,7 +16,6 @@ from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING, Union

 import torch
 from omegaconf import DictConfig
-
 from pytorch3d.implicitron.models.base_model import (
    ImplicitronModelBase,
    ImplicitronRender,
@@ -28,7 +27,6 @@ from pytorch3d.implicitron.models.metrics import (
    RegularizationMetricsBase,
    ViewMetricsBase,
 )
-
 from pytorch3d.implicitron.models.renderer.base import (
    BaseRenderer,
    EvaluationMode,
@@ -38,7 +36,6 @@ from pytorch3d.implicitron.models.renderer.base import (
    RenderSamplingMode,
 )
 from pytorch3d.implicitron.models.renderer.ray_sampler import RaySamplerBase
-
 from pytorch3d.implicitron.models.utils import (
    apply_chunked,
    chunk_generator,
@@ -53,7 +50,6 @@ from pytorch3d.implicitron.tools.config import (
    registry,
    run_auto_creation,
 )
-
 from pytorch3d.implicitron.tools.rasterize_mc import rasterize_sparse_ray_bundle
 from pytorch3d.renderer import utils as rend_utils
 from pytorch3d.renderer.cameras import CamerasBase
--- a/pytorch3d/implicitron/models/implicit_function/base.py
+++ b/pytorch3d/implicitron/models/implicit_function/base.py
@@ -10,7 +10,6 @@ from abc import ABC, abstractmethod
 from typing import Optional

 from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
-
 from pytorch3d.implicitron.tools.config import ReplaceableBase
 from pytorch3d.renderer.cameras import CamerasBase

--- a/pytorch3d/implicitron/models/implicit_function/decoding_functions.py
+++ b/pytorch3d/implicitron/models/implicit_function/decoding_functions.py
@@ -16,14 +16,11 @@ This file contains

 import logging
 from dataclasses import field
-
 from enum import Enum
 from typing import Dict, Optional, Tuple

 import torch
-
 from omegaconf import DictConfig
-
 from pytorch3d.implicitron.tools.config import (
    Configurable,
    registry,
--- a/pytorch3d/implicitron/models/implicit_function/idr_feature_field.py
+++ b/pytorch3d/implicitron/models/implicit_function/idr_feature_field.py
@@ -11,7 +11,6 @@ import torch
 from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
 from pytorch3d.implicitron.tools.config import registry
 from pytorch3d.renderer.implicit import HarmonicEmbedding
-
 from torch import nn

 from .base import ImplicitFunctionBase
--- a/pytorch3d/implicitron/models/implicit_function/neural_radiance_field.py
+++ b/pytorch3d/implicitron/models/implicit_function/neural_radiance_field.py
@@ -21,7 +21,6 @@ from pytorch3d.renderer.implicit import HarmonicEmbedding
 from pytorch3d.renderer.implicit.utils import ray_bundle_to_ray_points

 from .base import ImplicitFunctionBase
-
 from .decoding_functions import (  # noqa
    _xavier_init,
    MLPWithInputSkips,
--- a/pytorch3d/implicitron/models/implicit_function/utils.py
+++ b/pytorch3d/implicitron/models/implicit_function/utils.py
@@ -9,7 +9,6 @@
 from typing import Callable, Optional

 import torch
-
 import torch.nn.functional as F
 from pytorch3d.common.compat import prod
 from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
--- a/pytorch3d/implicitron/models/implicit_function/voxel_grid.py
+++ b/pytorch3d/implicitron/models/implicit_function/voxel_grid.py
@@ -21,8 +21,6 @@ import logging
 import warnings
 from collections.abc import Mapping
 from dataclasses import dataclass, field
-
-from distutils.version import LooseVersion
 from typing import Any, Callable, ClassVar, Dict, Iterator, List, Optional, Tuple, Type

 import torch
@@ -222,7 +220,8 @@ class VoxelGridBase(ReplaceableBase, torch.nn.Module):
                + "| 'bicubic' | 'linear' | 'area' | 'nearest-exact'"
            )

-        interpolate_has_antialias = LooseVersion(torch.__version__) >= "1.11"
+        # We assume PyTorch 1.11 and newer.
+        interpolate_has_antialias = True

        if antialias and not interpolate_has_antialias:
            warnings.warn("Antialiased interpolation requires PyTorch 1.11+; ignoring")
--- a/pytorch3d/implicitron/models/implicit_function/voxel_grid_implicit_function.py
+++ b/pytorch3d/implicitron/models/implicit_function/voxel_grid_implicit_function.py
@@ -13,9 +13,7 @@ from dataclasses import fields
 from typing import Callable, Dict, Optional, Tuple

 import torch
-
 from omegaconf import DictConfig
-
 from pytorch3d.implicitron.models.implicit_function.base import ImplicitFunctionBase
 from pytorch3d.implicitron.models.implicit_function.decoding_functions import (
    DecoderFunctionBase,
--- a/pytorch3d/implicitron/models/overfit_model.py
+++ b/pytorch3d/implicitron/models/overfit_model.py
@@ -17,7 +17,6 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, TYPE_CHECKING, Un

 import torch
 from omegaconf import DictConfig
-
 from pytorch3d.implicitron.models.base_model import (
    ImplicitronModelBase,
    ImplicitronRender,
@@ -28,7 +27,6 @@ from pytorch3d.implicitron.models.metrics import (
    RegularizationMetricsBase,
    ViewMetricsBase,
 )
-
 from pytorch3d.implicitron.models.renderer.base import (
    BaseRenderer,
    EvaluationMode,
@@ -50,7 +48,6 @@ from pytorch3d.implicitron.tools.config import (
    registry,
    run_auto_creation,
 )
-
 from pytorch3d.implicitron.tools.rasterize_mc import rasterize_sparse_ray_bundle
 from pytorch3d.renderer import utils as rend_utils
 from pytorch3d.renderer.cameras import CamerasBase
--- a/pytorch3d/implicitron/models/renderer/ray_point_refiner.py
+++ b/pytorch3d/implicitron/models/renderer/ray_point_refiner.py
@@ -11,7 +11,6 @@ import copy
 import torch
 from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
 from pytorch3d.implicitron.tools.config import Configurable, expand_args_fields
-
 from pytorch3d.renderer.implicit.sample_pdf import sample_pdf


--- a/pytorch3d/implicitron/models/renderer/rgb_net.py
+++ b/pytorch3d/implicitron/models/renderer/rgb_net.py
@@ -12,7 +12,6 @@ import torch
 from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
 from pytorch3d.implicitron.tools.config import enable_get_default_args
 from pytorch3d.renderer.implicit import HarmonicEmbedding
-
 from torch import nn


--- a/pytorch3d/implicitron/models/utils.py
+++ b/pytorch3d/implicitron/models/utils.py
@@ -17,11 +17,8 @@ from typing import Any, Dict, Optional, Tuple
 import torch
 import tqdm
 from pytorch3d.common.compat import prod
-
 from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
-
 from pytorch3d.implicitron.tools import image_utils
-
 from pytorch3d.implicitron.tools.utils import cat_dataclass


@@ -83,9 +80,9 @@ def preprocess_input(

    if mask_depths and fg_mask is not None and depth_map is not None:
        # mask the depths
-        assert (
-            mask_threshold > 0.0
-        ), "Depths should be masked only with thresholded masks"
+        assert mask_threshold > 0.0, (
+            "Depths should be masked only with thresholded masks"
+        )
        warnings.warn("Masking depths!")
        depth_map = depth_map * fg_mask

--- a/pytorch3d/implicitron/models/visualization/render_flyaround.py
+++ b/pytorch3d/implicitron/models/visualization/render_flyaround.py
@@ -304,11 +304,11 @@ def _show_predictions(
    assert isinstance(preds, list)

    pred_all = []
-    # Randomly choose a subset of the rendered images, sort by ordr in the sequence
+    # Randomly choose a subset of the rendered images, sort by order in the sequence
    n_samples = min(n_samples, len(preds))
    pred_idx = sorted(random.sample(list(range(len(preds))), n_samples))
    for predi in pred_idx:
-        # Make the concatentation for the same camera vertically
+        # Make the concatenation for the same camera vertically
        pred_all.append(
            torch.cat(
                [
@@ -359,7 +359,7 @@ def _generate_prediction_videos(
    vws = {}
    for k in predicted_keys:
        if k not in preds[0]:
-            logger.warn(f"Cannot generate video for prediction key '{k}'")
+            logger.warning(f"Cannot generate video for prediction key '{k}'")
            continue
        cache_dir = (
            None
--- a/pytorch3d/implicitron/tools/rasterize_mc.py
+++ b/pytorch3d/implicitron/tools/rasterize_mc.py
@@ -10,7 +10,6 @@ import math
 from typing import Optional, Tuple

 import pytorch3d
-
 import torch
 from pytorch3d.ops import packed_to_padded
 from pytorch3d.renderer import PerspectiveCameras
--- a/pytorch3d/implicitron/tools/stats.py
+++ b/pytorch3d/implicitron/tools/stats.py
@@ -499,7 +499,7 @@ class StatsJSONEncoder(json.JSONEncoder):
            return enc
        else:
            raise TypeError(
-                f"Object of type {o.__class__.__name__} " f"is not JSON serializable"
+                f"Object of type {o.__class__.__name__} is not JSON serializable"
            )


--- a/pytorch3d/implicitron/tools/video_writer.py
+++ b/pytorch3d/implicitron/tools/video_writer.py
@@ -17,7 +17,6 @@ import matplotlib
 import matplotlib.pyplot as plt
 import numpy as np
 import torch
-
 from PIL import Image

 _NO_TORCHVISION = False
--- a/pytorch3d/io/obj_io.py
+++ b/pytorch3d/io/obj_io.py
@@ -796,7 +796,7 @@ def save_obj(
        # Create .mtl file with the material name and texture map filename
        # TODO: enable material properties to also be saved.
        with _open_file(mtl_path, path_manager, "w") as f_mtl:
-            lines = f"newmtl mesh\n" f"map_Kd {output_path.stem}.png\n"
+            lines = f"newmtl mesh\nmap_Kd {output_path.stem}.png\n"
            f_mtl.write(lines)


--- a/pytorch3d/loss/__init__.py
+++ b/pytorch3d/loss/__init__.py
@@ -8,11 +8,8 @@


 from .chamfer import chamfer_distance
-
 from .mesh_edge_loss import mesh_edge_loss
-
 from .mesh_laplacian_smoothing import mesh_laplacian_smoothing
-
 from .mesh_normal_consistency import mesh_normal_consistency
 from .point_mesh_distance import point_mesh_edge_distance, point_mesh_face_distance

--- a/pytorch3d/loss/mesh_laplacian_smoothing.py
+++ b/pytorch3d/loss/mesh_laplacian_smoothing.py
@@ -114,9 +114,7 @@ def mesh_laplacian_smoothing(meshes, method: str = "uniform"):
            if method == "cot":
                norm_w = torch.sparse.sum(L, dim=1).to_dense().view(-1, 1)
                idx = norm_w > 0
-                # pyre-fixme[58]: `/` is not supported for operand types `float` and
-                #  `Tensor`.
-                norm_w[idx] = 1.0 / norm_w[idx]
+                norm_w[idx] = torch.reciprocal(norm_w[idx])
            else:
                L_sum = torch.sparse.sum(L, dim=1).to_dense().view(-1, 1)
                norm_w = 0.25 * inv_areas
--- a/pytorch3d/loss/point_mesh_distance.py
+++ b/pytorch3d/loss/point_mesh_distance.py
@@ -6,6 +6,7 @@

 # pyre-unsafe

+import torch
 from pytorch3d import _C
 from pytorch3d.structures import Meshes, Pointclouds
 from torch.autograd import Function
@@ -302,8 +303,7 @@ def point_mesh_edge_distance(meshes: Meshes, pcls: Pointclouds):
    point_to_cloud_idx = pcls.packed_to_cloud_idx()  # (sum(P_i), )
    num_points_per_cloud = pcls.num_points_per_cloud()  # (N,)
    weights_p = num_points_per_cloud.gather(0, point_to_cloud_idx)
-    # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`.
-    weights_p = 1.0 / weights_p.float()
+    weights_p = torch.reciprocal(weights_p.float())
    point_to_edge = point_to_edge * weights_p
    point_dist = point_to_edge.sum() / N

@@ -377,8 +377,7 @@ def point_mesh_face_distance(
    point_to_cloud_idx = pcls.packed_to_cloud_idx()  # (sum(P_i),)
    num_points_per_cloud = pcls.num_points_per_cloud()  # (N,)
    weights_p = num_points_per_cloud.gather(0, point_to_cloud_idx)
-    # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`.
-    weights_p = 1.0 / weights_p.float()
+    weights_p = torch.reciprocal(weights_p.float())
    point_to_face = point_to_face * weights_p
    point_dist = point_to_face.sum() / N

--- a/pytorch3d/ops/__init__.py
+++ b/pytorch3d/ops/__init__.py
@@ -8,17 +8,14 @@

 from .ball_query import ball_query
 from .cameras_alignment import corresponding_cameras_alignment
-
 from .cubify import cubify
 from .graph_conv import GraphConv
 from .interp_face_attrs import interpolate_face_attributes
 from .iou_box3d import box3d_overlap
 from .knn import knn_gather, knn_points
 from .laplacian_matrices import cot_laplacian, laplacian, norm_laplacian
-
 from .mesh_face_areas_normals import mesh_face_areas_normals
 from .mesh_filtering import taubin_smoothing
-
 from .packed_to_padded import packed_to_padded, padded_to_packed
 from .perspective_n_points import efficient_pnp
 from .points_alignment import corresponding_points_alignment, iterative_closest_point
@@ -30,9 +27,7 @@ from .points_to_volumes import (
    add_pointclouds_to_volumes,
    add_points_features_to_volume_densities_features,
 )
-
 from .sample_farthest_points import sample_farthest_points
-
 from .sample_points_from_meshes import sample_points_from_meshes
 from .subdivide_meshes import SubdivideMeshes
 from .utils import (
@@ -42,7 +37,6 @@ from .utils import (
    is_pointclouds,
    wmean,
 )
-
 from .vert_align import vert_align


--- a/pytorch3d/ops/ball_query.py
+++ b/pytorch3d/ops/ball_query.py
@@ -23,11 +23,13 @@ class _ball_query(Function):
    """

    @staticmethod
-    def forward(ctx, p1, p2, lengths1, lengths2, K, radius):
+    def forward(ctx, p1, p2, lengths1, lengths2, K, radius, skip_points_outside_cube):
        """
        Arguments defintions the same as in the ball_query function
        """
-        idx, dists = _C.ball_query(p1, p2, lengths1, lengths2, K, radius)
+        idx, dists = _C.ball_query(
+            p1, p2, lengths1, lengths2, K, radius, skip_points_outside_cube
+        )
        ctx.save_for_backward(p1, p2, lengths1, lengths2, idx)
        ctx.mark_non_differentiable(idx)
        return dists, idx
@@ -49,7 +51,7 @@ class _ball_query(Function):
        grad_p1, grad_p2 = _C.knn_points_backward(
            p1, p2, lengths1, lengths2, idx, 2, grad_dists
        )
-        return grad_p1, grad_p2, None, None, None, None
+        return grad_p1, grad_p2, None, None, None, None, None


 def ball_query(
@@ -60,6 +62,7 @@ def ball_query(
    K: int = 500,
    radius: float = 0.2,
    return_nn: bool = True,
+    skip_points_outside_cube: bool = False,
 ):
    """
    Ball Query is an alternative to KNN. It can be
@@ -98,6 +101,9 @@ def ball_query(
            within the radius
        radius: the radius around each point within which the neighbors need to be located
        return_nn: If set to True returns the K neighbor points in p2 for each point in p1.
+        skip_points_outside_cube: If set to True, reduce multiplications of float values
+            by not explicitly calculating distances to points that fall outside the
+            D-cube with side length (2*radius) centered at each point in p1.

    Returns:
        dists: Tensor of shape (N, P1, K) giving the squared distances to
@@ -134,7 +140,9 @@ def ball_query(
    if lengths2 is None:
        lengths2 = torch.full((N,), P2, dtype=torch.int64, device=p1.device)

-    dists, idx = _ball_query.apply(p1, p2, lengths1, lengths2, K, radius)
+    dists, idx = _ball_query.apply(
+        p1, p2, lengths1, lengths2, K, radius, skip_points_outside_cube
+    )

    # Gather the neighbors if needed
    points_nn = masked_gather(p2, idx) if return_nn else None
--- a/pytorch3d/ops/cubify.py
+++ b/pytorch3d/ops/cubify.py
@@ -11,9 +11,7 @@ from typing import Optional

 import torch
 import torch.nn.functional as F
-
 from pytorch3d.common.compat import meshgrid_ij
-
 from pytorch3d.structures import Meshes


--- a/pytorch3d/ops/laplacian_matrices.py
+++ b/pytorch3d/ops/laplacian_matrices.py
@@ -47,8 +47,7 @@ def laplacian(verts: torch.Tensor, edges: torch.Tensor) -> torch.Tensor:
    # i.e. A[i, j] = 1 if (i,j) is an edge, or
    # A[e0, e1] = 1 &  A[e1, e0] = 1
    ones = torch.ones(idx.shape[1], dtype=torch.float32, device=verts.device)
-    # pyre-fixme[16]: Module `sparse` has no attribute `FloatTensor`.
-    A = torch.sparse.FloatTensor(idx, ones, (V, V))
+    A = torch.sparse_coo_tensor(idx, ones, (V, V), dtype=torch.float32)

    # the sum of i-th row of A gives the degree of the i-th vertex
    deg = torch.sparse.sum(A, dim=1).to_dense()
@@ -56,21 +55,17 @@ def laplacian(verts: torch.Tensor, edges: torch.Tensor) -> torch.Tensor:
    # We construct the Laplacian matrix by adding the non diagonal values
    # i.e. L[i, j] = 1 ./ deg(i) if (i, j) is an edge
    deg0 = deg[e0]
-    # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`.
-    deg0 = torch.where(deg0 > 0.0, 1.0 / deg0, deg0)
+    deg0 = torch.where(deg0 > 0.0, torch.reciprocal(deg0), deg0)
    deg1 = deg[e1]
-    # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`.
-    deg1 = torch.where(deg1 > 0.0, 1.0 / deg1, deg1)
+    deg1 = torch.where(deg1 > 0.0, torch.reciprocal(deg1), deg1)
    val = torch.cat([deg0, deg1])
-    # pyre-fixme[16]: Module `sparse` has no attribute `FloatTensor`.
-    L = torch.sparse.FloatTensor(idx, val, (V, V))
+    L = torch.sparse_coo_tensor(idx, val, (V, V), dtype=torch.float32)

    # Then we add the diagonal values L[i, i] = -1.
    idx = torch.arange(V, device=verts.device)
    idx = torch.stack([idx, idx], dim=0)
    ones = torch.ones(idx.shape[1], dtype=torch.float32, device=verts.device)
-    # pyre-fixme[16]: Module `sparse` has no attribute `FloatTensor`.
-    L -= torch.sparse.FloatTensor(idx, ones, (V, V))
+    L -= torch.sparse_coo_tensor(idx, ones, (V, V), dtype=torch.float32)

    return L

@@ -126,8 +121,7 @@ def cot_laplacian(
    ii = faces[:, [1, 2, 0]]
    jj = faces[:, [2, 0, 1]]
    idx = torch.stack([ii, jj], dim=0).view(2, F * 3)
-    # pyre-fixme[16]: Module `sparse` has no attribute `FloatTensor`.
-    L = torch.sparse.FloatTensor(idx, cot.view(-1), (V, V))
+    L = torch.sparse_coo_tensor(idx, cot.view(-1), (V, V), dtype=torch.float32)

    # Make it symmetric; this means we are also setting
    # L[v2, v1] = cota
@@ -141,8 +135,7 @@ def cot_laplacian(
    val = torch.stack([area] * 3, dim=1).view(-1)
    inv_areas.scatter_add_(0, idx, val)
    idx = inv_areas > 0
-    # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`.
-    inv_areas[idx] = 1.0 / inv_areas[idx]
+    inv_areas[idx] = torch.reciprocal(inv_areas[idx])
    inv_areas = inv_areas.view(-1, 1)

    return L, inv_areas
@@ -167,7 +160,7 @@ def norm_laplacian(
    v0, v1 = edge_verts[:, 0], edge_verts[:, 1]

    # Side lengths of each edge, of shape (E,)
-    w01 = 1.0 / ((v0 - v1).norm(dim=1) + eps)
+    w01 = torch.reciprocal((v0 - v1).norm(dim=1) + eps)

    # Construct a sparse matrix by basically doing:
    # L[v0, v1] = w01
@@ -175,8 +168,7 @@ def norm_laplacian(
    e01 = edges.t()  # (2, E)

    V = verts.shape[0]
-    # pyre-fixme[16]: Module `sparse` has no attribute `FloatTensor`.
-    L = torch.sparse.FloatTensor(e01, w01, (V, V))
+    L = torch.sparse_coo_tensor(e01, w01, (V, V), dtype=torch.float32)
    L = L + L.t()

    return L
--- a/pytorch3d/ops/sample_farthest_points.py
+++ b/pytorch3d/ops/sample_farthest_points.py
@@ -55,6 +55,7 @@ def sample_farthest_points(
    N, P, D = points.shape
    device = points.device

+    constant_length = lengths is None
    # Validate inputs
    if lengths is None:
        lengths = torch.full((N,), P, dtype=torch.int64, device=device)
@@ -65,7 +66,9 @@ def sample_farthest_points(
            raise ValueError("A value in lengths was too large.")

    # TODO: support providing K as a ratio of the total number of points instead of as an int
+    max_K = -1
    if isinstance(K, int):
+        max_K = K
        K = torch.full((N,), K, dtype=torch.int64, device=device)
    elif isinstance(K, list):
        K = torch.tensor(K, dtype=torch.int64, device=device)
@@ -82,15 +85,19 @@ def sample_farthest_points(
        K = K.to(torch.int64)

    # Generate the starting indices for sampling
-    start_idxs = torch.zeros_like(lengths)
    if random_start_point:
-        for n in range(N):
-            # pyre-fixme[6]: For 1st param expected `int` but got `Tensor`.
-            start_idxs[n] = torch.randint(high=lengths[n], size=(1,)).item()
+        if constant_length:
+            start_idxs = torch.randint(high=P, size=(N,), device=device)
+        else:
+            start_idxs = (lengths * torch.rand(lengths.size(), device=device)).to(
+                torch.int64
+            )
+    else:
+        start_idxs = torch.zeros_like(lengths)

    with torch.no_grad():
        # pyre-fixme[16]: `pytorch3d_._C` has no attribute `sample_farthest_points`.
-        idx = _C.sample_farthest_points(points, lengths, K, start_idxs)
+        idx = _C.sample_farthest_points(points, lengths, K, start_idxs, max_K)
    sampled_points = masked_gather(points, idx)

    return sampled_points, idx
--- a/pytorch3d/ops/sample_points_from_meshes.py
+++ b/pytorch3d/ops/sample_points_from_meshes.py
@@ -16,9 +16,7 @@ import sys
 from typing import Tuple, Union

 import torch
-
 from pytorch3d.ops.mesh_face_areas_normals import mesh_face_areas_normals
-
 from pytorch3d.ops.packed_to_padded import packed_to_padded
 from pytorch3d.renderer.mesh.rasterizer import Fragments as MeshFragments

--- a/pytorch3d/renderer/__init__.py
+++ b/pytorch3d/renderer/__init__.py
@@ -69,7 +69,6 @@ from .mesh import (
    TexturesUV,
    TexturesVertex,
 )
-
 from .points import (
    AlphaCompositor,
    NormWeightedCompositor,
--- a/pytorch3d/renderer/camera_conversions.py
+++ b/pytorch3d/renderer/camera_conversions.py
@@ -153,12 +153,12 @@ def _pulsar_from_opencv_projection(
    # Check image sizes.
    image_w = image_size_wh[0, 0]
    image_h = image_size_wh[0, 1]
-    assert torch.all(
-        image_size_wh[:, 0] == image_w
-    ), "All images in a batch must have the same width!"
-    assert torch.all(
-        image_size_wh[:, 1] == image_h
-    ), "All images in a batch must have the same height!"
+    assert torch.all(image_size_wh[:, 0] == image_w), (
+        "All images in a batch must have the same width!"
+    )
+    assert torch.all(image_size_wh[:, 1] == image_h), (
+        "All images in a batch must have the same height!"
+    )
    # Focal length.
    fx = camera_matrix[:, 0, 0].unsqueeze(1)
    fy = camera_matrix[:, 1, 1].unsqueeze(1)
--- a/pytorch3d/renderer/cameras.py
+++ b/pytorch3d/renderer/cameras.py
@@ -629,10 +629,8 @@ class FoVPerspectiveCameras(CamerasBase):
        # so the so the z sign is 1.0.
        z_sign = 1.0

-        # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`.
-        K[:, 0, 0] = 2.0 * znear / (max_x - min_x)
-        # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`.
-        K[:, 1, 1] = 2.0 * znear / (max_y - min_y)
+        K[:, 0, 0] = torch.div(2.0 * znear, max_x - min_x)
+        K[:, 1, 1] = torch.div(2.0 * znear, max_y - min_y)
        K[:, 0, 2] = (max_x + min_x) / (max_x - min_x)
        K[:, 1, 2] = (max_y + min_y) / (max_y - min_y)
        K[:, 3, 2] = z_sign * ones
@@ -1178,9 +1176,7 @@ class PerspectiveCameras(CamerasBase):
        xy_inv_depth = torch.cat(
            # pyre-fixme[6]: For 1st argument expected `Union[List[Tensor],
            #  tuple[Tensor, ...]]` but got `Tuple[Tensor, float]`.
-            # pyre-fixme[58]: `/` is not supported for operand types `float` and
-            #  `Tensor`.
-            (xy_depth[..., :2], 1.0 / xy_depth[..., 2:3]),
+            (xy_depth[..., :2], torch.reciprocal(xy_depth[..., 2:3])),
            dim=-1,  # type: ignore
        )
        return unprojection_transform.transform_points(xy_inv_depth)
--- a/pytorch3d/renderer/mesh/__init__.py
+++ b/pytorch3d/renderer/mesh/__init__.py
@@ -12,7 +12,6 @@ from .clip import (
    ClippedFaces,
    convert_clipped_rasterization_to_original_faces,
 )
-
 from .rasterize_meshes import rasterize_meshes
 from .rasterizer import MeshRasterizer, RasterizationSettings
 from .renderer import MeshRenderer, MeshRendererWithFragments
--- a/pytorch3d/renderer/mesh/clip.py
+++ b/pytorch3d/renderer/mesh/clip.py
@@ -434,13 +434,7 @@ def clip_faces(
    # These will then be filled in for each case.
    ###########################################
    F_clipped = (
-        F
-        # pyre-fixme[58]: `+` is not supported for operand types `int` and
-        #  `Union[bool, float, int]`.
-        + faces_delta_cum[-1].item()
-        # pyre-fixme[58]: `+` is not supported for operand types `int` and
-        #  `Union[bool, float, int]`.
-        + faces_delta[-1].item()
+        F + int(faces_delta_cum[-1].item()) + int(faces_delta[-1].item())
    )  # Total number of faces in the new Meshes
    face_verts_clipped = torch.zeros(
        (F_clipped, 3, 3), dtype=face_verts_unclipped.dtype, device=device
--- a/pytorch3d/renderer/mesh/rasterize_meshes.py
+++ b/pytorch3d/renderer/mesh/rasterize_meshes.py
@@ -14,7 +14,6 @@ import torch
 from pytorch3d import _C

 from ..utils import parse_image_size
-
 from .clip import (
    clip_faces,
    ClipFrustum,
--- a/pytorch3d/renderer/mesh/textures.py
+++ b/pytorch3d/renderer/mesh/textures.py
@@ -71,9 +71,7 @@ def _list_to_padded_wrapper(
        # pyre-fixme[6]: For 2nd param expected `int` but got `Union[bool, float, int]`.
        x_reshaped.append(y.reshape(-1, D))
    x_padded = list_to_padded(x_reshaped, pad_size=pad_size, pad_value=pad_value)
-    # pyre-fixme[58]: `+` is not supported for operand types `Tuple[int, int]` and
-    #  `Size`.
-    return x_padded.reshape((N, -1) + reshape_dims)
+    return x_padded.reshape((N, -1) + tuple(reshape_dims))


 def _padded_to_list_wrapper(
@@ -104,9 +102,7 @@ def _padded_to_list_wrapper(
    # pyre-fixme[6]: For 3rd param expected `int` but got `Union[bool, float, int]`.
    x_reshaped = x.reshape(N, M, D)
    x_list = padded_to_list(x_reshaped, split_size=split_size)
-    # pyre-fixme[58]: `+` is not supported for operand types `Tuple[typing.Any]` and
-    #  `Size`.
-    x_list = [xl.reshape((xl.shape[0],) + reshape_dims) for xl in x_list]
+    x_list = [xl.reshape((xl.shape[0],) + tuple(reshape_dims)) for xl in x_list]
    return x_list


@@ -625,9 +621,7 @@ class TexturesAtlas(TexturesBase):
        of length `k`.
        """
        if len(faces_ids_list) != len(self.atlas_list()):
-            raise IndexError(
-                "faces_ids_list must be of " "the same length as atlas_list."
-            )
+            raise IndexError("faces_ids_list must be of the same length as atlas_list.")

        sub_features = []
        for atlas, faces_ids in zip(self.atlas_list(), faces_ids_list):
@@ -1657,7 +1651,7 @@ class TexturesUV(TexturesBase):
            raise NotImplementedError("This function does not support multiple maps.")
        if len(faces_ids_list) != len(self.faces_uvs_padded()):
            raise IndexError(
-                "faces_uvs_padded must be of " "the same length as face_ids_list."
+                "faces_uvs_padded must be of the same length as face_ids_list."
            )

        sub_faces_uvs, sub_verts_uvs, sub_maps = [], [], []
@@ -1871,7 +1865,7 @@ class TexturesVertex(TexturesBase):
        """
        if len(vertex_ids_list) != len(self.verts_features_list()):
            raise IndexError(
-                "verts_features_list must be of " "the same length as vertex_ids_list."
+                "verts_features_list must be of the same length as vertex_ids_list."
            )

        sub_features = []
--- a/pytorch3d/renderer/opengl/opengl_utils.py
+++ b/pytorch3d/renderer/opengl/opengl_utils.py
@@ -24,7 +24,6 @@ from typing import Any, Dict

 os.environ["PYOPENGL_PLATFORM"] = "egl"
 import OpenGL.EGL as egl  # noqa
-
 import pycuda.driver as cuda  # noqa
 from OpenGL._opaque import opaque_pointer_cls  # noqa
 from OpenGL.raw.EGL._errors import EGLError  # noqa
--- a/pytorch3d/renderer/opengl/rasterizer_opengl.py
+++ b/pytorch3d/renderer/opengl/rasterizer_opengl.py
@@ -17,15 +17,12 @@ import numpy as np
 import OpenGL.GL as gl
 import pycuda.gl
 import torch
-
 import torch.nn as nn
-
 from pytorch3d.structures.meshes import Meshes

 from ..cameras import FoVOrthographicCameras, FoVPerspectiveCameras
 from ..mesh.rasterizer import Fragments, RasterizationSettings
 from ..utils import parse_image_size
-
 from .opengl_utils import _torch_to_opengl, global_device_context_store

 # Shader strings, used below to compile an OpenGL program.
--- a/pytorch3d/renderer/points/__init__.py
+++ b/pytorch3d/renderer/points/__init__.py
@@ -9,9 +9,7 @@
 import torch

 from .compositor import AlphaCompositor, NormWeightedCompositor
-
 from .pulsar.unified import PulsarPointsRenderer
-
 from .rasterize_points import rasterize_points
 from .rasterizer import PointsRasterizationSettings, PointsRasterizer
 from .renderer import PointsRenderer
--- a/pytorch3d/renderer/points/rasterize_points.py
+++ b/pytorch3d/renderer/points/rasterize_points.py
@@ -11,7 +11,6 @@ from typing import List, Optional, Tuple, Union
 import numpy as np
 import torch
 from pytorch3d import _C
-
 from pytorch3d.renderer.mesh.rasterize_meshes import pix_to_non_square_ndc

 from ..utils import parse_image_size
--- a/pytorch3d/renderer/utils.py
+++ b/pytorch3d/renderer/utils.py
@@ -269,9 +269,7 @@ class TensorProperties(nn.Module):
                        # to have the same shape as the input tensor.
                        new_dims = len(tensor_dims) - len(idx_dims)
                        new_shape = idx_dims + (1,) * new_dims
-                        # pyre-fixme[58]: `+` is not supported for operand types
-                        # `Tuple[int]` and `torch._C.Size`
-                        expand_dims = (-1,) + tensor_dims[1:]
+                        expand_dims = (-1,) + tuple(tensor_dims[1:])
                        _batch_idx = _batch_idx.view(*new_shape)
                        _batch_idx = _batch_idx.expand(*expand_dims)

--- a/pytorch3d/structures/meshes.py
+++ b/pytorch3d/structures/meshes.py
@@ -531,9 +531,9 @@ class Meshes:
            list of tensors of vertices of shape (V_n, 3).
        """
        if self._verts_list is None:
-            assert (
-                self._verts_padded is not None
-            ), "verts_padded is required to compute verts_list."
+            assert self._verts_padded is not None, (
+                "verts_padded is required to compute verts_list."
+            )
            self._verts_list = struct_utils.padded_to_list(
                self._verts_padded, self.num_verts_per_mesh().tolist()
            )
@@ -547,9 +547,9 @@ class Meshes:
            list of tensors of faces of shape (F_n, 3).
        """
        if self._faces_list is None:
-            assert (
-                self._faces_padded is not None
-            ), "faces_padded is required to compute faces_list."
+            assert self._faces_padded is not None, (
+                "faces_padded is required to compute faces_list."
+            )
            self._faces_list = struct_utils.padded_to_list(
                self._faces_padded, self.num_faces_per_mesh().tolist()
            )
@@ -925,9 +925,9 @@ class Meshes:

        verts_list = self.verts_list()
        faces_list = self.faces_list()
-        assert (
-            faces_list is not None and verts_list is not None
-        ), "faces_list and verts_list arguments are required"
+        assert faces_list is not None and verts_list is not None, (
+            "faces_list and verts_list arguments are required"
+        )

        if self.isempty():
            self._faces_padded = torch.zeros(
--- a/pytorch3d/structures/pointclouds.py
+++ b/pytorch3d/structures/pointclouds.py
@@ -433,9 +433,9 @@ class Pointclouds:
            list of tensors of points of shape (P_n, 3).
        """
        if self._points_list is None:
-            assert (
-                self._points_padded is not None
-            ), "points_padded is required to compute points_list."
+            assert self._points_padded is not None, (
+                "points_padded is required to compute points_list."
+            )
            points_list = []
            for i in range(self._N):
                points_list.append(
--- a/pytorch3d/transforms/rotation_conversions.py
+++ b/pytorch3d/transforms/rotation_conversions.py
@@ -52,8 +52,7 @@ def quaternion_to_matrix(quaternions: torch.Tensor) -> torch.Tensor:
        Rotation matrices as tensor of shape (..., 3, 3).
    """
    r, i, j, k = torch.unbind(quaternions, -1)
-    # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`.
-    two_s = 2.0 / (quaternions * quaternions).sum(-1)
+    two_s = torch.div(2.0, (quaternions * quaternions).sum(-1))

    o = torch.stack(
        (
@@ -137,18 +136,18 @@ def matrix_to_quaternion(matrix: torch.Tensor) -> torch.Tensor:
    # we produce the desired quaternion multiplied by each of r, i, j, k
    quat_by_rijk = torch.stack(
        [
-            # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and
-            #  `int`.
-            torch.stack([q_abs[..., 0] ** 2, m21 - m12, m02 - m20, m10 - m01], dim=-1),
-            # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and
-            #  `int`.
-            torch.stack([m21 - m12, q_abs[..., 1] ** 2, m10 + m01, m02 + m20], dim=-1),
-            # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and
-            #  `int`.
-            torch.stack([m02 - m20, m10 + m01, q_abs[..., 2] ** 2, m12 + m21], dim=-1),
-            # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and
-            #  `int`.
-            torch.stack([m10 - m01, m20 + m02, m21 + m12, q_abs[..., 3] ** 2], dim=-1),
+            torch.stack(
+                [torch.square(q_abs[..., 0]), m21 - m12, m02 - m20, m10 - m01], dim=-1
+            ),
+            torch.stack(
+                [m21 - m12, torch.square(q_abs[..., 1]), m10 + m01, m02 + m20], dim=-1
+            ),
+            torch.stack(
+                [m02 - m20, m10 + m01, torch.square(q_abs[..., 2]), m12 + m21], dim=-1
+            ),
+            torch.stack(
+                [m10 - m01, m20 + m02, m21 + m12, torch.square(q_abs[..., 3])], dim=-1
+            ),
        ],
        dim=-2,
    )
@@ -160,9 +159,10 @@ def matrix_to_quaternion(matrix: torch.Tensor) -> torch.Tensor:

    # if not for numerical problems, quat_candidates[i] should be same (up to a sign),
    # forall i; we pick the best-conditioned one (with the largest denominator)
-    out = quat_candidates[
-        F.one_hot(q_abs.argmax(dim=-1), num_classes=4) > 0.5, :
-    ].reshape(batch_dim + (4,))
+    indices = q_abs.argmax(dim=-1, keepdim=True)
+    expand_dims = list(batch_dim) + [1, 4]
+    gather_indices = indices.unsqueeze(-1).expand(expand_dims)
+    out = torch.gather(quat_candidates, -2, gather_indices).squeeze(-2)
    return standardize_quaternion(out)


@@ -293,10 +293,11 @@ def matrix_to_euler_angles(matrix: torch.Tensor, convention: str) -> torch.Tenso
    tait_bryan = i0 != i2
    if tait_bryan:
        central_angle = torch.asin(
-            matrix[..., i0, i2] * (-1.0 if i0 - i2 in [-1, 2] else 1.0)
+            torch.clamp(matrix[..., i0, i2], -1.0, 1.0)
+            * (-1.0 if i0 - i2 in [-1, 2] else 1.0)
        )
    else:
-        central_angle = torch.acos(matrix[..., i0, i0])
+        central_angle = torch.acos(torch.clamp(matrix[..., i0, i0], -1.0, 1.0))

    o = (
        _angle_from_tan(
--- a/pytorch3d/transforms/transform3d.py
+++ b/pytorch3d/transforms/transform3d.py
@@ -623,9 +623,7 @@ class Scale(Transform3d):
        Return the inverse of self._matrix.
        """
        xyz = torch.stack([self._matrix[:, i, i] for i in range(4)], dim=1)
-        # pyre-fixme[58]: `/` is not supported for operand types `float` and `Tensor`.
-        ixyz = 1.0 / xyz
-        # pyre-fixme[6]: For 1st param expected `Tensor` but got `float`.
+        ixyz = torch.reciprocal(xyz)
        imat = torch.diag_embed(ixyz, dim1=1, dim2=2)
        return imat

--- a/pytorch3d/utils/__init__.py
+++ b/pytorch3d/utils/__init__.py
@@ -12,11 +12,8 @@ from .camera_conversions import (
    pulsar_from_cameras_projection,
    pulsar_from_opencv_projection,
 )
-
 from .checkerboard import checkerboard
-
 from .ico_sphere import ico_sphere
-
 from .torus import torus


--- a/setup.py
+++ b/setup.py
@@ -75,6 +75,21 @@ def get_extensions():
        ]
        if os.name != "nt":
            nvcc_args.append("-std=c++17")
+
+        # CUDA 13.0+ compatibility flags for pulsar.
+        # Starting with CUDA 13, __global__ function visibility changed.
+        # See: https://developer.nvidia.com/blog/
+        #      cuda-c-compiler-updates-impacting-elf-visibility-and-linkage/
+        cuda_version = torch.version.cuda
+        if cuda_version is not None:
+            major = int(cuda_version.split(".")[0])
+            if major >= 13:
+                nvcc_args.extend(
+                    [
+                        "--device-entity-has-hidden-visibility=false",
+                        "-static-global-template-stub=false",
+                    ]
+                )
        if cub_home is None:
            prefix = os.environ.get("CONDA_PREFIX", None)
            if prefix is not None and os.path.isdir(prefix + "/include/cub"):
@@ -134,7 +149,7 @@ if os.getenv("PYTORCH3D_NO_NINJA", "0") == "1":

    class BuildExtension(torch.utils.cpp_extension.BuildExtension):
        def __init__(self, *args, **kwargs):
-            super().__init__(use_ninja=False, *args, **kwargs)
+            super().__init__(*args, use_ninja=False, **kwargs)

 else:
    BuildExtension = torch.utils.cpp_extension.BuildExtension
--- a/tests/benchmarks/bm_ball_query_large.py
+++ b/tests/benchmarks/bm_ball_query_large.py
@@ -0,0 +1,55 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from itertools import product
+
+import torch
+from fvcore.common.benchmark import benchmark
+from pytorch3d.ops.ball_query import ball_query
+
+
+def ball_query_square(
+    N: int, P1: int, P2: int, D: int, K: int, radius: float, device: str
+):
+    device = torch.device(device)
+    pts1 = torch.rand(N, P1, D, device=device)
+    pts2 = torch.rand(N, P2, D, device=device)
+    torch.cuda.synchronize()
+
+    def output():
+        ball_query(pts1, pts2, K=K, radius=radius, skip_points_outside_cube=True)
+        torch.cuda.synchronize()
+
+    return output
+
+
+def bm_ball_query() -> None:
+    backends = ["cpu", "cuda:0"]
+
+    kwargs_list = []
+    Ns = [32]
+    P1s = [256]
+    P2s = [2**p for p in range(9, 20, 2)]
+    Ds = [3, 10]
+    Ks = [500]
+    Rs = [0.01, 0.1]
+    test_cases = product(Ns, P1s, P2s, Ds, Ks, Rs, backends)
+    for case in test_cases:
+        N, P1, P2, D, K, R, b = case
+        kwargs_list.append(
+            {"N": N, "P1": P1, "P2": P2, "D": D, "K": K, "radius": R, "device": b}
+        )
+    benchmark(
+        ball_query_square,
+        "BALLQUERY_SQUARE",
+        kwargs_list,
+        num_iters=30,
+        warmup_iters=1,
+    )
+
+
+if __name__ == "__main__":
+    bm_ball_query()
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
generatedunixname1417043136753450	e43ed8c76e	fbcode/vision/fair/pytorch3d/pytorch3d/transforms/rotation_conversions.py Reviewed By: bottler Differential Revision: D93712828 fbshipit-source-id: 3465af450104bb1e5f491e3c0ee0259698cf8ceb	2026-02-22 07:53:20 -08:00
generatedunixname1417043136753450	49f43402c6	fbcode/vision/fair/pytorch3d/pytorch3d/renderer/mesh/textures.py Reviewed By: bottler Differential Revision: D93710616 fbshipit-source-id: 599fe7425066bc85c0999765168788f8df7e34ce	2026-02-22 07:13:45 -08:00
generatedunixname1417043136753450	90646d93ab	fbcode/vision/fair/pytorch3d/pytorch3d/renderer/mesh/clip.py Reviewed By: bottler Differential Revision: D93715239 fbshipit-source-id: 7417015251fe96be72daf4894e946edd43bb9c46	2026-02-22 07:13:09 -08:00
generatedunixname1417043136753450	eabb511410	fbcode/vision/fair/pytorch3d/pytorch3d/loss/mesh_laplacian_smoothing.py Reviewed By: bottler Differential Revision: D93709347 fbshipit-source-id: 69710e6082a0785126a121e26f1d96a571360f1d	2026-02-22 07:08:02 -08:00
generatedunixname1417043136753450	e70188ebbc	fbcode/vision/fair/pytorch3d/pytorch3d/transforms/transform3d.py Reviewed By: bottler Differential Revision: D93713606 fbshipit-source-id: a8aa52328a76d95d3985daec529cdce04ba12bd4	2026-02-22 07:06:34 -08:00
generatedunixname1417043136753450	1bd911d534	fbcode/vision/fair/pytorch3d/pytorch3d/renderer/cameras.py Reviewed By: bottler Differential Revision: D93712137 fbshipit-source-id: 3457f0f9fb7d7baa29be2eaf731074a49bdbb0c8	2026-02-22 07:05:45 -08:00
generatedunixname1417043136753450	3aadd19a2b	fbcode/vision/fair/pytorch3d/pytorch3d/ops/laplacian_matrices.py Reviewed By: bottler Differential Revision: D93708383 fbshipit-source-id: 7576f0c9800ed3d28795e521be5c63799b7e6676	2026-02-22 06:57:57 -08:00
generatedunixname1417043136753450	42d66c1145	fbcode/vision/fair/pytorch3d/pytorch3d/loss/point_mesh_distance.py Reviewed By: bottler Differential Revision: D93708351 fbshipit-source-id: 06a877777e4cb72a497a44ff55db0b6222bda83b	2026-02-22 06:55:36 -08:00
generatedunixname1417043136753450	e9ed1cb178	fbcode/vision/fair/pytorch3d/pytorch3d/renderer/utils.py Reviewed By: bottler Differential Revision: D93708316 fbshipit-source-id: f8ae2432ad34116278b3f7f7de5146b89c3fe63e	2026-02-22 04:09:20 -08:00
Jeremy Reizenstein	cbcae096a0	Add atol=1e-4 to assertClose calls in test_inverse for Translate Summary: Added `atol=1e-4` tolerance parameter to the `assertClose` calls on lines 682 and 683 in the `test_inverse` method of `TestTranslate` class. This is a retry of D90225548 Reviewed By: sgrigory Differential Revision: D90682979 fbshipit-source-id: ac13f000174dd9962326296e1c3116d0d39c7751	2026-01-14 08:57:43 -08:00
generatedunixname537391475639613	5b1cce56bc	Fix for T251460511 ("Your diff, D90498281, broke one test") Reviewed By: sgrigory Differential Revision: D90649493 fbshipit-source-id: 2a77c45ec8e6e5aa0a20437a765fbb9f0b566406	2026-01-14 08:53:26 -08:00
Bowie Chen	0c3b204375	apply Black 25.11.0 style in fbcode (70/92) Summary: Formats the covered files with pyfmt. paintitblack Reviewed By: itamaro Differential Revision: D90476295 fbshipit-source-id: 5101d4aae980a9f8955a4cb10bae23997c48837f	2026-01-12 02:54:36 -08:00
Jeremy Reizenstein	6be5e2da06	Replace assertTrue(torch.allclose(...)) with assertClose in test_transforms.py Summary: ## LLM-generated Summary: Replaces self.assertTrue(torch.allclose(...)) with self.assertClose(...) throughout fbcode/vision/fair/pytorch3d/tests/test_transforms.py. This standardizes numeric closeness assertions for clearer failures and consistency while preserving tolerances and test behavior. --- Session: DEV34970678 Reviewed By: shapovalov Differential Revision: D90251428 fbshipit-source-id: cdae842be82f0ba548802e6977be272134e8508c	2026-01-08 04:35:40 -08:00
Guilherme Albertini	f5f6b78e70	Add initial CUDA 13.0 support for pulsar and pycuda modules Summary: CUDA 13.0 introduced breaking changes that cause build failures in pytorch3d: 1. Symbol Visibility Changes (pulsar) - NVCC now forces `__global__` functions to have hidden ELF visibility by default - `__global__` function template stubs now have internal linkage Fix: Added NVCC flags (`--device-entity-has-hidden-visibility=false` and `-static-global-template-stub=false`) for fbcode builds with CUDA 13.0+. 2. cuCtxCreate API Change (pycuda) - CUDA 13.0 changed `cuCtxCreate` from 3 to 4 arguments - pycuda 2022.2 (current default) uses the old signature and fails to compile - pycuda 2025.1.2 (D83501913) includes the CUDA 13.0 fix Fix: Added CUDA 13.0 constraint to pycuda alias to auto-select pycuda 2025.1.2. NCCL Compatibility Note: - Current stable NCCL (2.25) is NOT compatible with CUDA 13.0 (`cudaTypedefs.h` removed) - NCCL 2.27+ works with CUDA 13.0 and will become stable in early January 2026 (per HPC Comms team) - Until then, CUDA 13.0 builds require `-c hpc_comms.use_nccl=2.27` References: - GitHub issue: https://github.com/facebookresearch/pytorch3d/issues/2011 - NVIDIA blog: https://developer.nvidia.com/blog/cuda-c-compiler-updates-impacting-elf-visibility-and-linkage/ - FBGEMM_GPU fix: D86474263 - pycuda 2025.1.2 buckification: D83501913 Reviewed By: bottler Differential Revision: D88816596 fbshipit-source-id: 1ba666dab8c0e06d1286b8d5bc5d84cfc55c86e6	2025-12-17 10:02:10 -08:00
Jeremy Reizenstein	33824be3cb	version 0.7.9 Reviewed By: shapovalov Differential Revision: D87984194 fbshipit-source-id: dee8123a2c3f5cc34ada52f4663c9bbb329e03a7	2025-11-27 09:52:08 -08:00
Eugene Park	2d4d345b6f	Improve `ball_query()` runtime for large-scale cases (#2006 ) Summary: ### Overview The current C++ code for `pytorch3d.ops.ball_query()` performs floating-point multiplication for every coordinate of every pair of points (up until the maximum number of neighbor points is reached). This PR modifies the code (for both CPU and CUDA versions) to implement the idea presented [here](https://stackoverflow.com/a/3939525): a `D`-cube around the `D`-ball is first constructed, and any point pairs falling outside the cube are skipped, without explicitly computing the squared distances. This change is especially useful when the dimension `D` and the number of points `P2` are large and the radius is much smaller than the overall volume of space occupied by the point clouds; as much as ~2.5x speedup (CPU case; ~1.8x speedup in CUDA case) is observed when `D = 10` and `radius = 0.01`. In all benchmark cases, points were uniformly randomly distributed inside a unit `D`-cube. The benchmark code used was different from `tests/benchmarks/bm_ball_query.py` (only the forward part is benchmarked, larger input sizes were used) and is stored in `tests/benchmarks/bm_ball_query_large.py`. 
### Average time comparisons <img width="360" height="270" alt="cpu-03-0 01-avg" src="https://github.com/user-attachments/assets/6cc79893-7921-44af-9366-1766c3caf142" /> <img width="360" height="270" alt="cuda-03-0 01-avg" src="https://github.com/user-attachments/assets/5151647d-0273-40a3-aac6-8b9399ede18a" /> <img width="360" height="270" alt="cpu-03-0 10-avg" src="https://github.com/user-attachments/assets/a87bc150-a5eb-47cd-a4ba-83c2ec81edaf" /> <img width="360" height="270" alt="cuda-03-0 10-avg" src="https://github.com/user-attachments/assets/e3699a9f-dfd3-4dd3-b3c9-619296186d43" /> <img width="360" height="270" alt="cpu-10-0 01-avg" src="https://github.com/user-attachments/assets/5ec8c32d-8e4d-4ced-a94e-1b816b1cb0f8" /> <img width="360" height="270" alt="cuda-10-0 01-avg" src="https://github.com/user-attachments/assets/168a3dfc-777a-4fb3-8023-1ac8c13985b8" /> <img width="360" height="270" alt="cpu-10-0 10-avg" src="https://github.com/user-attachments/assets/43a57fd6-1e01-4c5e-87a9-8ef604ef5fa0" /> <img width="360" height="270" alt="cuda-10-0 10-avg" src="https://github.com/user-attachments/assets/a7c7cc69-f273-493e-95b8-3ba2bb2e32da" /> ### Peak time comparisons <img width="360" height="270" alt="cpu-03-0 01-peak" src="https://github.com/user-attachments/assets/5bbbea3f-ef9b-490d-ab0d-ce551711d74f" /> <img width="360" height="270" alt="cuda-03-0 01-peak" src="https://github.com/user-attachments/assets/30b5ab9b-45cb-4057-b69f-bda6e76bd1dc" /> <img width="360" height="270" alt="cpu-03-0 10-peak" src="https://github.com/user-attachments/assets/db69c333-e5ac-4305-8a86-a26a8a9fe80d" /> <img width="360" height="270" alt="cuda-03-0 10-peak" src="https://github.com/user-attachments/assets/82549656-1f12-409e-8160-dd4c4c9d14f7" /> <img width="360" height="270" alt="cpu-10-0 01-peak" src="https://github.com/user-attachments/assets/d0be8ef1-535e-47bc-b773-b87fad625bf0" /> <img width="360" height="270" alt="cuda-10-0 01-peak" 
src="https://github.com/user-attachments/assets/e308e66e-ae30-400f-8ad2-015517f6e1af" /> <img width="360" height="270" alt="cpu-10-0 10-peak" src="https://github.com/user-attachments/assets/c9b5bf59-9cc2-465c-ad5d-d4e23bdd138a" /> <img width="360" height="270" alt="cuda-10-0 10-peak" src="https://github.com/user-attachments/assets/311354d4-b488-400c-a1dc-c85a21917aa9" /> ### Full benchmark logs [benchmark-before-change.txt](https://github.com/user-attachments/files/22978300/benchmark-before-change.txt) [benchmark-after-change.txt](https://github.com/user-attachments/files/22978299/benchmark-after-change.txt) Pull Request resolved: https://github.com/facebookresearch/pytorch3d/pull/2006 Reviewed By: shapovalov Differential Revision: D85356394 Pulled By: bottler fbshipit-source-id: 9b3ce5fc87bb73d4323cc5b4190fc38ae42f41b2	2025-10-30 05:01:32 -07:00
Nikita Lutsenko	45df20e9e2	clang-format \| Format fbsource with clang-format 21. Reviewed By: ChristianK275 Differential Revision: D85317706 fbshipit-source-id: b399c5c4b75252999442b7d7d2778e7a241b0025	2025-10-26 23:40:59 -07:00
Jeremy Reizenstein	fc6a6b8951	separate multigpu tests Reviewed By: MichaelRamamonjisoa Differential Revision: D83477594 fbshipit-source-id: 5ea67543e288e9a06ee5141f436e879aa5cfb7f3	2025-10-09 08:17:20 -07:00
Kihyuk Sohn	7711bf34a8	fix device error Summary: When using `sample_farthest_points` with `lengths`, it throws an error because of the device mismatch between `lengths` and `torch.rand(lengths.size())` on GPU. Reviewed By: bottler Differential Revision: D82378997 fbshipit-source-id: 8e929256177d543d1dd1249e8488f70e03e4101f	2025-09-15 06:41:00 -07:00
Jeremy Reizenstein	d098beb7a7	allow python 3.12 Summary: Remove use of distutils Reviewed By: MichaelRamamonjisoa Differential Revision: D81594552 fbshipit-source-id: 4e979d5e03ea873bd09bc2b674b7e6480b9c6d65	2025-09-04 08:31:32 -07:00
Jeremy Reizenstein	dd068703d1	test fixes Summary: Some random seed changes. Skip multigpu tests when there's only one gpu. This is a better fix for what AI is doing in D80600882. Reviewed By: MichaelRamamonjisoa Differential Revision: D80625966 fbshipit-source-id: ac3952e7144125fd3a05ad6e4e6e5976ae10a8ef	2025-08-27 06:55:50 -07:00
Antoine Dumoulin	50f8efa1cb	Use sparse_coo_tensor in laplacian_matrices.py (#1991 ) Summary: update obsolete torch.sparse.FloatTensor to torch.sparse_coo_tensor Pull Request resolved: https://github.com/facebookresearch/pytorch3d/pull/1991 Reviewed By: MichaelRamamonjisoa Differential Revision: D80084359 Pulled By: bottler fbshipit-source-id: dc6c7a90211113d1ce5338a92c8c0030bfe12e65	2025-08-13 07:55:57 -07:00
Olga Gerasimova	5043d15361	avoid CPU/GPU sync in sample_farthest_points Summary: Optimizing sample_farthest_points by reducing CPU/GPU sync: 1. Replacing the iterative randint for starting indexes with 1 function call, if length is constant 2. Avoiding sync in fetching the maximum of sample points, if we sample the same amount 3. Initializing 1 tensor for samples and indices. Compare https://fburl.com/mlhub/7wk0xi98 Before {F1980383703} after {F1980383707} Histograms match pretty closely {F1980464338} Reviewed By: bottler Differential Revision: D78731869 fbshipit-source-id: 060528ae7a1e0fbbd005d129c151eaf9405841de	2025-07-23 10:23:40 -07:00
Stone Tao	e3d3a67a89	Clamp matrices in matrix_to_euler_angles function (#1989 ) Summary: Closes https://github.com/facebookresearch/pytorch3d/issues/1988 Credit goes to tylerlum for raising this issue and suggesting this fix in https://github.com/haosulab/ManiSkill/pull/1090 Pull Request resolved: https://github.com/facebookresearch/pytorch3d/pull/1989 Reviewed By: MichaelRamamonjisoa Differential Revision: D78021983 Pulled By: bottler fbshipit-source-id: d723f1924a399f4d7fd072e96ea740ae73cf280f	2025-07-10 06:08:19 -07:00
Jeremy Reizenstein	e55ea90609	disable import tests Summary: these tests don't work, aren't needed right now Reviewed By: MichaelRamamonjisoa Differential Revision: D78084742 fbshipit-source-id: 9cff2b30427dec314e34e81179816af4073bbe23	2025-07-10 05:20:22 -07:00
Melvin He	3aee2a6005	Fixes bus error hard crashes on Apple Silicon MPS devices Summary: Fixes hard crashes (bus errors) when using MPS device (Apple Silicon) by implementing CPU checks throughout files in csrc subdirectories to check if on same mesh on a CPU device. Note that this is the fourth and ultimate part of a larger change through multiple files & directories. Reviewed By: bottler Differential Revision: D77698176 fbshipit-source-id: 5bc9e3c5cea61afd486aed7396f390d92775ec6d	2025-07-03 12:34:37 -07:00
Melvin He	c5ea8fa49e	Adds CHECK_CPU macros checks for tensors not on CPU Summary: Adds CHECK_CPU macros that checks if a tensor is on the CPU device throughout csrc directories and subdir up to `pulsar`. Note that this is the third part of a larger change, and to keep diffs better organized, subsequent diffs will update the remaining directories. Reviewed By: bottler Differential Revision: D77696998 fbshipit-source-id: 470ca65b23d9965483b5bdd30c712da8e1131787	2025-07-03 08:29:36 -07:00
Melvin He	3ff6c5ab85	Error instead of crash for tensors on exotic devices Summary: Adds CHECK_CPU macros that checks if a tensor is on the CPU device throughout csrc directories up to `marching_cubes`. Directories updated include those in `gather_scatter`, `interp_face_attrs`, `iou_box3d`, `knn`, and `marching_cubes`. Note that this is the second part of a larger change, and to keep diffs better organized, subsequent diffs will update the remaining directories. Reviewed By: bottler Differential Revision: D77558550 fbshipit-source-id: 762a0fe88548dc8d0901b198a11c40d0c36e173f	2025-07-01 09:14:38 -07:00
Srivathsan Govindarajan	267bd8ef87	Revert `_sqrt_positive_part` change Reviewed By: bottler Differential Revision: D77549647 fbshipit-source-id: a0ef0bc015c643ad7416c781886e2e23b5105bdd	2025-06-30 14:13:27 -07:00
Melvin He	177eec6378	Error instead of crash for tensors on exotic devices (#1986 ) Summary: Pull Request resolved: https://github.com/facebookresearch/pytorch3d/pull/1986 Adds device checks to prevent crashes on unsupported devices in PyTorch3D. Updates the `pytorch3d_cutils.h` file to include new macro CHECK_CPU that checks if a tensor is on the CPU device. This macro is then used in the directories from `ball_query` to `face_area_normals` to ensure that tensors are not on unsupported devices like MPS. Note that this is the first part of a larger change, and to keep diffs better organized, subsequent diffs will update the remaining directories. Reviewed By: bottler Differential Revision: D77473296 fbshipit-source-id: 13dc84620dee667bddebad1dade2d2cb5a59c737	2025-06-30 12:27:38 -07:00
Srivathsan Govindarajan	71db7a0ea2	Removing dynamic shape ops and boolean indexing in matrix_to_quaternion Summary: The current implementation of `matrix_to_quaternion` and `_sqrt_positive_part` uses boolean indexing, which can slow down performance and cause incompatibility with `torch.compile` unless `torch._dynamo.config.capture_dynamic_output_shape_ops` is set to `True`. To enhance performance and compatibility, I recommend using `torch.gather` to select the best-conditioned quaternions and `F.relu` instead of `x>0` (bottler's suggestion) For a detailed comparison of the implementation differences when using `torch.compile`, please refer to my Bento notebook N7438339. Reviewed By: bottler Differential Revision: D77176230 fbshipit-source-id: 9a6a2e0015b5865056297d5f45badc3c425b93ce	2025-06-25 01:18:46 -07:00
Grace Cheng	6020323d94	Fix Self-Assignment in CUDA Stream Parameter in renderer.forward.device.h Summary: Resolved self-assignment warnings in the `renderer.forward.device.h` file by removing redundant assignments of the `stream` variable to itself in `cub::DeviceSelect::Flagged` function calls. This change eliminates compiler errors and ensures cleaner, more efficient code execution. Reviewed By: bottler Differential Revision: D76554140 fbshipit-source-id: 28eae0186246f51a8ac8002644f184349aa49560	2025-06-13 11:00:16 -07:00
Emmanuel Ferdman	182e845c19	Resolve logger warnings (#1981 ) Summary: # PR Summary This small PR resolves the annoying deprecation warnings of the `logger` library: ```python DeprecationWarning: The 'warn' method is deprecated, use 'warning' instead ``` Pull Request resolved: https://github.com/facebookresearch/pytorch3d/pull/1981 Reviewed By: MichaelRamamonjisoa Differential Revision: D75287169 Pulled By: bottler fbshipit-source-id: 9ff9f5dd648aca8d8bb5d33577909da711d18647	2025-06-10 02:27:54 -07:00
generatedunixname89002005287564	f315ac131b	Fix CQS signal facebook-unused-include-check in fbcode/vision/fair/pytorch3d/pytorch3d/csrc Reviewed By: dtolnay Differential Revision: D75938951 fbshipit-source-id: 8e4f9ce82ec988a30e4c8d54881b78560ceab0e0	2025-06-04 13:09:58 -07:00
Nick Riasanovsky	fc08621879	Fix distutils failure in Triton Beta testing Summary: Fixes the distutils issues similar to D73934713 Reviewed By: bottler Differential Revision: D75631611 fbshipit-source-id: 09c354d8cc51ff2c46f4688d7f674370e3f48f1e	2025-05-29 18:18:49 -07:00
generatedunixname89002005287564	3f327a516b	Fix CQS signal facebook-unused-include-check in fbcode/vision/fair/pytorch3d/pytorch3d/csrc/pulsar Reviewed By: dtolnay Differential Revision: D75209078 fbshipit-source-id: 6b67d3354091d18b8171a6f4b38465ffcc9e17c5	2025-05-26 19:14:57 -07:00
Ting Xu	366eff21d9	Fix PyTorch3D build failure on windows Summary: Replace #defines by typedefs by following the instructions at https://github.com/facebookresearch/pytorch3d/issues/1970?fbclid=IwY2xjawKZqMJleHRuA2FlbQIxMQBicmlkETFyWFczV2hMVmdOczJWellIAR7jxI6zGQiC5ag-FUXjSK12ljn7rmbMKc3HsLX-BC1TMpOUTJy-bsZxmfKzmw_aem_MIG_nc3eg7LL1o2fSAbl0A#issuecomment-2894339456 Reviewed By: bottler Differential Revision: D75083182 fbshipit-source-id: 7131fe555bb0da615b341e77ddd8761ebce9d7eb	2025-05-21 07:46:49 -07:00
Jeff Daily	0a59450f0e	remove IntWrapper (#1964 ) Summary: I could not access https://github.com/NVlabs/cub/issues/172 to understand whether IntWrapper was still necessary but the comment is from 5 years ago and causes problems for the ROCm build. Pull Request resolved: https://github.com/facebookresearch/pytorch3d/pull/1964 Reviewed By: MichaelRamamonjisoa Differential Revision: D71937895 Pulled By: bottler fbshipit-source-id: 5e0351e1bd8599b670436cd3464796eca33156f6	2025-03-28 08:16:54 -07:00