Run tests in GitHub Actions, not CircleCI (#1896)

Summary: Pull Request resolved: https://github.com/facebookresearch/pytorch3d/pull/1896 Differential Revision: D65272512 Pulled By: bottler
2026-04-16 18:05:59 +08:00 · 2024-10-31 08:41:20 -07:00
197 changed files with 823 additions and 1454 deletions
--- a/.circleci/regenerate.py
+++ b/.circleci/regenerate.py
@@ -88,6 +88,7 @@ def workflow_pair(
    upload=False,
    filter_branch,
 ):
+
    w = []
    py = python_version.replace(".", "")
    pyt = pytorch_version.replace(".", "")
@@ -126,6 +127,7 @@ def generate_base_workflow(
    btype,
    filter_branch=None,
 ):
+
    d = {
        "name": base_workflow_name,
        "python_version": python_version,
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -3,9 +3,6 @@ on:
  pull_request:
    branches:
      - main
-  push:
-    branches:
-      - main
 jobs:
  binary_linux_conda_cuda:
    runs-on: 4-core-ubuntu-gpu-t4
--- a/dev/linter.sh
+++ b/dev/linter.sh
@@ -10,7 +10,7 @@
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 DIR=$(dirname "${DIR}")

-if [[ -f "${DIR}/BUCK" ]]
+if [[ -f "${DIR}/TARGETS" ]]
 then
  pyfmt "${DIR}"
 else
@@ -36,5 +36,5 @@ then

  echo "Running pyre..."
  echo "To restart/kill pyre server, run 'pyre restart' or 'pyre kill' in fbcode/"
-  ( cd ~/fbsource/fbcode; arc pyre check //vision/fair/pytorch3d/... )
+  ( cd ~/fbsource/fbcode; pyre -l vision/fair/pytorch3d/ )
 fi
--- a/docs/examples/pulsar_basic.py
+++ b/docs/examples/pulsar_basic.py
@@ -10,7 +10,6 @@ This example demonstrates the most trivial, direct interface of the pulsar
 sphere renderer. It renders and saves an image with 10 random spheres.
 Output: basic.png.
 """
-
 import logging
 import math
 from os import path
--- a/docs/examples/pulsar_basic_unified.py
+++ b/docs/examples/pulsar_basic_unified.py
@@ -11,7 +11,6 @@ interface for sphere renderering. It renders and saves an image with
 10 random spheres.
 Output: basic-pt3d.png.
 """
-
 import logging
 from os import path

--- a/docs/examples/pulsar_cam.py
+++ b/docs/examples/pulsar_cam.py
@@ -14,7 +14,6 @@ distorted. Gradient-based optimization is used to converge towards the
 original camera parameters.
 Output: cam.gif.
 """
-
 import logging
 import math
 from os import path
--- a/docs/examples/pulsar_cam_unified.py
+++ b/docs/examples/pulsar_cam_unified.py
@@ -14,7 +14,6 @@ distorted. Gradient-based optimization is used to converge towards the
 original camera parameters.
 Output: cam-pt3d.gif
 """
-
 import logging
 from os import path

--- a/docs/examples/pulsar_multiview.py
+++ b/docs/examples/pulsar_multiview.py
@@ -18,7 +18,6 @@ This example is not available yet through the 'unified' interface,
 because opacity support has not landed in PyTorch3D for general data
 structures yet.
 """
-
 import logging
 import math
 from os import path
--- a/docs/examples/pulsar_optimization.py
+++ b/docs/examples/pulsar_optimization.py
@@ -13,7 +13,6 @@ The scene is initialized with random spheres. Gradient-based
 optimization is used to converge towards a faithful
 scene representation.
 """
-
 import logging
 import math

--- a/docs/examples/pulsar_optimization_unified.py
+++ b/docs/examples/pulsar_optimization_unified.py
@@ -13,7 +13,6 @@ The scene is initialized with random spheres. Gradient-based
 optimization is used to converge towards a faithful
 scene representation.
 """
-
 import logging
 import math

--- a/packaging/pytorch3d/meta.yaml
+++ b/packaging/pytorch3d/meta.yaml
@@ -32,6 +32,7 @@ requirements:

 build:
  string: py{{py}}_{{ environ['CU_VERSION'] }}_pyt{{ environ['PYTORCH_VERSION_NODOT']}}
+  # script: LD_LIBRARY_PATH=$PREFIX/lib:$BUILD_PREFIX/lib:$LD_LIBRARY_PATH python setup.py install --single-version-externally-managed --record=record.txt # [not win]
  script: python setup.py install --single-version-externally-managed --record=record.txt # [not win]
  script_env:
    - CUDA_HOME
@@ -56,6 +57,7 @@ test:
    - pandas
    - sqlalchemy
  commands:
+    #pytest .
    python -m unittest discover -v -s tests -t .


--- a/projects/implicitron_trainer/experiment.py
+++ b/projects/implicitron_trainer/experiment.py
@@ -7,7 +7,7 @@

 # pyre-unsafe

-""" "
+""""
 This file is the entry point for launching experiments with Implicitron.

 Launch Training
@@ -44,7 +44,6 @@ The outputs of the experiment are saved and logged in multiple ways:
        config file.

 """
-
 import logging
 import os
 import warnings
--- a/projects/implicitron_trainer/impl/model_factory.py
+++ b/projects/implicitron_trainer/impl/model_factory.py
@@ -26,6 +26,7 @@ logger = logging.getLogger(__name__)


 class ModelFactoryBase(ReplaceableBase):
+
    resume: bool = True  # resume from the last checkpoint

    def __call__(self, **kwargs) -> ImplicitronModelBase:
@@ -115,9 +116,7 @@ class ImplicitronModelFactory(ModelFactoryBase):
                        "cuda:%d" % 0: "cuda:%d" % accelerator.local_process_index
                    }
                model_state_dict = torch.load(
-                    model_io.get_model_path(model_path),
-                    map_location=map_location,
-                    weights_only=True,
+                    model_io.get_model_path(model_path), map_location=map_location
                )

                try:
--- a/projects/implicitron_trainer/impl/optimizer_factory.py
+++ b/projects/implicitron_trainer/impl/optimizer_factory.py
@@ -123,7 +123,6 @@ class ImplicitronOptimizerFactory(OptimizerFactoryBase):
        """
        # Get the parameters to optimize
        if hasattr(model, "_get_param_groups"):  # use the model function
-            # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
            p_groups = model._get_param_groups(self.lr, wd=self.weight_decay)
        else:
            p_groups = [
@@ -242,7 +241,7 @@ class ImplicitronOptimizerFactory(OptimizerFactoryBase):
                    map_location = {
                        "cuda:%d" % 0: "cuda:%d" % accelerator.local_process_index
                    }
-                optimizer_state = torch.load(opt_path, map_location, weights_only=True)
+                optimizer_state = torch.load(opt_path, map_location)
            else:
                raise FileNotFoundError(f"Optimizer state {opt_path} does not exist.")
        return optimizer_state
--- a/projects/implicitron_trainer/impl/training_loop.py
+++ b/projects/implicitron_trainer/impl/training_loop.py
@@ -161,6 +161,7 @@ class ImplicitronTrainingLoop(TrainingLoopBase):
        for epoch in range(start_epoch, self.max_epochs):
            # automatic new_epoch and plotting of stats at every epoch start
            with stats:
+
                # Make sure to re-seed random generators to ensure reproducibility
                # even after restart.
                seed_all_random_engines(seed + epoch)
@@ -394,7 +395,6 @@ class ImplicitronTrainingLoop(TrainingLoopBase):
            ):
                prefix = f"e{stats.epoch}_it{stats.it[trainmode]}"
                if hasattr(model, "visualize"):
-                    # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
                    model.visualize(
                        viz,
                        visdom_env_imgs,
--- a/projects/implicitron_trainer/tests/test_experiment.py
+++ b/projects/implicitron_trainer/tests/test_experiment.py
@@ -53,8 +53,12 @@ class TestExperiment(unittest.TestCase):
        cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_class_type = (
            "JsonIndexDatasetMapProvider"
        )
-        dataset_args = cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
-        dataloader_args = cfg.data_source_ImplicitronDataSource_args.data_loader_map_provider_SequenceDataLoaderMapProvider_args
+        dataset_args = (
+            cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
+        )
+        dataloader_args = (
+            cfg.data_source_ImplicitronDataSource_args.data_loader_map_provider_SequenceDataLoaderMapProvider_args
+        )
        dataset_args.category = "skateboard"
        dataset_args.test_restrict_sequence_id = 0
        dataset_args.dataset_root = "manifold://co3d/tree/extracted"
@@ -90,8 +94,12 @@ class TestExperiment(unittest.TestCase):
        cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_class_type = (
            "JsonIndexDatasetMapProvider"
        )
-        dataset_args = cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
-        dataloader_args = cfg.data_source_ImplicitronDataSource_args.data_loader_map_provider_SequenceDataLoaderMapProvider_args
+        dataset_args = (
+            cfg.data_source_ImplicitronDataSource_args.dataset_map_provider_JsonIndexDatasetMapProvider_args
+        )
+        dataloader_args = (
+            cfg.data_source_ImplicitronDataSource_args.data_loader_map_provider_SequenceDataLoaderMapProvider_args
+        )
        dataset_args.category = "skateboard"
        dataset_args.test_restrict_sequence_id = 0
        dataset_args.dataset_root = "manifold://co3d/tree/extracted"
@@ -103,7 +111,9 @@ class TestExperiment(unittest.TestCase):
        cfg.training_loop_ImplicitronTrainingLoop_args.max_epochs = 2
        cfg.training_loop_ImplicitronTrainingLoop_args.store_checkpoints = False
        cfg.optimizer_factory_ImplicitronOptimizerFactory_args.lr_policy = "Exponential"
-        cfg.optimizer_factory_ImplicitronOptimizerFactory_args.exponential_lr_step_size = 2
+        cfg.optimizer_factory_ImplicitronOptimizerFactory_args.exponential_lr_step_size = (
+            2
+        )

        if DEBUG:
            experiment.dump_cfg(cfg)
--- a/projects/implicitron_trainer/tests/test_optimizer_factory.py
+++ b/projects/implicitron_trainer/tests/test_optimizer_factory.py
@@ -81,9 +81,8 @@ class TestOptimizerFactory(unittest.TestCase):

    def test_param_overrides_self_param_group_assignment(self):
        pa, pb, pc = [torch.nn.Parameter(data=torch.tensor(i * 1.0)) for i in range(3)]
-        na, nb = (
-            Node(params=[pa]),
-            Node(params=[pb], param_groups={"self": "pb_self", "p1": "pb_param"}),
+        na, nb = Node(params=[pa]), Node(
+            params=[pb], param_groups={"self": "pb_self", "p1": "pb_param"}
        )
        root = Node(children=[na, nb], params=[pc], param_groups={"m1": "pb_member"})
        param_groups = self._get_param_groups(root)
--- a/projects/nerf/nerf/dataset.py
+++ b/projects/nerf/nerf/dataset.py
@@ -84,9 +84,9 @@ def get_nerf_datasets(

    if autodownload and any(not os.path.isfile(p) for p in (cameras_path, image_path)):
        # Automatically download the data files if missing.
-        download_data([dataset_name], data_root=data_root)
+        download_data((dataset_name,), data_root=data_root)

-    train_data = torch.load(cameras_path, weights_only=True)
+    train_data = torch.load(cameras_path)
    n_cameras = train_data["cameras"]["R"].shape[0]

    _image_max_image_pixels = Image.MAX_IMAGE_PIXELS
--- a/projects/nerf/nerf/stats.py
+++ b/projects/nerf/nerf/stats.py
@@ -194,6 +194,7 @@ class Stats:
        it = self.it[stat_set]

        for stat in self.log_vars:
+
            if stat not in self.stats[stat_set]:
                self.stats[stat_set][stat] = AverageMeter()

--- a/projects/nerf/test_nerf.py
+++ b/projects/nerf/test_nerf.py
@@ -24,6 +24,7 @@ CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs"

@hydra.main(config_path=CONFIG_DIR, config_name="lego")
 def main(cfg: DictConfig):
+
    # Device on which to run.
    if torch.cuda.is_available():
        device = "cuda"
@@ -62,7 +63,7 @@ def main(cfg: DictConfig):
        raise ValueError(f"Model checkpoint {checkpoint_path} does not exist!")

    print(f"Loading checkpoint {checkpoint_path}.")
-    loaded_data = torch.load(checkpoint_path, weights_only=True)
+    loaded_data = torch.load(checkpoint_path)
    # Do not load the cached xy grid.
    # - this allows setting an arbitrary evaluation image size.
    state_dict = {
--- a/projects/nerf/tests/test_raysampler.py
+++ b/projects/nerf/tests/test_raysampler.py
@@ -42,6 +42,7 @@ class TestRaysampler(unittest.TestCase):
        cameras, rays = [], []

        for _ in range(batch_size):
+
            R = random_rotations(1)
            T = torch.randn(1, 3)
            focal_length = torch.rand(1, 2) + 0.5
--- a/projects/nerf/train_nerf.py
+++ b/projects/nerf/train_nerf.py
@@ -25,6 +25,7 @@ CONFIG_DIR = os.path.join(os.path.dirname(os.path.realpath(__file__)), "configs"

@hydra.main(config_path=CONFIG_DIR, config_name="lego")
 def main(cfg: DictConfig):
+
    # Set the relevant seeds for reproducibility.
    np.random.seed(cfg.seed)
    torch.manual_seed(cfg.seed)
@@ -76,7 +77,7 @@ def main(cfg: DictConfig):
        # Resume training if requested.
        if cfg.resume and os.path.isfile(checkpoint_path):
            print(f"Resuming from checkpoint {checkpoint_path}.")
-            loaded_data = torch.load(checkpoint_path, weights_only=True)
+            loaded_data = torch.load(checkpoint_path)
            model.load_state_dict(loaded_data["model"])
            stats = pickle.loads(loaded_data["stats"])
            print(f"   => resuming from epoch {stats.epoch}.")
@@ -218,6 +219,7 @@ def main(cfg: DictConfig):

        # Validation
        if epoch % cfg.validation_epoch_interval == 0 and epoch > 0:
+
            # Sample a validation camera/image.
            val_batch = next(val_dataloader.__iter__())
            val_image, val_camera, camera_idx = val_batch[0].values()
--- a/pytorch3d/init.py
+++ b/pytorch3d/init.py
@@ -6,4 +6,4 @@

 # pyre-unsafe

-__version__ = "0.7.9"
+__version__ = "0.7.8"
--- a/pytorch3d/common/compat.py
+++ b/pytorch3d/common/compat.py
@@ -17,7 +17,7 @@ Some functions which depend on PyTorch or Python versions.


 def meshgrid_ij(
-    *A: Union[torch.Tensor, Sequence[torch.Tensor]],
+    *A: Union[torch.Tensor, Sequence[torch.Tensor]]
 ) -> Tuple[torch.Tensor, ...]:  # pragma: no cover
    """
    Like torch.meshgrid was before PyTorch 1.10.0, i.e. with indexing set to ij
--- a/pytorch3d/csrc/ball_query/ball_query.cu
+++ b/pytorch3d/csrc/ball_query/ball_query.cu
@@ -32,9 +32,7 @@ __global__ void BallQueryKernel(
    at::PackedTensorAccessor64<int64_t, 3, at::RestrictPtrTraits> idxs,
    at::PackedTensorAccessor64<scalar_t, 3, at::RestrictPtrTraits> dists,
    const int64_t K,
-    const float radius,
-    const float radius2,
-    const bool skip_points_outside_cube) {
+    const float radius2) {
  const int64_t N = p1.size(0);
  const int64_t chunks_per_cloud = (1 + (p1.size(1) - 1) / blockDim.x);
  const int64_t chunks_to_do = N * chunks_per_cloud;
@@ -53,19 +51,7 @@ __global__ void BallQueryKernel(
    // Iterate over points in p2 until desired count is reached or
    // all points have been considered
    for (int64_t j = 0, count = 0; j < lengths2[n] && count < K; ++j) {
-      if (skip_points_outside_cube) {
-        bool is_within_radius = true;
-        // Filter when any one coordinate is already outside the radius
-        for (int d = 0; is_within_radius && d < D; ++d) {
-          scalar_t abs_diff = fabs(p1[n][i][d] - p2[n][j][d]);
-          is_within_radius = (abs_diff <= radius);
-        }
-        if (!is_within_radius) {
-          continue;
-        }
-      }
-
-      // Else, calculate the distance between the points and compare
+      // Calculate the distance between the points
      scalar_t dist2 = 0.0;
      for (int d = 0; d < D; ++d) {
        scalar_t diff = p1[n][i][d] - p2[n][j][d];
@@ -91,8 +77,7 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCuda(
    const at::Tensor& lengths1, // (N,)
    const at::Tensor& lengths2, // (N,)
    int K,
-    float radius,
-    bool skip_points_outside_cube) {
+    float radius) {
  // Check inputs are on the same device
  at::TensorArg p1_t{p1, "p1", 1}, p2_t{p2, "p2", 2},
      lengths1_t{lengths1, "lengths1", 3}, lengths2_t{lengths2, "lengths2", 4};
@@ -135,9 +120,7 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCuda(
            idxs.packed_accessor64<int64_t, 3, at::RestrictPtrTraits>(),
            dists.packed_accessor64<float, 3, at::RestrictPtrTraits>(),
            K_64,
-            radius,
-            radius2,
-            skip_points_outside_cube);
+            radius2);
      }));

  AT_CUDA_CHECK(cudaGetLastError());
--- a/pytorch3d/csrc/ball_query/ball_query.h
+++ b/pytorch3d/csrc/ball_query/ball_query.h
@@ -25,9 +25,6 @@
 //      within the radius
 //    radius: the radius around each point within which the neighbors need to be
 //      located
-//    skip_points_outside_cube: If true, reduce multiplications of float values
-//      by not explicitly calculating distances to points that fall outside the
-//      D-cube with side length (2*radius) centered at each point in p1.
 //
 // Returns:
 //    p1_neighbor_idx: LongTensor of shape (N, P1, K), where
@@ -49,8 +46,7 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCpu(
    const at::Tensor& lengths1,
    const at::Tensor& lengths2,
    const int K,
-    const float radius,
-    const bool skip_points_outside_cube);
+    const float radius);

 // CUDA implementation
 std::tuple<at::Tensor, at::Tensor> BallQueryCuda(
@@ -59,8 +55,7 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCuda(
    const at::Tensor& lengths1,
    const at::Tensor& lengths2,
    const int K,
-    const float radius,
-    const bool skip_points_outside_cube);
+    const float radius);

 // Implementation which is exposed
 // Note: the backward pass reuses the KNearestNeighborBackward kernel
@@ -70,8 +65,7 @@ inline std::tuple<at::Tensor, at::Tensor> BallQuery(
    const at::Tensor& lengths1,
    const at::Tensor& lengths2,
    int K,
-    float radius,
-    bool skip_points_outside_cube) {
+    float radius) {
  if (p1.is_cuda() || p2.is_cuda()) {
 #ifdef WITH_CUDA
    CHECK_CUDA(p1);
@@ -82,20 +76,16 @@ inline std::tuple<at::Tensor, at::Tensor> BallQuery(
        lengths1.contiguous(),
        lengths2.contiguous(),
        K,
-        radius,
-        skip_points_outside_cube);
+        radius);
 #else
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(p1);
-  CHECK_CPU(p2);
  return BallQueryCpu(
      p1.contiguous(),
      p2.contiguous(),
      lengths1.contiguous(),
      lengths2.contiguous(),
      K,
-      radius,
-      skip_points_outside_cube);
+      radius);
 }
--- a/pytorch3d/csrc/ball_query/ball_query_cpu.cpp
+++ b/pytorch3d/csrc/ball_query/ball_query_cpu.cpp
@@ -6,8 +6,8 @@
 * LICENSE file in the root directory of this source tree.
 */

-#include <math.h>
 #include <torch/extension.h>
+#include <queue>
 #include <tuple>

 std::tuple<at::Tensor, at::Tensor> BallQueryCpu(
@@ -16,8 +16,7 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCpu(
    const at::Tensor& lengths1,
    const at::Tensor& lengths2,
    int K,
-    float radius,
-    bool skip_points_outside_cube) {
+    float radius) {
  const int N = p1.size(0);
  const int P1 = p1.size(1);
  const int D = p1.size(2);
@@ -39,16 +38,6 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCpu(
    const int64_t length2 = lengths2_a[n];
    for (int64_t i = 0; i < length1; ++i) {
      for (int64_t j = 0, count = 0; j < length2 && count < K; ++j) {
-        if (skip_points_outside_cube) {
-          bool is_within_radius = true;
-          for (int d = 0; is_within_radius && d < D; ++d) {
-            float abs_diff = fabs(p1_a[n][i][d] - p2_a[n][j][d]);
-            is_within_radius = (abs_diff <= radius);
-          }
-          if (!is_within_radius) {
-            continue;
-          }
-        }
        float dist2 = 0;
        for (int d = 0; d < D; ++d) {
          float diff = p1_a[n][i][d] - p2_a[n][j][d];
--- a/pytorch3d/csrc/blending/sigmoid_alpha_blend.h
+++ b/pytorch3d/csrc/blending/sigmoid_alpha_blend.h
@@ -98,11 +98,6 @@ at::Tensor SigmoidAlphaBlendBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(distances);
-  CHECK_CPU(pix_to_face);
-  CHECK_CPU(alphas);
-  CHECK_CPU(grad_alphas);
-
  return SigmoidAlphaBlendBackwardCpu(
      grad_alphas, alphas, distances, pix_to_face, sigma);
 }
--- a/pytorch3d/csrc/compositing/alpha_composite.cu
+++ b/pytorch3d/csrc/compositing/alpha_composite.cu
@@ -28,16 +28,17 @@ __global__ void alphaCompositeCudaForwardKernel(
    const at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> alphas,
    const at::PackedTensorAccessor64<int64_t, 4, at::RestrictPtrTraits> points_idx) {
  // clang-format on
+  const int64_t batch_size = result.size(0);
  const int64_t C = features.size(0);
  const int64_t H = points_idx.size(2);
  const int64_t W = points_idx.size(3);

  // Get the batch and index
-  const auto batch = blockIdx.x;
+  const int batch = blockIdx.x;

  const int num_pixels = C * H * W;
-  const auto num_threads = gridDim.y * blockDim.x;
-  const auto tid = blockIdx.y * blockDim.x + threadIdx.x;
+  const int num_threads = gridDim.y * blockDim.x;
+  const int tid = blockIdx.y * blockDim.x + threadIdx.x;

  // Iterate over each feature in each pixel
  for (int pid = tid; pid < num_pixels; pid += num_threads) {
@@ -78,16 +79,17 @@ __global__ void alphaCompositeCudaBackwardKernel(
    const at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> alphas,
    const at::PackedTensorAccessor64<int64_t, 4, at::RestrictPtrTraits> points_idx) {
  // clang-format on
+  const int64_t batch_size = points_idx.size(0);
  const int64_t C = features.size(0);
  const int64_t H = points_idx.size(2);
  const int64_t W = points_idx.size(3);

  // Get the batch and index
-  const auto batch = blockIdx.x;
+  const int batch = blockIdx.x;

  const int num_pixels = C * H * W;
-  const auto num_threads = gridDim.y * blockDim.x;
-  const auto tid = blockIdx.y * blockDim.x + threadIdx.x;
+  const int num_threads = gridDim.y * blockDim.x;
+  const int tid = blockIdx.y * blockDim.x + threadIdx.x;

  // Parallelize over each feature in each pixel in images of size H * W,
  // for each image in the batch of size batch_size
--- a/pytorch3d/csrc/compositing/alpha_composite.h
+++ b/pytorch3d/csrc/compositing/alpha_composite.h
@@ -74,9 +74,6 @@ torch::Tensor alphaCompositeForward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
-    CHECK_CPU(features);
-    CHECK_CPU(alphas);
-    CHECK_CPU(points_idx);
    return alphaCompositeCpuForward(features, alphas, points_idx);
  }
 }
@@ -104,11 +101,6 @@ std::tuple<torch::Tensor, torch::Tensor> alphaCompositeBackward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
-    CHECK_CPU(grad_outputs);
-    CHECK_CPU(features);
-    CHECK_CPU(alphas);
-    CHECK_CPU(points_idx);
-
    return alphaCompositeCpuBackward(
        grad_outputs, features, alphas, points_idx);
  }
--- a/pytorch3d/csrc/compositing/norm_weighted_sum.cu
+++ b/pytorch3d/csrc/compositing/norm_weighted_sum.cu
@@ -28,16 +28,17 @@ __global__ void weightedSumNormCudaForwardKernel(
    const at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> alphas,
    const at::PackedTensorAccessor64<int64_t, 4, at::RestrictPtrTraits> points_idx) {
  // clang-format on
+  const int64_t batch_size = result.size(0);
  const int64_t C = features.size(0);
  const int64_t H = points_idx.size(2);
  const int64_t W = points_idx.size(3);

  // Get the batch and index
-  const auto batch = blockIdx.x;
+  const int batch = blockIdx.x;

  const int num_pixels = C * H * W;
-  const auto num_threads = gridDim.y * blockDim.x;
-  const auto tid = blockIdx.y * blockDim.x + threadIdx.x;
+  const int num_threads = gridDim.y * blockDim.x;
+  const int tid = blockIdx.y * blockDim.x + threadIdx.x;

  // Parallelize over each feature in each pixel in images of size H * W,
  // for each image in the batch of size batch_size
@@ -91,16 +92,17 @@ __global__ void weightedSumNormCudaBackwardKernel(
    const at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> alphas,
    const at::PackedTensorAccessor64<int64_t, 4, at::RestrictPtrTraits> points_idx) {
  // clang-format on
+  const int64_t batch_size = points_idx.size(0);
  const int64_t C = features.size(0);
  const int64_t H = points_idx.size(2);
  const int64_t W = points_idx.size(3);

  // Get the batch and index
-  const auto batch = blockIdx.x;
+  const int batch = blockIdx.x;

  const int num_pixels = C * W * H;
-  const auto num_threads = gridDim.y * blockDim.x;
-  const auto tid = blockIdx.y * blockDim.x + threadIdx.x;
+  const int num_threads = gridDim.y * blockDim.x;
+  const int tid = blockIdx.y * blockDim.x + threadIdx.x;

  // Parallelize over each feature in each pixel in images of size H * W,
  // for each image in the batch of size batch_size
--- a/pytorch3d/csrc/compositing/norm_weighted_sum.h
+++ b/pytorch3d/csrc/compositing/norm_weighted_sum.h
@@ -73,10 +73,6 @@ torch::Tensor weightedSumNormForward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
-    CHECK_CPU(features);
-    CHECK_CPU(alphas);
-    CHECK_CPU(points_idx);
-
    return weightedSumNormCpuForward(features, alphas, points_idx);
  }
 }
@@ -104,11 +100,6 @@ std::tuple<torch::Tensor, torch::Tensor> weightedSumNormBackward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
-    CHECK_CPU(grad_outputs);
-    CHECK_CPU(features);
-    CHECK_CPU(alphas);
-    CHECK_CPU(points_idx);
-
    return weightedSumNormCpuBackward(
        grad_outputs, features, alphas, points_idx);
  }
--- a/pytorch3d/csrc/compositing/weighted_sum.cu
+++ b/pytorch3d/csrc/compositing/weighted_sum.cu
@@ -26,16 +26,17 @@ __global__ void weightedSumCudaForwardKernel(
    const at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> alphas,
    const at::PackedTensorAccessor64<int64_t, 4, at::RestrictPtrTraits> points_idx) {
  // clang-format on
+  const int64_t batch_size = result.size(0);
  const int64_t C = features.size(0);
  const int64_t H = points_idx.size(2);
  const int64_t W = points_idx.size(3);

  // Get the batch and index
-  const auto batch = blockIdx.x;
+  const int batch = blockIdx.x;

  const int num_pixels = C * H * W;
-  const auto num_threads = gridDim.y * blockDim.x;
-  const auto tid = blockIdx.y * blockDim.x + threadIdx.x;
+  const int num_threads = gridDim.y * blockDim.x;
+  const int tid = blockIdx.y * blockDim.x + threadIdx.x;

  // Parallelize over each feature in each pixel in images of size H * W,
  // for each image in the batch of size batch_size
@@ -73,16 +74,17 @@ __global__ void weightedSumCudaBackwardKernel(
    const at::PackedTensorAccessor64<float, 4, at::RestrictPtrTraits> alphas,
    const at::PackedTensorAccessor64<int64_t, 4, at::RestrictPtrTraits> points_idx) {
  // clang-format on
+  const int64_t batch_size = points_idx.size(0);
  const int64_t C = features.size(0);
  const int64_t H = points_idx.size(2);
  const int64_t W = points_idx.size(3);

  // Get the batch and index
-  const auto batch = blockIdx.x;
+  const int batch = blockIdx.x;

  const int num_pixels = C * H * W;
-  const auto num_threads = gridDim.y * blockDim.x;
-  const auto tid = blockIdx.y * blockDim.x + threadIdx.x;
+  const int num_threads = gridDim.y * blockDim.x;
+  const int tid = blockIdx.y * blockDim.x + threadIdx.x;

  // Iterate over each pixel to compute the contribution to the
  // gradient for the features and weights
--- a/pytorch3d/csrc/compositing/weighted_sum.h
+++ b/pytorch3d/csrc/compositing/weighted_sum.h
@@ -72,9 +72,6 @@ torch::Tensor weightedSumForward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
-    CHECK_CPU(features);
-    CHECK_CPU(alphas);
-    CHECK_CPU(points_idx);
    return weightedSumCpuForward(features, alphas, points_idx);
  }
 }
@@ -101,11 +98,6 @@ std::tuple<torch::Tensor, torch::Tensor> weightedSumBackward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
-    CHECK_CPU(grad_outputs);
-    CHECK_CPU(features);
-    CHECK_CPU(alphas);
-    CHECK_CPU(points_idx);
-
    return weightedSumCpuBackward(grad_outputs, features, alphas, points_idx);
  }
 }
--- a/pytorch3d/csrc/ext.cpp
+++ b/pytorch3d/csrc/ext.cpp
@@ -8,6 +8,7 @@

 // clang-format off
 #include "./pulsar/global.h" // Include before <torch/extension.h>.
+#include <torch/extension.h>
 // clang-format on
 #include "./pulsar/pytorch/renderer.h"
 #include "./pulsar/pytorch/tensor_util.h"
@@ -105,16 +106,15 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  py::class_<
      pulsar::pytorch::Renderer,
      std::shared_ptr<pulsar::pytorch::Renderer>>(m, "PulsarRenderer")
-      .def(
-          py::init<
-              const uint&,
-              const uint&,
-              const uint&,
-              const bool&,
-              const bool&,
-              const float&,
-              const uint&,
-              const uint&>())
+      .def(py::init<
+           const uint&,
+           const uint&,
+           const uint&,
+           const bool&,
+           const bool&,
+           const float&,
+           const uint&,
+           const uint&>())
      .def(
          "__eq__",
          [](const pulsar::pytorch::Renderer& a,
@@ -149,10 +149,10 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
          py::arg("gamma"),
          py::arg("max_depth"),
          py::arg("min_depth") /* = 0.f*/,
-          py::arg("bg_col") /* = std::nullopt not exposed properly in
-                               pytorch 1.1. */
+          py::arg(
+              "bg_col") /* = at::nullopt not exposed properly in pytorch 1.1. */
          ,
-          py::arg("opacity") /* = std::nullopt ... */,
+          py::arg("opacity") /* = at::nullopt ... */,
          py::arg("percent_allowed_difference") = 0.01f,
          py::arg("max_n_hits") = MAX_UINT,
          py::arg("mode") = 0)
--- a/pytorch3d/csrc/face_areas_normals/face_areas_normals.h
+++ b/pytorch3d/csrc/face_areas_normals/face_areas_normals.h
@@ -60,8 +60,6 @@ std::tuple<at::Tensor, at::Tensor> FaceAreasNormalsForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(verts);
-  CHECK_CPU(faces);
  return FaceAreasNormalsForwardCpu(verts, faces);
 }

@@ -82,9 +80,5 @@ at::Tensor FaceAreasNormalsBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(grad_areas);
-  CHECK_CPU(grad_normals);
-  CHECK_CPU(verts);
-  CHECK_CPU(faces);
  return FaceAreasNormalsBackwardCpu(grad_areas, grad_normals, verts, faces);
 }
--- a/pytorch3d/csrc/gather_scatter/gather_scatter.cu
+++ b/pytorch3d/csrc/gather_scatter/gather_scatter.cu
@@ -20,14 +20,14 @@ __global__ void GatherScatterCudaKernel(
    const size_t V,
    const size_t D,
    const size_t E) {
-  const auto tid = threadIdx.x;
+  const int tid = threadIdx.x;

  // Reverse the vertex order if backward.
  const int v0_idx = backward ? 1 : 0;
  const int v1_idx = backward ? 0 : 1;

  // Edges are split evenly across the blocks.
-  for (auto e = blockIdx.x; e < E; e += gridDim.x) {
+  for (int e = blockIdx.x; e < E; e += gridDim.x) {
    // Get indices of vertices which form the edge.
    const int64_t v0 = edges[2 * e + v0_idx];
    const int64_t v1 = edges[2 * e + v1_idx];
@@ -35,7 +35,7 @@ __global__ void GatherScatterCudaKernel(
    // Split vertex features evenly across threads.
    // This implementation will be quite wasteful when D<128 since there will be
    // a lot of threads doing nothing.
-    for (auto d = tid; d < D; d += blockDim.x) {
+    for (int d = tid; d < D; d += blockDim.x) {
      const float val = input[v1 * D + d];
      float* address = output + v0 * D + d;
      atomicAdd(address, val);
--- a/pytorch3d/csrc/gather_scatter/gather_scatter.h
+++ b/pytorch3d/csrc/gather_scatter/gather_scatter.h
@@ -53,7 +53,5 @@ at::Tensor GatherScatter(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(input);
-  CHECK_CPU(edges);
  return GatherScatterCpu(input, edges, directed, backward);
 }
--- a/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.cu
+++ b/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.cu
@@ -20,8 +20,8 @@ __global__ void InterpFaceAttrsForwardKernel(
    const size_t P,
    const size_t F,
    const size_t D) {
-  const auto tid = threadIdx.x + blockIdx.x * blockDim.x;
-  const auto num_threads = blockDim.x * gridDim.x;
+  const int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  const int num_threads = blockDim.x * gridDim.x;
  for (int pd = tid; pd < P * D; pd += num_threads) {
    const int p = pd / D;
    const int d = pd % D;
@@ -93,8 +93,8 @@ __global__ void InterpFaceAttrsBackwardKernel(
    const size_t P,
    const size_t F,
    const size_t D) {
-  const auto tid = threadIdx.x + blockIdx.x * blockDim.x;
-  const auto num_threads = blockDim.x * gridDim.x;
+  const int tid = threadIdx.x + blockIdx.x * blockDim.x;
+  const int num_threads = blockDim.x * gridDim.x;
  for (int pd = tid; pd < P * D; pd += num_threads) {
    const int p = pd / D;
    const int d = pd % D;
--- a/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.h
+++ b/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.h
@@ -57,8 +57,6 @@ at::Tensor InterpFaceAttrsForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(face_attrs);
-  CHECK_CPU(barycentric_coords);
  return InterpFaceAttrsForwardCpu(pix_to_face, barycentric_coords, face_attrs);
 }

@@ -108,9 +106,6 @@ std::tuple<at::Tensor, at::Tensor> InterpFaceAttrsBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(face_attrs);
-  CHECK_CPU(barycentric_coords);
-  CHECK_CPU(grad_pix_attrs);
  return InterpFaceAttrsBackwardCpu(
      pix_to_face, barycentric_coords, face_attrs, grad_pix_attrs);
 }
--- a/pytorch3d/csrc/iou_box3d/iou_box3d.h
+++ b/pytorch3d/csrc/iou_box3d/iou_box3d.h
@@ -44,7 +44,5 @@ inline std::tuple<at::Tensor, at::Tensor> IoUBox3D(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(boxes1);
-  CHECK_CPU(boxes2);
  return IoUBox3DCpu(boxes1.contiguous(), boxes2.contiguous());
 }
--- a/pytorch3d/csrc/iou_box3d/iou_box3d_cpu.cpp
+++ b/pytorch3d/csrc/iou_box3d/iou_box3d_cpu.cpp
@@ -7,7 +7,10 @@
 */

 #include <torch/extension.h>
+#include <torch/torch.h>
 #include <list>
+#include <numeric>
+#include <queue>
 #include <tuple>
 #include "iou_box3d/iou_utils.h"

--- a/pytorch3d/csrc/iou_box3d/iou_utils.cuh
+++ b/pytorch3d/csrc/iou_box3d/iou_utils.cuh
@@ -461,8 +461,10 @@ __device__ inline std::tuple<float3, float3> ArgMaxVerts(
 __device__ inline bool IsCoplanarTriTri(
    const FaceVerts& tri1,
    const FaceVerts& tri2) {
+  const float3 tri1_ctr = FaceCenter({tri1.v0, tri1.v1, tri1.v2});
  const float3 tri1_n = FaceNormal({tri1.v0, tri1.v1, tri1.v2});

+  const float3 tri2_ctr = FaceCenter({tri2.v0, tri2.v1, tri2.v2});
  const float3 tri2_n = FaceNormal({tri2.v0, tri2.v1, tri2.v2});

  // Check if parallel
@@ -498,6 +500,7 @@ __device__ inline bool IsCoplanarTriPlane(
    const FaceVerts& tri,
    const FaceVerts& plane,
    const float3& normal) {
+  const float3 tri_ctr = FaceCenter({tri.v0, tri.v1, tri.v2});
  const float3 nt = FaceNormal({tri.v0, tri.v1, tri.v2});

  // check if parallel
@@ -725,7 +728,7 @@ __device__ inline int BoxIntersections(
      }
    }
    // Update the face_verts_out tris
-    num_tris = min(MAX_TRIS, offset);
+    num_tris = offset;
    for (int j = 0; j < num_tris; ++j) {
      face_verts_out[j] = tri_verts_updated[j];
    }
--- a/pytorch3d/csrc/knn/knn.h
+++ b/pytorch3d/csrc/knn/knn.h
@@ -74,8 +74,6 @@ std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdx(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(p1);
-  CHECK_CPU(p2);
  return KNearestNeighborIdxCpu(p1, p2, lengths1, lengths2, norm, K);
 }

@@ -142,8 +140,6 @@ std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(p1);
-  CHECK_CPU(p2);
  return KNearestNeighborBackwardCpu(
      p1, p2, lengths1, lengths2, idxs, norm, grad_dists);
 }
--- a/pytorch3d/csrc/marching_cubes/marching_cubes.h
+++ b/pytorch3d/csrc/marching_cubes/marching_cubes.h
@@ -58,6 +58,5 @@ inline std::tuple<at::Tensor, at::Tensor, at::Tensor> MarchingCubes(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(vol);
  return MarchingCubesCpu(vol.contiguous(), isolevel);
 }
--- a/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.h
+++ b/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.h
@@ -88,8 +88,6 @@ at::Tensor PackedToPadded(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(inputs_packed);
-  CHECK_CPU(first_idxs);
  return PackedToPaddedCpu(inputs_packed, first_idxs, max_size);
 }

@@ -107,7 +105,5 @@ at::Tensor PaddedToPacked(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(inputs_padded);
-  CHECK_CPU(first_idxs);
  return PaddedToPackedCpu(inputs_padded, first_idxs, num_inputs);
 }
--- a/pytorch3d/csrc/point_mesh/point_mesh_cpu.cpp
+++ b/pytorch3d/csrc/point_mesh/point_mesh_cpu.cpp
@@ -174,8 +174,8 @@ std::tuple<at::Tensor, at::Tensor> HullHullDistanceForwardCpu(
  at::Tensor idxs = at::zeros({A_N,}, as_first_idx.options());
  // clang-format on

-  auto as_a = as.accessor<float, H1 == 1 ? 2 : 3>();
-  auto bs_a = bs.accessor<float, H2 == 1 ? 2 : 3>();
+  auto as_a = as.accessor < float, H1 == 1 ? 2 : 3 > ();
+  auto bs_a = bs.accessor < float, H2 == 1 ? 2 : 3 > ();
  auto as_first_idx_a = as_first_idx.accessor<int64_t, 1>();
  auto bs_first_idx_a = bs_first_idx.accessor<int64_t, 1>();
  auto dists_a = dists.accessor<float, 1>();
@@ -230,10 +230,10 @@ std::tuple<at::Tensor, at::Tensor> HullHullDistanceBackwardCpu(
  at::Tensor grad_as = at::zeros_like(as);
  at::Tensor grad_bs = at::zeros_like(bs);

-  auto as_a = as.accessor<float, H1 == 1 ? 2 : 3>();
-  auto bs_a = bs.accessor<float, H2 == 1 ? 2 : 3>();
-  auto grad_as_a = grad_as.accessor<float, H1 == 1 ? 2 : 3>();
-  auto grad_bs_a = grad_bs.accessor<float, H2 == 1 ? 2 : 3>();
+  auto as_a = as.accessor < float, H1 == 1 ? 2 : 3 > ();
+  auto bs_a = bs.accessor < float, H2 == 1 ? 2 : 3 > ();
+  auto grad_as_a = grad_as.accessor < float, H1 == 1 ? 2 : 3 > ();
+  auto grad_bs_a = grad_bs.accessor < float, H2 == 1 ? 2 : 3 > ();
  auto idx_bs_a = idx_bs.accessor<int64_t, 1>();
  auto grad_dists_a = grad_dists.accessor<float, 1>();

--- a/pytorch3d/csrc/point_mesh/point_mesh_cuda.cu
+++ b/pytorch3d/csrc/point_mesh/point_mesh_cuda.cu
@@ -110,7 +110,7 @@ __global__ void DistanceForwardKernel(
    __syncthreads();

    // Perform reduction in shared memory.
-    for (auto s = blockDim.x / 2; s > 32; s >>= 1) {
+    for (int s = blockDim.x / 2; s > 32; s >>= 1) {
      if (tid < s) {
        if (min_dists[tid] > min_dists[tid + s]) {
          min_dists[tid] = min_dists[tid + s];
@@ -502,8 +502,8 @@ __global__ void PointFaceArrayForwardKernel(
  const float3* tris_f3 = (float3*)tris;

  // Parallelize over P * S computations
-  const auto num_threads = gridDim.x * blockDim.x;
-  const auto tid = blockIdx.x * blockDim.x + threadIdx.x;
+  const int num_threads = gridDim.x * blockDim.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;

  for (int t_i = tid; t_i < P * T; t_i += num_threads) {
    const int t = t_i / P; // segment index.
@@ -576,8 +576,8 @@ __global__ void PointFaceArrayBackwardKernel(
  const float3* tris_f3 = (float3*)tris;

  // Parallelize over P * S computations
-  const auto num_threads = gridDim.x * blockDim.x;
-  const auto tid = blockIdx.x * blockDim.x + threadIdx.x;
+  const int num_threads = gridDim.x * blockDim.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;

  for (int t_i = tid; t_i < P * T; t_i += num_threads) {
    const int t = t_i / P; // triangle index.
@@ -683,8 +683,8 @@ __global__ void PointEdgeArrayForwardKernel(
  float3* segms_f3 = (float3*)segms;

  // Parallelize over P * S computations
-  const auto num_threads = gridDim.x * blockDim.x;
-  const auto tid = blockIdx.x * blockDim.x + threadIdx.x;
+  const int num_threads = gridDim.x * blockDim.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;

  for (int t_i = tid; t_i < P * S; t_i += num_threads) {
    const int s = t_i / P; // segment index.
@@ -752,8 +752,8 @@ __global__ void PointEdgeArrayBackwardKernel(
  float3* segms_f3 = (float3*)segms;

  // Parallelize over P * S computations
-  const auto num_threads = gridDim.x * blockDim.x;
-  const auto tid = blockIdx.x * blockDim.x + threadIdx.x;
+  const int num_threads = gridDim.x * blockDim.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;

  for (int t_i = tid; t_i < P * S; t_i += num_threads) {
    const int s = t_i / P; // segment index.
--- a/pytorch3d/csrc/point_mesh/point_mesh_cuda.h
+++ b/pytorch3d/csrc/point_mesh/point_mesh_cuda.h
@@ -88,10 +88,6 @@ std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(points);
-  CHECK_CPU(points_first_idx);
-  CHECK_CPU(tris);
-  CHECK_CPU(tris_first_idx);
  return PointFaceDistanceForwardCpu(
      points, points_first_idx, tris, tris_first_idx, min_triangle_area);
 }
@@ -147,10 +143,6 @@ std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(points);
-  CHECK_CPU(tris);
-  CHECK_CPU(idx_points);
-  CHECK_CPU(grad_dists);
  return PointFaceDistanceBackwardCpu(
      points, tris, idx_points, grad_dists, min_triangle_area);
 }
@@ -229,10 +221,6 @@ std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(points);
-  CHECK_CPU(points_first_idx);
-  CHECK_CPU(tris);
-  CHECK_CPU(tris_first_idx);
  return FacePointDistanceForwardCpu(
      points, points_first_idx, tris, tris_first_idx, min_triangle_area);
 }
@@ -289,10 +277,6 @@ std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(points);
-  CHECK_CPU(tris);
-  CHECK_CPU(idx_tris);
-  CHECK_CPU(grad_dists);
  return FacePointDistanceBackwardCpu(
      points, tris, idx_tris, grad_dists, min_triangle_area);
 }
@@ -362,10 +346,6 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(points);
-  CHECK_CPU(points_first_idx);
-  CHECK_CPU(segms);
-  CHECK_CPU(segms_first_idx);
  return PointEdgeDistanceForwardCpu(
      points, points_first_idx, segms, segms_first_idx, max_points);
 }
@@ -416,10 +396,6 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(points);
-  CHECK_CPU(segms);
-  CHECK_CPU(idx_points);
-  CHECK_CPU(grad_dists);
  return PointEdgeDistanceBackwardCpu(points, segms, idx_points, grad_dists);
 }

@@ -488,10 +464,6 @@ std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(points);
-  CHECK_CPU(points_first_idx);
-  CHECK_CPU(segms);
-  CHECK_CPU(segms_first_idx);
  return EdgePointDistanceForwardCpu(
      points, points_first_idx, segms, segms_first_idx, max_segms);
 }
@@ -542,10 +514,6 @@ std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(points);
-  CHECK_CPU(segms);
-  CHECK_CPU(idx_segms);
-  CHECK_CPU(grad_dists);
  return EdgePointDistanceBackwardCpu(points, segms, idx_segms, grad_dists);
 }

@@ -599,8 +567,6 @@ torch::Tensor PointFaceArrayDistanceForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(points);
-  CHECK_CPU(tris);
  return PointFaceArrayDistanceForwardCpu(points, tris, min_triangle_area);
 }

@@ -647,9 +613,6 @@ std::tuple<torch::Tensor, torch::Tensor> PointFaceArrayDistanceBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(points);
-  CHECK_CPU(tris);
-  CHECK_CPU(grad_dists);
  return PointFaceArrayDistanceBackwardCpu(
      points, tris, grad_dists, min_triangle_area);
 }
@@ -698,8 +661,6 @@ torch::Tensor PointEdgeArrayDistanceForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(points);
-  CHECK_CPU(segms);
  return PointEdgeArrayDistanceForwardCpu(points, segms);
 }

@@ -742,8 +703,5 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeArrayDistanceBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(points);
-  CHECK_CPU(segms);
-  CHECK_CPU(grad_dists);
  return PointEdgeArrayDistanceBackwardCpu(points, segms, grad_dists);
 }
--- a/pytorch3d/csrc/points_to_volumes/points_to_volumes.h
+++ b/pytorch3d/csrc/points_to_volumes/points_to_volumes.h
@@ -104,12 +104,6 @@ inline void PointsToVolumesForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(points_3d);
-  CHECK_CPU(points_features);
-  CHECK_CPU(volume_densities);
-  CHECK_CPU(volume_features);
-  CHECK_CPU(grid_sizes);
-  CHECK_CPU(mask);
  PointsToVolumesForwardCpu(
      points_3d,
      points_features,
@@ -189,14 +183,6 @@ inline void PointsToVolumesBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(points_3d);
-  CHECK_CPU(points_features);
-  CHECK_CPU(grid_sizes);
-  CHECK_CPU(mask);
-  CHECK_CPU(grad_volume_densities);
-  CHECK_CPU(grad_volume_features);
-  CHECK_CPU(grad_points_3d);
-  CHECK_CPU(grad_points_features);
  PointsToVolumesBackwardCpu(
      points_3d,
      points_features,
--- a/pytorch3d/csrc/points_to_volumes/points_to_volumes_cpu.cpp
+++ b/pytorch3d/csrc/points_to_volumes/points_to_volumes_cpu.cpp
@@ -8,7 +8,9 @@

 #include <torch/csrc/autograd/VariableTypeUtils.h>
 #include <torch/extension.h>
+#include <algorithm>
 #include <cmath>
+#include <thread>
 #include <vector>

 // In the x direction, the location {0, ..., grid_size_x - 1} correspond to
--- a/pytorch3d/csrc/pulsar/global.h
+++ b/pytorch3d/csrc/pulsar/global.h
@@ -15,8 +15,8 @@
 #endif

 #if defined(_WIN64) || defined(_WIN32)
-using uint = unsigned int;
-using ushort = unsigned short;
+#define uint unsigned int
+#define ushort unsigned short
 #endif

 #include "./logging.h" // <- include before torch/extension.h
--- a/pytorch3d/csrc/pulsar/gpu/commands.h
+++ b/pytorch3d/csrc/pulsar/gpu/commands.h
@@ -417,7 +417,7 @@ __device__ static float atomicMin(float* address, float val) {
      (OUT_PTR),              \
      (NUM_SELECTED_PTR),     \
      (NUM_ITEMS),            \
-      (STREAM));
+      stream = (STREAM));

 #define COPY_HOST_DEV(PTR_D, PTR_H, TYPE, SIZE) \
  HANDLECUDA(cudaMemcpy(                        \
--- a/pytorch3d/csrc/pulsar/host/commands.h
+++ b/pytorch3d/csrc/pulsar/host/commands.h
@@ -357,11 +357,11 @@ void MAX_WS(
 //
 //
 #define END_PARALLEL() \
-  end_parallel:;       \
+  end_parallel :;      \
  }
 #define END_PARALLEL_NORET() }
 #define END_PARALLEL_2D() \
-  end_parallel:;          \
+  end_parallel :;         \
  }                       \
  }
 #define END_PARALLEL_2D_NORET() \
--- a/pytorch3d/csrc/pulsar/include/camera.h
+++ b/pytorch3d/csrc/pulsar/include/camera.h
@@ -70,6 +70,11 @@ struct CamGradInfo {
  float3 pixel_dir_y;
 };

+// TODO: remove once https://github.com/NVlabs/cub/issues/172 is resolved.
+struct IntWrapper {
+  int val;
+};
+
 } // namespace pulsar

 #endif
--- a/pytorch3d/csrc/pulsar/include/math.h
+++ b/pytorch3d/csrc/pulsar/include/math.h
@@ -149,6 +149,11 @@ IHD CamGradInfo operator*(const CamGradInfo& a, const float& b) {
  return res;
 }

+IHD IntWrapper operator+(const IntWrapper& a, const IntWrapper& b) {
+  IntWrapper res;
+  res.val = a.val + b.val;
+  return res;
+}
 } // namespace pulsar

 #endif
--- a/pytorch3d/csrc/pulsar/include/renderer.backward.device.h
+++ b/pytorch3d/csrc/pulsar/include/renderer.backward.device.h
@@ -155,8 +155,8 @@ void backward(
        stream);
    CHECKLAUNCH();
    SUM_WS(
-        self->ids_sorted_d,
-        self->n_grad_contributions_d,
+        (IntWrapper*)(self->ids_sorted_d),
+        (IntWrapper*)(self->n_grad_contributions_d),
        static_cast<int>(num_balls),
        self->workspace_d,
        self->workspace_size,
--- a/pytorch3d/csrc/pulsar/include/renderer.construct.device.h
+++ b/pytorch3d/csrc/pulsar/include/renderer.construct.device.h
@@ -52,7 +52,7 @@ HOST void construct(
  self->cam.film_width = width;
  self->cam.film_height = height;
  self->max_num_balls = max_num_balls;
-  MALLOC(self->result_d, float, width * height * n_channels);
+  MALLOC(self->result_d, float, width* height* n_channels);
  self->cam.orthogonal_projection = orthogonal_projection;
  self->cam.right_handed = right_handed_system;
  self->cam.background_normalization_depth = background_normalization_depth;
@@ -93,7 +93,7 @@ HOST void construct(
  MALLOC(self->di_sorted_d, DrawInfo, max_num_balls);
  MALLOC(self->region_flags_d, char, max_num_balls);
  MALLOC(self->num_selected_d, size_t, 1);
-  MALLOC(self->forw_info_d, float, width * height * (3 + 2 * n_track));
+  MALLOC(self->forw_info_d, float, width* height * (3 + 2 * n_track));
  MALLOC(self->min_max_pixels_d, IntersectInfo, 1);
  MALLOC(self->grad_pos_d, float3, max_num_balls);
  MALLOC(self->grad_col_d, float, max_num_balls* n_channels);
--- a/pytorch3d/csrc/pulsar/include/renderer.h
+++ b/pytorch3d/csrc/pulsar/include/renderer.h
@@ -255,7 +255,7 @@ GLOBAL void calc_signature(
 * for every iteration through the loading loop every thread could add a
 * 'hit' to the buffer.
 */
-#define RENDER_BUFFER_SIZE RENDER_BLOCK_SIZE * RENDER_BLOCK_SIZE * 2
+#define RENDER_BUFFER_SIZE RENDER_BLOCK_SIZE* RENDER_BLOCK_SIZE * 2
 /**
 * The threshold after which the spheres that are in the render buffer
 * are rendered and the buffer is flushed.
--- a/pytorch3d/csrc/pulsar/pytorch/tensor_util.cpp
+++ b/pytorch3d/csrc/pulsar/pytorch/tensor_util.cpp
@@ -8,7 +8,6 @@

 #ifdef WITH_CUDA
 #include <ATen/cuda/CUDAContext.h>
-#include <c10/cuda/CUDAException.h>
 #include <cuda_runtime_api.h>
 #endif
 #include <torch/extension.h>
@@ -34,13 +33,13 @@ torch::Tensor sphere_ids_from_result_info_nograd(
          .contiguous();
  if (forw_info.device().type() == c10::DeviceType::CUDA) {
 #ifdef WITH_CUDA
-    C10_CUDA_CHECK(cudaMemcpyAsync(
+    cudaMemcpyAsync(
        result.data_ptr(),
        tmp.data_ptr(),
        sizeof(uint32_t) * tmp.size(0) * tmp.size(1) * tmp.size(2) *
            tmp.size(3),
        cudaMemcpyDeviceToDevice,
-        at::cuda::getCurrentCUDAStream()));
+        at::cuda::getCurrentCUDAStream());
 #else
    throw std::runtime_error(
        "Copy on CUDA device initiated but built "
--- a/pytorch3d/csrc/pulsar/pytorch/util.cpp
+++ b/pytorch3d/csrc/pulsar/pytorch/util.cpp
@@ -7,7 +7,6 @@
 */

 #ifdef WITH_CUDA
-#include <c10/cuda/CUDAException.h>
 #include <cuda_runtime_api.h>

 namespace pulsar {
@@ -18,8 +17,7 @@ void cudaDevToDev(
    const void* src,
    const int& size,
    const cudaStream_t& stream) {
-  C10_CUDA_CHECK(
-      cudaMemcpyAsync(trg, src, size, cudaMemcpyDeviceToDevice, stream));
+  cudaMemcpyAsync(trg, src, size, cudaMemcpyDeviceToDevice, stream);
 }

 void cudaDevToHost(
@@ -27,8 +25,7 @@ void cudaDevToHost(
    const void* src,
    const int& size,
    const cudaStream_t& stream) {
-  C10_CUDA_CHECK(
-      cudaMemcpyAsync(trg, src, size, cudaMemcpyDeviceToHost, stream));
+  cudaMemcpyAsync(trg, src, size, cudaMemcpyDeviceToHost, stream);
 }

 } // namespace pytorch
--- a/pytorch3d/csrc/pulsar/warnings.cpp
+++ b/pytorch3d/csrc/pulsar/warnings.cpp
@@ -6,6 +6,9 @@
 * LICENSE file in the root directory of this source tree.
 */

+#include "./global.h"
+#include "./logging.h"
+
 /**
 * A compilation unit to provide warnings about the code and avoid
 * repeated messages.
--- a/pytorch3d/csrc/rasterize_coarse/bitmask.cuh
+++ b/pytorch3d/csrc/rasterize_coarse/bitmask.cuh
@@ -25,7 +25,7 @@ class BitMask {

  // Use all threads in the current block to clear all bits of this BitMask
  __device__ void block_clear() {
-    for (auto i = threadIdx.x; i < H * W * D; i += blockDim.x) {
+    for (int i = threadIdx.x; i < H * W * D; i += blockDim.x) {
      data[i] = 0;
    }
    __syncthreads();
--- a/pytorch3d/csrc/rasterize_coarse/rasterize_coarse.cu
+++ b/pytorch3d/csrc/rasterize_coarse/rasterize_coarse.cu
@@ -23,8 +23,8 @@ __global__ void TriangleBoundingBoxKernel(
    const float blur_radius,
    float* bboxes, // (4, F)
    bool* skip_face) { // (F,)
-  const auto tid = blockIdx.x * blockDim.x + threadIdx.x;
-  const auto num_threads = blockDim.x * gridDim.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  const int num_threads = blockDim.x * gridDim.x;
  const float sqrt_radius = sqrt(blur_radius);
  for (int f = tid; f < F; f += num_threads) {
    const float v0x = face_verts[f * 9 + 0 * 3 + 0];
@@ -56,8 +56,8 @@ __global__ void PointBoundingBoxKernel(
    const int P,
    float* bboxes, // (4, P)
    bool* skip_points) {
-  const auto tid = blockIdx.x * blockDim.x + threadIdx.x;
-  const auto num_threads = blockDim.x * gridDim.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  const int num_threads = blockDim.x * gridDim.x;
  for (int p = tid; p < P; p += num_threads) {
    const float x = points[p * 3 + 0];
    const float y = points[p * 3 + 1];
@@ -113,7 +113,7 @@ __global__ void RasterizeCoarseCudaKernel(
  const int chunks_per_batch = 1 + (E - 1) / chunk_size;
  const int num_chunks = N * chunks_per_batch;

-  for (auto chunk = blockIdx.x; chunk < num_chunks; chunk += gridDim.x) {
+  for (int chunk = blockIdx.x; chunk < num_chunks; chunk += gridDim.x) {
    const int batch_idx = chunk / chunks_per_batch; // batch index
    const int chunk_idx = chunk % chunks_per_batch;
    const int elem_chunk_start_idx = chunk_idx * chunk_size;
@@ -123,7 +123,7 @@ __global__ void RasterizeCoarseCudaKernel(
    const int64_t elem_stop_idx = elem_start_idx + elems_per_batch[batch_idx];

    // Have each thread handle a different face within the chunk
-    for (auto e = threadIdx.x; e < chunk_size; e += blockDim.x) {
+    for (int e = threadIdx.x; e < chunk_size; e += blockDim.x) {
      const int e_idx = elem_chunk_start_idx + e;

      // Check that we are still within the same element of the batch
@@ -170,7 +170,7 @@ __global__ void RasterizeCoarseCudaKernel(
    // Now we have processed every elem in the current chunk. We need to
    // count the number of elems in each bin so we can write the indices
    // out to global memory. We have each thread handle a different bin.
-    for (auto byx = threadIdx.x; byx < num_bins_y * num_bins_x;
+    for (int byx = threadIdx.x; byx < num_bins_y * num_bins_x;
         byx += blockDim.x) {
      const int by = byx / num_bins_x;
      const int bx = byx % num_bins_x;
--- a/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.cu
+++ b/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.cu
@@ -260,8 +260,8 @@ __global__ void RasterizeMeshesNaiveCudaKernel(
    float* pix_dists,
    float* bary) {
  // Simple version: One thread per output pixel
-  auto num_threads = gridDim.x * blockDim.x;
-  auto tid = blockDim.x * blockIdx.x + threadIdx.x;
+  int num_threads = gridDim.x * blockDim.x;
+  int tid = blockDim.x * blockIdx.x + threadIdx.x;

  for (int i = tid; i < N * H * W; i += num_threads) {
    // Convert linear index to 3D index
@@ -446,8 +446,8 @@ __global__ void RasterizeMeshesBackwardCudaKernel(

  // Parallelize over each pixel in images of
  // size H * W, for each image in the batch of size N.
-  const auto num_threads = gridDim.x * blockDim.x;
-  const auto tid = blockIdx.x * blockDim.x + threadIdx.x;
+  const int num_threads = gridDim.x * blockDim.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;

  for (int t_i = tid; t_i < N * H * W; t_i += num_threads) {
    // Convert linear index to 3D index
@@ -650,8 +650,8 @@ __global__ void RasterizeMeshesFineCudaKernel(
 ) {
  // This can be more than H * W if H or W are not divisible by bin_size.
  int num_pixels = N * BH * BW * bin_size * bin_size;
-  auto num_threads = gridDim.x * blockDim.x;
-  auto tid = blockIdx.x * blockDim.x + threadIdx.x;
+  int num_threads = gridDim.x * blockDim.x;
+  int tid = blockIdx.x * blockDim.x + threadIdx.x;

  for (int pid = tid; pid < num_pixels; pid += num_threads) {
    // Convert linear index into bin and pixel indices. We make the within
--- a/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.h
+++ b/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.h
@@ -138,9 +138,6 @@ RasterizeMeshesNaive(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
-    CHECK_CPU(face_verts);
-    CHECK_CPU(mesh_to_face_first_idx);
-    CHECK_CPU(num_faces_per_mesh);
    return RasterizeMeshesNaiveCpu(
        face_verts,
        mesh_to_face_first_idx,
@@ -235,11 +232,6 @@ torch::Tensor RasterizeMeshesBackward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
-    CHECK_CPU(face_verts);
-    CHECK_CPU(pix_to_face);
-    CHECK_CPU(grad_zbuf);
-    CHECK_CPU(grad_bary);
-    CHECK_CPU(grad_dists);
    return RasterizeMeshesBackwardCpu(
        face_verts,
        pix_to_face,
@@ -314,9 +306,6 @@ torch::Tensor RasterizeMeshesCoarse(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
-    CHECK_CPU(face_verts);
-    CHECK_CPU(mesh_to_face_first_idx);
-    CHECK_CPU(num_faces_per_mesh);
    return RasterizeMeshesCoarseCpu(
        face_verts,
        mesh_to_face_first_idx,
@@ -434,8 +423,6 @@ RasterizeMeshesFine(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
-    CHECK_CPU(face_verts);
-    CHECK_CPU(bin_faces);
    AT_ERROR("NOT IMPLEMENTED");
  }
 }
--- a/pytorch3d/csrc/rasterize_meshes/rasterize_meshes_cpu.cpp
+++ b/pytorch3d/csrc/rasterize_meshes/rasterize_meshes_cpu.cpp
@@ -9,6 +9,7 @@
 #include <torch/extension.h>
 #include <algorithm>
 #include <list>
+#include <queue>
 #include <thread>
 #include <tuple>
 #include "ATen/core/TensorAccessor.h"
--- a/pytorch3d/csrc/rasterize_points/rasterize_points.cu
+++ b/pytorch3d/csrc/rasterize_points/rasterize_points.cu
@@ -97,8 +97,8 @@ __global__ void RasterizePointsNaiveCudaKernel(
    float* zbuf, // (N, H, W, K)
    float* pix_dists) { // (N, H, W, K)
  // Simple version: One thread per output pixel
-  const auto num_threads = gridDim.x * blockDim.x;
-  const auto tid = blockDim.x * blockIdx.x + threadIdx.x;
+  const int num_threads = gridDim.x * blockDim.x;
+  const int tid = blockDim.x * blockIdx.x + threadIdx.x;
  for (int i = tid; i < N * H * W; i += num_threads) {
    // Convert linear index to 3D index
    const int n = i / (H * W); // Batch index
@@ -237,8 +237,8 @@ __global__ void RasterizePointsFineCudaKernel(
    float* pix_dists) { // (N, H, W, K)
  // This can be more than H * W if H or W are not divisible by bin_size.
  const int num_pixels = N * BH * BW * bin_size * bin_size;
-  const auto num_threads = gridDim.x * blockDim.x;
-  const auto tid = blockIdx.x * blockDim.x + threadIdx.x;
+  const int num_threads = gridDim.x * blockDim.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;

  for (int pid = tid; pid < num_pixels; pid += num_threads) {
    // Convert linear index into bin and pixel indices. We make the within
@@ -376,8 +376,8 @@ __global__ void RasterizePointsBackwardCudaKernel(
    float* grad_points) { // (P, 3)
  // Parallelized over each of K points per pixel, for each pixel in images of
  // size H * W, for each image in the batch of size N.
-  auto num_threads = gridDim.x * blockDim.x;
-  auto tid = blockIdx.x * blockDim.x + threadIdx.x;
+  int num_threads = gridDim.x * blockDim.x;
+  int tid = blockIdx.x * blockDim.x + threadIdx.x;
  for (int i = tid; i < N * H * W * K; i += num_threads) {
    // const int n = i / (H * W * K); // batch index (not needed).
    const int yxk = i % (H * W * K);
--- a/pytorch3d/csrc/rasterize_points/rasterize_points.h
+++ b/pytorch3d/csrc/rasterize_points/rasterize_points.h
@@ -91,10 +91,6 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> RasterizePointsNaive(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
-    CHECK_CPU(points);
-    CHECK_CPU(cloud_to_packed_first_idx);
-    CHECK_CPU(num_points_per_cloud);
-    CHECK_CPU(radius);
    return RasterizePointsNaiveCpu(
        points,
        cloud_to_packed_first_idx,
@@ -170,10 +166,6 @@ torch::Tensor RasterizePointsCoarse(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
-    CHECK_CPU(points);
-    CHECK_CPU(cloud_to_packed_first_idx);
-    CHECK_CPU(num_points_per_cloud);
-    CHECK_CPU(radius);
    return RasterizePointsCoarseCpu(
        points,
        cloud_to_packed_first_idx,
@@ -240,8 +232,6 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> RasterizePointsFine(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
-    CHECK_CPU(points);
-    CHECK_CPU(bin_points);
    AT_ERROR("NOT IMPLEMENTED");
  }
 }
@@ -294,10 +284,6 @@ torch::Tensor RasterizePointsBackward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
-    CHECK_CPU(points);
-    CHECK_CPU(idxs);
-    CHECK_CPU(grad_zbuf);
-    CHECK_CPU(grad_dists);
    return RasterizePointsBackwardCpu(points, idxs, grad_zbuf, grad_dists);
  }
 }
--- a/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.cu
+++ b/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.cu
@@ -35,6 +35,8 @@ __global__ void FarthestPointSamplingKernel(
  __shared__ int64_t selected_store;

  // Get constants
+  const int64_t N = points.size(0);
+  const int64_t P = points.size(1);
  const int64_t D = points.size(2);

  // Get batch index and thread index
@@ -107,8 +109,7 @@ at::Tensor FarthestPointSamplingCuda(
    const at::Tensor& points, // (N, P, 3)
    const at::Tensor& lengths, // (N,)
    const at::Tensor& K, // (N,)
-    const at::Tensor& start_idxs,
-    const int64_t max_K_known = -1) {
+    const at::Tensor& start_idxs) {
  // Check inputs are on the same device
  at::TensorArg p_t{points, "points", 1}, lengths_t{lengths, "lengths", 2},
      k_t{K, "K", 3}, start_idxs_t{start_idxs, "start_idxs", 4};
@@ -130,12 +131,7 @@ at::Tensor FarthestPointSamplingCuda(

  const int64_t N = points.size(0);
  const int64_t P = points.size(1);
-  int64_t max_K;
-  if (max_K_known > 0) {
-    max_K = max_K_known;
-  } else {
-    max_K = at::max(K).item<int64_t>();
-  }
+  const int64_t max_K = at::max(K).item<int64_t>();

  // Initialize the output tensor with the sampled indices
  auto idxs = at::full({N, max_K}, -1, lengths.options());
--- a/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.h
+++ b/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.h
@@ -43,8 +43,7 @@ at::Tensor FarthestPointSamplingCuda(
    const at::Tensor& points,
    const at::Tensor& lengths,
    const at::Tensor& K,
-    const at::Tensor& start_idxs,
-    const int64_t max_K_known = -1);
+    const at::Tensor& start_idxs);

 at::Tensor FarthestPointSamplingCpu(
    const at::Tensor& points,
@@ -57,23 +56,17 @@ at::Tensor FarthestPointSampling(
    const at::Tensor& points,
    const at::Tensor& lengths,
    const at::Tensor& K,
-    const at::Tensor& start_idxs,
-    const int64_t max_K_known = -1) {
+    const at::Tensor& start_idxs) {
  if (points.is_cuda() || lengths.is_cuda() || K.is_cuda()) {
 #ifdef WITH_CUDA
    CHECK_CUDA(points);
    CHECK_CUDA(lengths);
    CHECK_CUDA(K);
    CHECK_CUDA(start_idxs);
-    return FarthestPointSamplingCuda(
-        points, lengths, K, start_idxs, max_K_known);
+    return FarthestPointSamplingCuda(points, lengths, K, start_idxs);
 #else
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(points);
-  CHECK_CPU(lengths);
-  CHECK_CPU(K);
-  CHECK_CPU(start_idxs);
  return FarthestPointSamplingCpu(points, lengths, K, start_idxs);
 }
--- a/pytorch3d/csrc/sample_pdf/sample_pdf.h
+++ b/pytorch3d/csrc/sample_pdf/sample_pdf.h
@@ -71,8 +71,6 @@ inline void SamplePdf(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
-  CHECK_CPU(weights);
-  CHECK_CPU(outputs);
  CHECK_CONTIGUOUS(outputs);
  SamplePdfCpu(bins, weights, outputs, eps);
 }
--- a/pytorch3d/csrc/utils/dispatch.cuh
+++ b/pytorch3d/csrc/utils/dispatch.cuh
@@ -99,7 +99,8 @@ namespace {
 // and increment it via template recursion until it is equal to the run-time
 // argument N.
 template <
-    template <typename, int64_t> class Kernel,
+    template <typename, int64_t>
+    class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
@@ -123,7 +124,8 @@ struct DispatchKernelHelper1D {
 // 1D dispatch: Specialization when curN == maxN
 // We need this base case to avoid infinite template recursion.
 template <
-    template <typename, int64_t> class Kernel,
+    template <typename, int64_t>
+    class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
@@ -143,7 +145,8 @@ struct DispatchKernelHelper1D<Kernel, T, minN, maxN, maxN, Args...> {
 // the run-time values of N and M, at which point we dispatch to the run
 // method of the kernel.
 template <
-    template <typename, int64_t, int64_t> class Kernel,
+    template <typename, int64_t, int64_t>
+    class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
@@ -200,7 +203,8 @@ struct DispatchKernelHelper2D {

 // 2D dispatch, specialization for curN == maxN
 template <
-    template <typename, int64_t, int64_t> class Kernel,
+    template <typename, int64_t, int64_t>
+    class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
@@ -239,7 +243,8 @@ struct DispatchKernelHelper2D<

 // 2D dispatch, specialization for curM == maxM
 template <
-    template <typename, int64_t, int64_t> class Kernel,
+    template <typename, int64_t, int64_t>
+    class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
@@ -278,7 +283,8 @@ struct DispatchKernelHelper2D<

 // 2D dispatch, specialization for curN == maxN, curM == maxM
 template <
-    template <typename, int64_t, int64_t> class Kernel,
+    template <typename, int64_t, int64_t>
+    class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
@@ -307,7 +313,8 @@ struct DispatchKernelHelper2D<

 // This is the function we expect users to call to dispatch to 1D functions
 template <
-    template <typename, int64_t> class Kernel,
+    template <typename, int64_t>
+    class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
@@ -323,7 +330,8 @@ void DispatchKernel1D(const int64_t N, Args... args) {

 // This is the function we expect users to call to dispatch to 2D functions
 template <
-    template <typename, int64_t, int64_t> class Kernel,
+    template <typename, int64_t, int64_t>
+    class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
--- a/pytorch3d/csrc/utils/geometry_utils.cuh
+++ b/pytorch3d/csrc/utils/geometry_utils.cuh
@@ -376,6 +376,8 @@ PointLineDistanceBackward(
  float tt = t_top / t_bot;
  tt = __saturatef(tt);
  const float2 p_proj = (1.0f - tt) * v0 + tt * v1;
+  const float2 d = p - p_proj;
+  const float dist = sqrt(dot(d, d));

  const float2 grad_p = -1.0f * grad_dist * 2.0f * (p_proj - p);
  const float2 grad_v0 = grad_dist * (1.0f - tt) * 2.0f * (p_proj - p);
--- a/pytorch3d/csrc/utils/pytorch3d_cutils.h
+++ b/pytorch3d/csrc/utils/pytorch3d_cutils.h
@@ -15,7 +15,3 @@
 #define CHECK_CONTIGUOUS_CUDA(x) \
  CHECK_CUDA(x);                 \
  CHECK_CONTIGUOUS(x)
-#define CHECK_CPU(x)                    \
-  TORCH_CHECK(                          \
-      x.device().type() == torch::kCPU, \
-      "Cannot use CPU implementation: " #x " not on CPU.")
--- a/pytorch3d/datasets/shapenet/shapenet_core.py
+++ b/pytorch3d/datasets/shapenet/shapenet_core.py
@@ -83,7 +83,7 @@ class ShapeNetCore(ShapeNetBase):  # pragma: no cover
                ):
                    synset_set.add(synset)
                elif (synset in self.synset_inv.keys()) and (
-                    path.isdir(path.join(data_dir, self.synset_inv[synset]))
+                    (path.isdir(path.join(data_dir, self.synset_inv[synset])))
                ):
                    synset_set.add(self.synset_inv[synset])
                else:
--- a/pytorch3d/datasets/utils.py
+++ b/pytorch3d/datasets/utils.py
@@ -36,6 +36,7 @@ def collate_batched_meshes(batch: List[Dict]):  # pragma: no cover

    collated_dict["mesh"] = None
    if {"verts", "faces"}.issubset(collated_dict.keys()):
+
        textures = None
        if "textures" in collated_dict:
            textures = TexturesAtlas(atlas=collated_dict["textures"])
--- a/pytorch3d/implicitron/dataset/frame_data.py
+++ b/pytorch3d/implicitron/dataset/frame_data.py
@@ -26,7 +26,7 @@ from typing import (
 import numpy as np
 import torch

-from pytorch3d.implicitron.dataset import orm_types, types
+from pytorch3d.implicitron.dataset import types
 from pytorch3d.implicitron.dataset.utils import (
    adjust_camera_to_bbox_crop_,
    adjust_camera_to_image_scale_,
@@ -48,12 +48,8 @@ from pytorch3d.implicitron.dataset.utils import (
 from pytorch3d.implicitron.tools.config import registry, ReplaceableBase
 from pytorch3d.renderer.camera_utils import join_cameras_as_batch
 from pytorch3d.renderer.cameras import CamerasBase, PerspectiveCameras
-from pytorch3d.structures.meshes import join_meshes_as_batch, Meshes
 from pytorch3d.structures.pointclouds import join_pointclouds_as_batch, Pointclouds

-FrameAnnotationT = types.FrameAnnotation | orm_types.SqlFrameAnnotation
-SequenceAnnotationT = types.SequenceAnnotation | orm_types.SqlSequenceAnnotation
-

@dataclass
 class FrameData(Mapping[str, Any]):
@@ -126,9 +122,9 @@ class FrameData(Mapping[str, Any]):
        meta: A dict for storing additional frame information.
    """

-    frame_number: Optional[torch.LongTensor] = None
-    sequence_name: Union[str, List[str]] = ""
-    sequence_category: Union[str, List[str]] = ""
+    frame_number: Optional[torch.LongTensor]
+    sequence_name: Union[str, List[str]]
+    sequence_category: Union[str, List[str]]
    frame_timestamp: Optional[torch.Tensor] = None
    image_size_hw: Optional[torch.LongTensor] = None
    effective_image_size_hw: Optional[torch.LongTensor] = None
@@ -159,7 +155,7 @@ class FrameData(Mapping[str, Any]):
        new_params = {}
        for field_name in iter(self):
            value = getattr(self, field_name)
-            if isinstance(value, (torch.Tensor, Pointclouds, CamerasBase, Meshes)):
+            if isinstance(value, (torch.Tensor, Pointclouds, CamerasBase)):
                new_params[field_name] = value.to(*args, **kwargs)
            else:
                new_params[field_name] = value
@@ -421,6 +417,7 @@ class FrameData(Mapping[str, Any]):
            for f in fields(elem):
                if not f.init:
                    continue
+
                list_values = override_fields.get(
                    f.name, [getattr(d, f.name) for d in batch]
                )
@@ -429,7 +426,7 @@ class FrameData(Mapping[str, Any]):
                    if all(list_value is not None for list_value in list_values)
                    else None
                )
-            return type(elem)(**collated)
+            return cls(**collated)

        elif isinstance(elem, Pointclouds):
            return join_pointclouds_as_batch(batch)
@@ -437,8 +434,6 @@ class FrameData(Mapping[str, Any]):
        elif isinstance(elem, CamerasBase):
            # TODO: don't store K; enforce working in NDC space
            return join_cameras_as_batch(batch)
-        elif isinstance(elem, Meshes):
-            return join_meshes_as_batch(batch)
        else:
            return torch.utils.data.dataloader.default_collate(batch)

@@ -459,8 +454,8 @@ class FrameDataBuilderBase(ReplaceableBase, Generic[FrameDataSubtype], ABC):
    @abstractmethod
    def build(
        self,
-        frame_annotation: FrameAnnotationT,
-        sequence_annotation: SequenceAnnotationT,
+        frame_annotation: types.FrameAnnotation,
+        sequence_annotation: types.SequenceAnnotation,
        *,
        load_blobs: bool = True,
        **kwargs,
@@ -546,8 +541,8 @@ class GenericFrameDataBuilder(FrameDataBuilderBase[FrameDataSubtype], ABC):

    def build(
        self,
-        frame_annotation: FrameAnnotationT,
-        sequence_annotation: SequenceAnnotationT,
+        frame_annotation: types.FrameAnnotation,
+        sequence_annotation: types.SequenceAnnotation,
        *,
        load_blobs: bool = True,
        **kwargs,
@@ -591,81 +586,58 @@ class GenericFrameDataBuilder(FrameDataBuilderBase[FrameDataSubtype], ABC):
            ),
        )

-        dataset_root = self.dataset_root
+        fg_mask_np: Optional[np.ndarray] = None
        mask_annotation = frame_annotation.mask
-        depth_annotation = frame_annotation.depth
-        image_path: str | None = None
-        mask_path: str | None = None
-        depth_path: str | None = None
-        pcl_path: str | None = None
-        if dataset_root is not None:  # set all paths even if we won’t load blobs
-            if frame_annotation.image.path is not None:
-                image_path = os.path.join(dataset_root, frame_annotation.image.path)
-                frame_data.image_path = image_path
-
-            if mask_annotation is not None and mask_annotation.path:
-                mask_path = os.path.join(dataset_root, mask_annotation.path)
-                frame_data.mask_path = mask_path
-
-            if depth_annotation is not None and depth_annotation.path is not None:
-                depth_path = os.path.join(dataset_root, depth_annotation.path)
-                frame_data.depth_path = depth_path
-
-            if point_cloud is not None:
-                pcl_path = os.path.join(dataset_root, point_cloud.path)
-                frame_data.sequence_point_cloud_path = pcl_path
-
-        fg_mask_np: np.ndarray | None = None
-        bbox_xywh: tuple[float, float, float, float] | None = None
-
        if mask_annotation is not None:
-            if load_blobs and self.load_masks and mask_path:
-                fg_mask_np = self._load_fg_probability(frame_annotation, mask_path)
+            if load_blobs and self.load_masks:
+                fg_mask_np, mask_path = self._load_fg_probability(frame_annotation)
+                frame_data.mask_path = mask_path
                frame_data.fg_probability = safe_as_tensor(fg_mask_np, torch.float)

            bbox_xywh = mask_annotation.bounding_box_xywh
+            if bbox_xywh is None and fg_mask_np is not None:
+                bbox_xywh = get_bbox_from_mask(fg_mask_np, self.box_crop_mask_thr)
+
+            frame_data.bbox_xywh = safe_as_tensor(bbox_xywh, torch.float)

        if frame_annotation.image is not None:
            image_size_hw = safe_as_tensor(frame_annotation.image.size, torch.long)
            frame_data.image_size_hw = image_size_hw  # original image size
            # image size after crop/resize
            frame_data.effective_image_size_hw = image_size_hw
+            image_path = None
+            dataset_root = self.dataset_root
+            if frame_annotation.image.path is not None and dataset_root is not None:
+                image_path = os.path.join(dataset_root, frame_annotation.image.path)
+                frame_data.image_path = image_path

            if load_blobs and self.load_images:
                if image_path is None:
                    raise ValueError("Image path is required to load images.")

-                no_mask = fg_mask_np is None  # didn’t read the mask file
-                image_np = load_image(
-                    self._local_path(image_path), try_read_alpha=no_mask
-                )
-                if image_np.shape[0] == 4:  # RGBA image
-                    if no_mask:
-                        fg_mask_np = image_np[3:]
-                        frame_data.fg_probability = safe_as_tensor(
-                            fg_mask_np, torch.float
-                        )
-
-                    image_np = image_np[:3]
-
+                image_np = load_image(self._local_path(image_path))
                frame_data.image_rgb = self._postprocess_image(
                    image_np, frame_annotation.image.size, frame_data.fg_probability
                )

-        if bbox_xywh is None and fg_mask_np is not None:
-            bbox_xywh = get_bbox_from_mask(fg_mask_np, self.box_crop_mask_thr)
-        frame_data.bbox_xywh = safe_as_tensor(bbox_xywh, torch.float)
-
-        if load_blobs and self.load_depths and depth_path is not None:
-            frame_data.depth_map, frame_data.depth_mask = self._load_mask_depth(
-                frame_annotation, depth_path, fg_mask_np
-            )
+        if (
+            load_blobs
+            and self.load_depths
+            and frame_annotation.depth is not None
+            and frame_annotation.depth.path is not None
+        ):
+            (
+                frame_data.depth_map,
+                frame_data.depth_path,
+                frame_data.depth_mask,
+            ) = self._load_mask_depth(frame_annotation, fg_mask_np)

        if load_blobs and self.load_point_clouds and point_cloud is not None:
-            assert pcl_path is not None
+            pcl_path = self._fix_point_cloud_path(point_cloud.path)
            frame_data.sequence_point_cloud = load_pointcloud(
                self._local_path(pcl_path), max_points=self.max_points
            )
+            frame_data.sequence_point_cloud_path = pcl_path

        if frame_annotation.viewpoint is not None:
            frame_data.camera = self._get_pytorch3d_camera(frame_annotation)
@@ -681,14 +653,18 @@ class GenericFrameDataBuilder(FrameDataBuilderBase[FrameDataSubtype], ABC):

        return frame_data

-    def _load_fg_probability(self, entry: FrameAnnotationT, path: str) -> np.ndarray:
-        fg_probability = load_mask(self._local_path(path))
+    def _load_fg_probability(
+        self, entry: types.FrameAnnotation
+    ) -> Tuple[np.ndarray, str]:
+        assert self.dataset_root is not None and entry.mask is not None
+        full_path = os.path.join(self.dataset_root, entry.mask.path)
+        fg_probability = load_mask(self._local_path(full_path))
        if fg_probability.shape[-2:] != entry.image.size:
            raise ValueError(
                f"bad mask size: {fg_probability.shape[-2:]} vs {entry.image.size}!"
            )

-        return fg_probability
+        return fg_probability, full_path

    def _postprocess_image(
        self,
@@ -709,14 +685,14 @@ class GenericFrameDataBuilder(FrameDataBuilderBase[FrameDataSubtype], ABC):

    def _load_mask_depth(
        self,
-        entry: FrameAnnotationT,
-        path: str,
+        entry: types.FrameAnnotation,
        fg_mask: Optional[np.ndarray],
-    ) -> tuple[torch.Tensor, torch.Tensor]:
+    ) -> Tuple[torch.Tensor, str, torch.Tensor]:
        entry_depth = entry.depth
        dataset_root = self.dataset_root
        assert dataset_root is not None
-        assert entry_depth is not None
+        assert entry_depth is not None and entry_depth.path is not None
+        path = os.path.join(dataset_root, entry_depth.path)
        depth_map = load_depth(self._local_path(path), entry_depth.scale_adjustment)

        if self.mask_depths:
@@ -730,11 +706,11 @@ class GenericFrameDataBuilder(FrameDataBuilderBase[FrameDataSubtype], ABC):
        else:
            depth_mask = (depth_map > 0.0).astype(np.float32)

-        return torch.tensor(depth_map), torch.tensor(depth_mask)
+        return torch.tensor(depth_map), path, torch.tensor(depth_mask)

    def _get_pytorch3d_camera(
        self,
-        entry: FrameAnnotationT,
+        entry: types.FrameAnnotation,
    ) -> PerspectiveCameras:
        entry_viewpoint = entry.viewpoint
        assert entry_viewpoint is not None
@@ -763,6 +739,19 @@ class GenericFrameDataBuilder(FrameDataBuilderBase[FrameDataSubtype], ABC):
            T=torch.tensor(entry_viewpoint.T, dtype=torch.float)[None],
        )

+    def _fix_point_cloud_path(self, path: str) -> str:
+        """
+        Resolve a point cloud path from the dataset against `dataset_root`.
+        Some Co3Dv2 entries accidentally store an absolute path; strip that prefix.
+        """
+        unwanted_prefix = (
+            "/large_experiments/p3/replay/datasets/co3d/co3d45k_220512/export_v23/"
+        )
+        if path.startswith(unwanted_prefix):
+            path = path[len(unwanted_prefix) :]
+        assert self.dataset_root is not None
+        return os.path.join(self.dataset_root, path)
+
    def _local_path(self, path: str) -> str:
        if self.path_manager is None:
            return path
--- a/pytorch3d/implicitron/dataset/json_index_dataset_map_provider_v2.py
+++ b/pytorch3d/implicitron/dataset/json_index_dataset_map_provider_v2.py
@@ -222,6 +222,7 @@ class JsonIndexDatasetMapProviderV2(DatasetMapProviderBase):
        self.dataset_map = dataset_map

    def _load_category(self, category: str) -> DatasetMap:
+
        frame_file = os.path.join(self.dataset_root, category, "frame_annotations.jgz")
        sequence_file = os.path.join(
            self.dataset_root, category, "sequence_annotations.jgz"
--- a/pytorch3d/implicitron/dataset/load_llff.py
+++ b/pytorch3d/implicitron/dataset/load_llff.py
@@ -75,6 +75,7 @@ def _minify(basedir, path_manager, factors=(), resolutions=()):
 def _load_data(
    basedir, factor=None, width=None, height=None, load_imgs=True, path_manager=None
 ):
+
    poses_arr = np.load(
        _local_path(path_manager, os.path.join(basedir, "poses_bounds.npy"))
    )
@@ -163,6 +164,7 @@ def ptstocam(pts, c2w):


 def poses_avg(poses):
+
    hwf = poses[0, :3, -1:]

    center = poses[:, :3, 3].mean(0)
@@ -190,6 +192,7 @@ def render_path_spiral(c2w, up, rads, focal, zdelta, zrate, rots, N):


 def recenter_poses(poses):
+
    poses_ = poses + 0
    bottom = np.reshape([0, 0, 0, 1.0], [1, 4])
    c2w = poses_avg(poses)
@@ -253,6 +256,7 @@ def spherify_poses(poses, bds):
    new_poses = []

    for th in np.linspace(0.0, 2.0 * np.pi, 120):
+
        camorigin = np.array([radcircle * np.cos(th), radcircle * np.sin(th), zh])
        up = np.array([0, 0, -1.0])

@@ -307,6 +311,7 @@ def load_llff_data(
    path_zflat=False,
    path_manager=None,
 ):
+
    poses, bds, imgs = _load_data(
        basedir, factor=factor, path_manager=path_manager
    )  # factor=8 downsamples original imgs by 8x
--- a/pytorch3d/implicitron/dataset/orm_types.py
+++ b/pytorch3d/implicitron/dataset/orm_types.py
@@ -4,8 +4,6 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.

-# pyre-unsafe
-
 # This functionality requires SQLAlchemy 2.0 or later.

 import math
--- a/pytorch3d/implicitron/dataset/sql_dataset.py
+++ b/pytorch3d/implicitron/dataset/sql_dataset.py
@@ -4,15 +4,11 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.

-# pyre-unsafe
-
 import hashlib
 import json
 import logging
 import os
-
-import urllib
-from dataclasses import dataclass, Field, field
+from dataclasses import dataclass
 from typing import (
    Any,
    ClassVar,
@@ -33,18 +29,17 @@ import sqlalchemy as sa
 import torch
 from pytorch3d.implicitron.dataset.dataset_base import DatasetBase

-from pytorch3d.implicitron.dataset.frame_data import (
+from pytorch3d.implicitron.dataset.frame_data import (  # noqa
    FrameData,
-    FrameDataBuilder,  # noqa
+    FrameDataBuilder,
    FrameDataBuilderBase,
 )
-
 from pytorch3d.implicitron.tools.config import (
    registry,
    ReplaceableBase,
    run_auto_creation,
 )
-from sqlalchemy.orm import scoped_session, Session, sessionmaker
+from sqlalchemy.orm import Session

 from .orm_types import SqlFrameAnnotation, SqlSequenceAnnotation

@@ -56,7 +51,7 @@ _SET_LISTS_TABLE: str = "set_lists"


@registry.register
-class SqlIndexDataset(DatasetBase, ReplaceableBase):
+class SqlIndexDataset(DatasetBase, ReplaceableBase):  # pyre-ignore
    """
    A dataset with annotations stored as SQLite tables. This is an index-based dataset.
    The length is returned after all sequence and frame filters are applied (see param
@@ -93,7 +88,6 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
            engine verbatim. Don’t expose it to end users of your application!
        pick_categories: Restrict the dataset to the given list of categories.
        pick_sequences: A Sequence of sequence names to restrict the dataset to.
-        pick_sequences_sql_clause: Custom SQL WHERE clause to constrain sequence annotations.
        exclude_sequences: A Sequence of the names of the sequences to exclude.
        limit_sequences_per_category_to: Limit the dataset to the first up to N
            sequences within each category (applies after all other sequence filters
@@ -108,16 +102,9 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
            more frames than that; applied after other frame-level filters.
        seed: The seed of the random generator sampling `n_frames_per_sequence`
            random frames per sequence.
-        preload_metadata: If True, the metadata is preloaded into memory.
-        precompute_seq_to_idx: If True, precomputes the mapping from sequence name to indices.
-        scoped_session: If True, allows different parts of the code to share
-            a global session to access the database.
    """

    frame_annotations_type: ClassVar[Type[SqlFrameAnnotation]] = SqlFrameAnnotation
-    sequence_annotations_type: ClassVar[Type[SqlSequenceAnnotation]] = (
-        SqlSequenceAnnotation
-    )

    sqlite_metadata_file: str = ""
    dataset_root: Optional[str] = None
@@ -130,7 +117,6 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
    pick_categories: Tuple[str, ...] = ()

    pick_sequences: Tuple[str, ...] = ()
-    pick_sequences_sql_clause: Optional[str] = None
    exclude_sequences: Tuple[str, ...] = ()
    limit_sequences_per_category_to: int = 0
    limit_sequences_to: int = 0
@@ -138,22 +124,12 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
    n_frames_per_sequence: int = -1
    seed: int = 0
    remove_empty_masks_poll_whole_table_threshold: int = 300_000
-    preload_metadata: bool = False
-    precompute_seq_to_idx: bool = False
    # we set it manually in the constructor
-    _index: pd.DataFrame = field(init=False, metadata={"omegaconf_ignore": True})
-    _sql_engine: sa.engine.Engine = field(
-        init=False, metadata={"omegaconf_ignore": True}
-    )
-    eval_batches: Optional[List[Any]] = field(
-        init=False, metadata={"omegaconf_ignore": True}
-    )
+    # _index: pd.DataFrame = field(init=False)

-    frame_data_builder: FrameDataBuilderBase  # pyre-ignore[13]
+    frame_data_builder: FrameDataBuilderBase
    frame_data_builder_class_type: str = "FrameDataBuilder"

-    scoped_session: bool = False
-
    def __post_init__(self) -> None:
        if sa.__version__ < "2.0":
            raise ImportError("This class requires SQL Alchemy 2.0 or later")
@@ -162,28 +138,19 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
            raise ValueError("sqlite_metadata_file must be set")

        if self.dataset_root:
-            frame_args = f"frame_data_builder_{self.frame_data_builder_class_type}_args"
-            getattr(self, frame_args)["dataset_root"] = self.dataset_root
-            getattr(self, frame_args)["path_manager"] = self.path_manager
+            frame_builder_type = self.frame_data_builder_class_type
+            getattr(self, f"frame_data_builder_{frame_builder_type}_args")[
+                "dataset_root"
+            ] = self.dataset_root

        run_auto_creation(self)
+        self.frame_data_builder.path_manager = self.path_manager

-        if self.path_manager is not None:
-            self.sqlite_metadata_file = self.path_manager.get_local_path(
-                self.sqlite_metadata_file
-            )
-            self.subset_lists_file = self.path_manager.get_local_path(
-                self.subset_lists_file
-            )
-
-        # NOTE: sqlite-specific args (read-only mode).
+        # pyre-ignore  # NOTE: sqlite-specific args (read-only mode).
        self._sql_engine = sa.create_engine(
-            f"sqlite:///file:{urllib.parse.quote(self.sqlite_metadata_file)}?mode=ro&uri=true"
+            f"sqlite:///file:{self.sqlite_metadata_file}?mode=ro&uri=true"
        )

-        if self.preload_metadata:
-            self._sql_engine = self._preload_database(self._sql_engine)
-
        sequences = self._get_filtered_sequences_if_any()

        if self.subsets:
@@ -199,29 +166,16 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
        if len(index) == 0:
            raise ValueError(f"There are no frames in the subsets: {self.subsets}!")

-        self._index = index.set_index(["sequence_name", "frame_number"])
+        self._index = index.set_index(["sequence_name", "frame_number"])  # pyre-ignore

-        self.eval_batches = None
+        self.eval_batches = None  # pyre-ignore
        if self.eval_batches_file:
            self.eval_batches = self._load_filter_eval_batches()

        logger.info(str(self))

-        if self.scoped_session:
-            self._session_factory = sessionmaker(bind=self._sql_engine)  # pyre-ignore
-
-        if self.precompute_seq_to_idx:
-            # This is deprecated and will be removed in the future.
-            # After we backport https://github.com/facebookresearch/uco3d/pull/3
-            logger.warning(
-                "Using precompute_seq_to_idx is deprecated and will be removed in the future."
-            )
-            self._index["rowid"] = np.arange(len(self._index))
-            groupby = self._index.groupby("sequence_name", sort=False)["rowid"]
-            self._seq_to_indices = dict(groupby.apply(list))  # pyre-ignore
-            del self._index["rowid"]
-
    def __len__(self) -> int:
+        # pyre-ignore[16]
        return len(self._index)

    def __getitem__(self, frame_idx: Union[int, Tuple[str, int]]) -> FrameData:
@@ -278,18 +232,12 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
            self.frame_annotations_type.frame_number
            == int(frame),  # cast from np.int64
        )
-        seq_stmt = sa.select(self.sequence_annotations_type).where(
-            self.sequence_annotations_type.sequence_name == seq
+        seq_stmt = sa.select(SqlSequenceAnnotation).where(
+            SqlSequenceAnnotation.sequence_name == seq
        )
-        if self.scoped_session:
-            # pyre-ignore
-            with scoped_session(self._session_factory)() as session:
-                entry = session.scalars(stmt).one()
-                seq_metadata = session.scalars(seq_stmt).one()
-        else:
-            with Session(self._sql_engine) as session:
-                entry = session.scalars(stmt).one()
-                seq_metadata = session.scalars(seq_stmt).one()
+        with Session(self._sql_engine) as session:
+            entry = session.scalars(stmt).one()
+            seq_metadata = session.scalars(seq_stmt).one()

        assert entry.image.path == self._index.loc[(seq, frame), "_image_path"]

@@ -302,6 +250,7 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
        return frame_data

    def __str__(self) -> str:
+        # pyre-ignore[16]
        return f"SqlIndexDataset #frames={len(self._index)}"

    def sequence_names(self) -> Iterable[str]:
@@ -311,10 +260,9 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
    # override
    def category_to_sequence_names(self) -> Dict[str, List[str]]:
        stmt = sa.select(
-            self.sequence_annotations_type.category,
-            self.sequence_annotations_type.sequence_name,
+            SqlSequenceAnnotation.category, SqlSequenceAnnotation.sequence_name
        ).where(  # we limit results to sequences that have frames after all filters
-            self.sequence_annotations_type.sequence_name.in_(self.sequence_names())
+            SqlSequenceAnnotation.sequence_name.in_(self.sequence_names())
        )
        with self._sql_engine.connect() as connection:
            cat_to_seqs = pd.read_sql(stmt, connection)
@@ -387,31 +335,17 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
        rows = self._index.index.get_loc(seq_name)
        if isinstance(rows, slice):
            assert rows.stop is not None, "Unexpected result from pandas"
-            rows_seq = range(rows.start or 0, rows.stop, rows.step or 1)
+            rows = range(rows.start or 0, rows.stop, rows.step or 1)
        else:
-            rows_seq = list(np.where(rows)[0])
+            rows = np.where(rows)[0]

        index_slice, idx = self._get_frame_no_coalesced_ts_by_row_indices(
-            rows_seq, seq_name, subset_filter
+            rows, seq_name, subset_filter
        )
        index_slice["idx"] = idx

        yield from index_slice.itertuples(index=False)

-    # override
-    def sequence_indices_in_order(
-        self, seq_name: str, subset_filter: Optional[Sequence[str]] = None
-    ) -> Iterator[int]:
-        """Same as `sequence_frames_in_order` but returns the iterator over
-        only dataset indices.
-        """
-        if self.precompute_seq_to_idx and subset_filter is None:
-            # pyre-ignore
-            yield from self._seq_to_indices[seq_name]
-        else:
-            for _, _, idx in self.sequence_frames_in_order(seq_name, subset_filter):
-                yield idx
-
    # override
    def get_eval_batches(self) -> Optional[List[Any]]:
        """
@@ -445,35 +379,11 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
            or self.limit_sequences_to > 0
            or self.limit_sequences_per_category_to > 0
            or len(self.pick_sequences) > 0
-            or self.pick_sequences_sql_clause is not None
            or len(self.exclude_sequences) > 0
            or len(self.pick_categories) > 0
            or self.n_frames_per_sequence > 0
        )

-    def _preload_database(
-        self, source_engine: sa.engine.base.Engine
-    ) -> sa.engine.base.Engine:
-        destination_engine = sa.create_engine("sqlite:///:memory:")
-        metadata = sa.MetaData()
-        metadata.reflect(bind=source_engine)
-        metadata.create_all(bind=destination_engine)
-
-        with source_engine.connect() as source_conn:
-            with destination_engine.connect() as destination_conn:
-                for table_obj in metadata.tables.values():
-                    # Select all rows from the source table
-                    source_rows = source_conn.execute(table_obj.select())
-
-                    # Insert rows into the destination table
-                    for row in source_rows:
-                        destination_conn.execute(table_obj.insert().values(row))
-
-                    # Commit the changes for each table
-                    destination_conn.commit()
-
-        return destination_engine
-
    def _get_filtered_sequences_if_any(self) -> Optional[pd.Series]:
        # maximum possible filter (if limit_sequences_per_category_to == 0):
        # WHERE category IN 'self.pick_categories'
@@ -486,22 +396,19 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
            *self._get_pick_filters(),
            *self._get_exclude_filters(),
        ]
-        if self.pick_sequences_sql_clause:
-            print("Applying the custom SQL clause.")
-            where_conditions.append(sa.text(self.pick_sequences_sql_clause))

        def add_where(stmt):
            return stmt.where(*where_conditions) if where_conditions else stmt

        if self.limit_sequences_per_category_to <= 0:
-            stmt = add_where(sa.select(self.sequence_annotations_type.sequence_name))
+            stmt = add_where(sa.select(SqlSequenceAnnotation.sequence_name))
        else:
            subquery = sa.select(
-                self.sequence_annotations_type.sequence_name,
+                SqlSequenceAnnotation.sequence_name,
                sa.func.row_number()
                .over(
                    order_by=sa.text("ROWID"),  # NOTE: ROWID is SQLite-specific
-                    partition_by=self.sequence_annotations_type.category,
+                    partition_by=SqlSequenceAnnotation.category,
                )
                .label("row_number"),
            )
@@ -537,34 +444,31 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
            return []

        logger.info(f"Limiting dataset to categories: {self.pick_categories}")
-        return [self.sequence_annotations_type.category.in_(self.pick_categories)]
+        return [SqlSequenceAnnotation.category.in_(self.pick_categories)]

    def _get_pick_filters(self) -> List[sa.ColumnElement]:
        if not self.pick_sequences:
            return []

        logger.info(f"Limiting dataset to sequences: {self.pick_sequences}")
-        return [self.sequence_annotations_type.sequence_name.in_(self.pick_sequences)]
+        return [SqlSequenceAnnotation.sequence_name.in_(self.pick_sequences)]

    def _get_exclude_filters(self) -> List[sa.ColumnOperators]:
        if not self.exclude_sequences:
            return []

        logger.info(f"Removing sequences from the dataset: {self.exclude_sequences}")
-        return [
-            self.sequence_annotations_type.sequence_name.notin_(self.exclude_sequences)
-        ]
+        return [SqlSequenceAnnotation.sequence_name.notin_(self.exclude_sequences)]

    def _load_subsets_from_json(self, subset_lists_path: str) -> pd.DataFrame:
-        subsets = self.subsets
-        assert subsets is not None
+        assert self.subsets is not None
        with open(subset_lists_path, "r") as f:
            subset_to_seq_frame = json.load(f)

        seq_frame_list = sum(
            (
                [(*row, subset) for row in subset_to_seq_frame[subset]]
-                for subset in subsets
+                for subset in self.subsets
            ),
            [],
        )
@@ -618,7 +522,7 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
                stmt = sa.select(
                    self.frame_annotations_type.sequence_name,
                    self.frame_annotations_type.frame_number,
-                ).where(self.frame_annotations_type._mask_mass == 0)  # pyre-ignore[16]
+                ).where(self.frame_annotations_type._mask_mass == 0)
                with Session(self._sql_engine) as session:
                    to_remove = session.execute(stmt).all()

@@ -682,7 +586,7 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
        stmt = sa.select(
            self.frame_annotations_type.sequence_name,
            self.frame_annotations_type.frame_number,
-            self.frame_annotations_type._image_path,  # pyre-ignore[16]
+            self.frame_annotations_type._image_path,
            sa.null().label("subset"),
        )
        where_conditions = []
@@ -696,7 +600,7 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
            logger.info("  excluding samples with empty masks")
            where_conditions.append(
                sa.or_(
-                    self.frame_annotations_type._mask_mass.is_(None),  # pyre-ignore[16]
+                    self.frame_annotations_type._mask_mass.is_(None),
                    self.frame_annotations_type._mask_mass != 0,
                )
            )
@@ -730,9 +634,7 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
        assert self.eval_batches_file
        logger.info(f"Loading eval batches from {self.eval_batches_file}")

-        if (
-            self.path_manager and not self.path_manager.isfile(self.eval_batches_file)
-        ) or (not self.path_manager and not os.path.isfile(self.eval_batches_file)):
+        if not os.path.isfile(self.eval_batches_file):
            # The batch indices file does not exist.
            # Most probably the user has not specified the root folder.
            raise ValueError(
@@ -740,8 +642,7 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
                + "Please specify a correct dataset_root folder."
            )

-        eval_batches_file = self._local_path(self.eval_batches_file)
-        with open(eval_batches_file, "r") as f:
+        with open(self.eval_batches_file, "r") as f:
            eval_batches = json.load(f)

        # limit the dataset to sequences to allow multiple evaluations in one file
@@ -755,7 +656,7 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
        if pick_sequences:
            old_len = len(eval_batches)
            eval_batches = [b for b in eval_batches if b[0][0] in pick_sequences]
-            logger.warning(
+            logger.warn(
                f"Picked eval batches by sequence/cat: {old_len} -> {len(eval_batches)}"
            )

@@ -763,7 +664,7 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
            old_len = len(eval_batches)
            exclude_sequences = set(self.exclude_sequences)
            eval_batches = [b for b in eval_batches if b[0][0] not in exclude_sequences]
-            logger.warning(
+            logger.warn(
                f"Excluded eval batches by sequence: {old_len} -> {len(eval_batches)}"
            )

@@ -825,15 +726,9 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
            self.frame_annotations_type.sequence_name == seq_name,
            self.frame_annotations_type.frame_number.in_(frames),
        )
-        frame_no_ts = None

-        if self.scoped_session:
-            stmt_text = str(stmt.compile(compile_kwargs={"literal_binds": True}))
-            with scoped_session(self._session_factory)() as session:  # pyre-ignore
-                frame_no_ts = pd.read_sql_query(stmt_text, session.connection())
-        else:
-            with self._sql_engine.connect() as connection:
-                frame_no_ts = pd.read_sql_query(stmt, connection)
+        with self._sql_engine.connect() as connection:
+            frame_no_ts = pd.read_sql_query(stmt, connection)

        if len(frame_no_ts) != len(index_slice):
            raise ValueError(
@@ -863,18 +758,11 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
            prefixes=["TEMP"],  # NOTE SQLite specific!
        )

-    @classmethod
-    def pre_expand(cls) -> None:
-        # remove dataclass annotations that are not meant to be init params
-        # because they cause troubles for OmegaConf
-        for attr, attr_value in list(cls.__dict__.items()):  # need to copy as we mutate
-            if isinstance(attr_value, Field) and attr_value.metadata.get(
-                "omegaconf_ignore", False
-            ):
-                delattr(cls, attr)
-                del cls.__annotations__[attr]
-

 def _seq_name_to_seed(seq_name) -> int:
    """Generates numbers in [0, 2 ** 28)"""
    return int(hashlib.sha1(seq_name.encode("utf-8")).hexdigest()[:7], 16)
+
+
+def _safe_as_tensor(data, dtype):
+    return torch.tensor(data, dtype=dtype) if data is not None else None
--- a/pytorch3d/implicitron/dataset/sql_dataset_provider.py
+++ b/pytorch3d/implicitron/dataset/sql_dataset_provider.py
@@ -4,8 +4,6 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.

-# pyre-unsafe
-

 import logging
 import os
@@ -45,7 +43,7 @@ logger = logging.getLogger(__name__)


@registry.register
-class SqlIndexDatasetMapProvider(DatasetMapProviderBase):
+class SqlIndexDatasetMapProvider(DatasetMapProviderBase):  # pyre-ignore [13]
    """
    Generates the training, validation, and testing dataset objects for
    a dataset laid out on disk like SQL-CO3D, with annotations in an SQLite data base.
@@ -195,9 +193,9 @@ class SqlIndexDatasetMapProvider(DatasetMapProviderBase):

    # this is a mould that is never constructed, used to build self._dataset_map values
    dataset_class_type: str = "SqlIndexDataset"
-    dataset: SqlIndexDataset  # pyre-ignore [13]
+    dataset: SqlIndexDataset

-    path_manager_factory: PathManagerFactory  # pyre-ignore [13]
+    path_manager_factory: PathManagerFactory
    path_manager_factory_class_type: str = "PathManagerFactory"

    def __post_init__(self):
@@ -284,14 +282,8 @@ class SqlIndexDatasetMapProvider(DatasetMapProviderBase):
                logger.info(f"Val dataset: {str(val_dataset)}")

            logger.debug("Extracting test dataset.")
-            if self.eval_batches_path is None:
-                eval_batches_file = None
-            else:
-                eval_batches_file = self._get_lists_file("eval_batches")
-
-            if "eval_batches_file" in common_dataset_kwargs:
-                common_dataset_kwargs.pop("eval_batches_file", None)
-
+            eval_batches_file = self._get_lists_file("eval_batches")
+            del common_dataset_kwargs["eval_batches_file"]
            test_dataset = dataset_type(
                **common_dataset_kwargs,
                subsets=self._get_subsets(self.test_subsets, True),
--- a/pytorch3d/implicitron/dataset/utils.py
+++ b/pytorch3d/implicitron/dataset/utils.py
@@ -87,15 +87,6 @@ def is_train_frame(
 def get_bbox_from_mask(
    mask: np.ndarray, thr: float, decrease_quant: float = 0.05
 ) -> Tuple[int, int, int, int]:
-    # these corner cases need to be handled in order to avoid an infinite loop
-    if mask.size == 0:
-        warnings.warn("Empty mask is provided for bbox extraction.", stacklevel=1)
-        return 0, 0, 1, 1
-
-    if not mask.min() >= 0.0:
-        warnings.warn("Negative values in the mask for bbox extraction.", stacklevel=1)
-        mask = mask.clip(min=0.0)
-
    # bbox in xywh
    masks_for_box = np.zeros_like(mask)
    while masks_for_box.sum() <= 1.0:
@@ -143,15 +134,7 @@ T = TypeVar("T", bound=torch.Tensor)
 def bbox_xyxy_to_xywh(xyxy: T) -> T:
    wh = xyxy[2:] - xyxy[:2]
    xywh = torch.cat([xyxy[:2], wh])
-    return xywh  # pyre-ignore[7]
-
-
-def bbox_xywh_to_xyxy(xywh: T, clamp_size: float | int | None = None) -> T:
-    wh = xywh[2:]
-    if clamp_size is not None:
-        wh = wh.clamp(min=clamp_size)
-    xyxy = torch.cat([xywh[:2], xywh[:2] + wh])
-    return xyxy  # pyre-ignore[7]
+    return xywh  # pyre-ignore


 def get_clamp_bbox(
@@ -197,6 +180,16 @@ def rescale_bbox(
    return bbox * rel_size


+def bbox_xywh_to_xyxy(
+    xywh: torch.Tensor, clamp_size: Optional[int] = None
+) -> torch.Tensor:
+    xyxy = xywh.clone()
+    if clamp_size is not None:
+        xyxy[2:] = torch.clamp(xyxy[2:], clamp_size)
+    xyxy[2:] += xyxy[:2]
+    return xyxy
+
+
 def get_1d_bounds(arr: np.ndarray) -> Tuple[int, int]:
    nz = np.flatnonzero(arr)
    return nz[0], nz[-1] + 1
@@ -208,24 +201,18 @@ def resize_image(
    image_width: Optional[int],
    mode: str = "bilinear",
 ) -> Tuple[torch.Tensor, float, torch.Tensor]:
+
    if isinstance(image, np.ndarray):
        image = torch.from_numpy(image)

-    if (
-        image_height is None
-        or image_width is None
-        or image.shape[-2] == 0
-        or image.shape[-1] == 0
-    ):
+    if image_height is None or image_width is None:
        # skip the resizing
        return image, 1.0, torch.ones_like(image[:1])
-
    # takes numpy array or tensor, returns pytorch tensor
    minscale = min(
        image_height / image.shape[-2],
        image_width / image.shape[-1],
    )
-
    imre = torch.nn.functional.interpolate(
        image[None],
        scale_factor=minscale,
@@ -233,7 +220,6 @@ def resize_image(
        align_corners=False if mode == "bilinear" else None,
        recompute_scale_factor=True,
    )[0]
-
    imre_ = torch.zeros(image.shape[0], image_height, image_width)
    imre_[:, 0 : imre.shape[1], 0 : imre.shape[2]] = imre
    mask = torch.zeros(1, image_height, image_width)
@@ -246,21 +232,9 @@ def transpose_normalize_image(image: np.ndarray) -> np.ndarray:
    return im.astype(np.float32) / 255.0


-def load_image(
-    path: str, try_read_alpha: bool = False, pil_format: str = "RGB"
-) -> np.ndarray:
-    """
-    Load an image from a path and return it as a numpy array.
-    If try_read_alpha is True, the image is read as RGBA and the alpha channel is
-    returned as the fourth channel.
-    Otherwise, the image is read as RGB and a three-channel image is returned.
-    """
+def load_image(path: str) -> np.ndarray:
    with Image.open(path) as pil_im:
-        # Check if the image has an alpha channel
-        if try_read_alpha and pil_im.mode == "RGBA":
-            im = np.array(pil_im)
-        else:
-            im = np.array(pil_im.convert(pil_format))
+        im = np.array(pil_im.convert("RGB"))

    return transpose_normalize_image(im)

@@ -355,7 +329,6 @@ def adjust_camera_to_bbox_crop_(

    focal_length_px, principal_point_px = _convert_ndc_to_pixels(
        camera.focal_length[0],
-        # pyre-fixme[29]: `Union[(self: TensorBase, indices: Union[None, slice[Any, A...
        camera.principal_point[0],
        image_size_wh,
    )
@@ -368,7 +341,6 @@ def adjust_camera_to_bbox_crop_(
    )

    camera.focal_length = focal_length[None]
-    # pyre-fixme[16]: `PerspectiveCameras` has no attribute `principal_point`.
    camera.principal_point = principal_point_cropped[None]


@@ -380,7 +352,6 @@ def adjust_camera_to_image_scale_(
 ) -> PerspectiveCameras:
    focal_length_px, principal_point_px = _convert_ndc_to_pixels(
        camera.focal_length[0],
-        # pyre-fixme[29]: `Union[(self: TensorBase, indices: Union[None, slice[Any, A...
        camera.principal_point[0],
        original_size_wh,
    )
@@ -397,8 +368,7 @@ def adjust_camera_to_image_scale_(
        image_size_wh_output,
    )
    camera.focal_length = focal_length_scaled[None]
-    # pyre-fixme[16]: `PerspectiveCameras` has no attribute `principal_point`.
-    camera.principal_point = principal_point_scaled[None]  # pyre-ignore[16]
+    camera.principal_point = principal_point_scaled[None]


 # NOTE this cache is per-worker; they are implemented as processes.
--- a/pytorch3d/implicitron/evaluation/evaluate_new_view_synthesis.py
+++ b/pytorch3d/implicitron/evaluation/evaluate_new_view_synthesis.py
@@ -299,6 +299,7 @@ def eval_batch(
    )

    for loss_fg_mask, name_postfix in zip((mask_crop, mask_fg), ("_masked", "_fg")):
+
        loss_mask_now = mask_crop * loss_fg_mask

        for rgb_metric_name, rgb_metric_fun in zip(
--- a/pytorch3d/implicitron/models/feature_extractor/resnet_feature_extractor.py
+++ b/pytorch3d/implicitron/models/feature_extractor/resnet_feature_extractor.py
@@ -106,7 +106,7 @@ class ResNetFeatureExtractor(FeatureExtractorBase):
            self.layers = torch.nn.ModuleList()
            self.proj_layers = torch.nn.ModuleList()
            for stage in range(self.max_stage):
-                stage_name = f"layer{stage + 1}"
+                stage_name = f"layer{stage+1}"
                feature_name = self._get_resnet_stage_feature_name(stage)
                if (stage + 1) in self.stages:
                    if (
@@ -139,18 +139,12 @@ class ResNetFeatureExtractor(FeatureExtractorBase):
        self.stages = set(self.stages)  # convert to set for faster "in"

    def _get_resnet_stage_feature_name(self, stage) -> str:
-        return f"res_layer_{stage + 1}"
+        return f"res_layer_{stage+1}"

    def _resnet_normalize_image(self, img: torch.Tensor) -> torch.Tensor:
-        # pyre-fixme[58]: `-` is not supported for operand types `Tensor` and
-        #  `Union[Tensor, Module]`.
-        # pyre-fixme[58]: `/` is not supported for operand types `Tensor` and
-        #  `Union[Tensor, Module]`.
        return (img - self._resnet_mean) / self._resnet_std

    def get_feat_dims(self) -> int:
-        # pyre-fixme[29]: `Union[(self: TensorBase) -> Tensor, Tensor, Module]` is
-        #  not a function.
        return sum(self._feat_dim.values())

    def forward(
@@ -189,12 +183,7 @@ class ResNetFeatureExtractor(FeatureExtractorBase):
            else:
                imgs_normed = imgs_resized
            #  is not a function.
-            # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
            feats = self.stem(imgs_normed)
-            # pyre-fixme[6]: For 1st argument expected `Iterable[_T1]` but got
-            #  `Union[Tensor, Module]`.
-            # pyre-fixme[6]: For 2nd argument expected `Iterable[_T2]` but got
-            #  `Union[Tensor, Module]`.
            for stage, (layer, proj) in enumerate(zip(self.layers, self.proj_layers)):
                feats = layer(feats)
                # just a sanity check below
--- a/pytorch3d/implicitron/models/generic_model.py
+++ b/pytorch3d/implicitron/models/generic_model.py
@@ -478,8 +478,6 @@ class GenericModel(ImplicitronModelBase):
            )
        custom_args["global_code"] = global_code

-        # pyre-fixme[29]: `Union[(self: Tensor) -> Any, Tensor, Module]` is not a
-        #  function.
        for func in self._implicit_functions:
            func.bind_args(**custom_args)

@@ -502,8 +500,6 @@ class GenericModel(ImplicitronModelBase):
        # Unbind the custom arguments to prevent pytorch from storing
        # large buffers of intermediate results due to points in the
        # bound arguments.
-        # pyre-fixme[29]: `Union[(self: Tensor) -> Any, Tensor, Module]` is not a
-        #  function.
        for func in self._implicit_functions:
            func.unbind_args()

--- a/pytorch3d/implicitron/models/global_encoder/autodecoder.py
+++ b/pytorch3d/implicitron/models/global_encoder/autodecoder.py
@@ -71,7 +71,6 @@ class Autodecoder(Configurable, torch.nn.Module):
        return key_map

    def calculate_squared_encoding_norm(self) -> Optional[torch.Tensor]:
-        # pyre-fixme[16]: Item `Tensor` of `Tensor | Module` has no attribute `weight`.
        return (self._autodecoder_codes.weight**2).mean()

    def get_encoding_dim(self) -> int:
@@ -96,7 +95,6 @@ class Autodecoder(Configurable, torch.nn.Module):
                # pyre-fixme[9]: x has type `Union[List[str], LongTensor]`; used as
                #  `Tensor`.
                x = torch.tensor(
-                    # pyre-fixme[29]: `Union[(self: TensorBase, indices: Union[None, ...
                    [self._key_map[elem] for elem in x],
                    dtype=torch.long,
                    device=next(self.parameters()).device,
@@ -104,7 +102,6 @@ class Autodecoder(Configurable, torch.nn.Module):
            except StopIteration:
                raise ValueError("Not enough n_instances in the autodecoder") from None

-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        return self._autodecoder_codes(x)

    def _load_key_map_hook(
--- a/pytorch3d/implicitron/models/global_encoder/global_encoder.py
+++ b/pytorch3d/implicitron/models/global_encoder/global_encoder.py
@@ -122,7 +122,6 @@ class HarmonicTimeEncoder(GlobalEncoderBase, torch.nn.Module):
        if frame_timestamp.shape[-1] != 1:
            raise ValueError("Frame timestamp's last dimensions should be one.")
        time = frame_timestamp / self.time_divisor
-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        return self._harmonic_embedding(time)

    def calculate_squared_encoding_norm(self) -> Optional[torch.Tensor]:
--- a/pytorch3d/implicitron/models/implicit_function/decoding_functions.py
+++ b/pytorch3d/implicitron/models/implicit_function/decoding_functions.py
@@ -232,14 +232,9 @@ class MLPWithInputSkips(Configurable, torch.nn.Module):
            # if the skip tensor is None, we use `x` instead.
            z = x
        skipi = 0
-        # pyre-fixme[6]: For 1st argument expected `Iterable[_T]` but got
-        #  `Union[Tensor, Module]`.
        for li, layer in enumerate(self.mlp):
-            # pyre-fixme[58]: `in` is not supported for right operand type
-            #  `Union[Tensor, Module]`.
            if li in self._input_skips:
                if self._skip_affine_trans:
-                    # pyre-fixme[29]: `Union[(self: TensorBase, indices: Union[None, ...
                    y = self._apply_affine_layer(self.skip_affines[skipi], y, z)
                else:
                    y = torch.cat((y, z), dim=-1)
--- a/pytorch3d/implicitron/models/implicit_function/idr_feature_field.py
+++ b/pytorch3d/implicitron/models/implicit_function/idr_feature_field.py
@@ -141,16 +141,11 @@ class IdrFeatureField(ImplicitFunctionBase, torch.nn.Module):
            self.embed_fn is None and fun_viewpool is None and global_code is None
        ):
            return torch.tensor(
-                [],
-                device=rays_points_world.device,
-                dtype=rays_points_world.dtype,
-                # pyre-fixme[6]: For 2nd argument expected `Union[int, SymInt]` but got
-                #  `Union[Module, Tensor]`.
+                [], device=rays_points_world.device, dtype=rays_points_world.dtype
            ).view(0, self.out_dim)

        embeddings = []
        if self.embed_fn is not None:
-            # pyre-fixme[29]: `Union[Module, Tensor]` is not a function.
            embeddings.append(self.embed_fn(rays_points_world))

        if fun_viewpool is not None:
@@ -169,19 +164,13 @@ class IdrFeatureField(ImplicitFunctionBase, torch.nn.Module):

        embedding = torch.cat(embeddings, dim=-1)
        x = embedding
-        # pyre-fixme[29]: `Union[(self: TensorBase, other: Union[bool, complex,
-        #  float, int, Tensor]) -> Tensor, Module, Tensor]` is not a function.
        for layer_idx in range(self.num_layers - 1):
            if layer_idx in self.skip_in:
                x = torch.cat([x, embedding], dim=-1) / 2**0.5

-            # pyre-fixme[29]: `Union[(self: TensorBase, indices: Union[None, slice[An...
            x = self.linear_layers[layer_idx](x)

-            # pyre-fixme[29]: `Union[(self: TensorBase, other: Union[bool, complex,
-            #  float, int, Tensor]) -> Tensor, Module, Tensor]` is not a function.
            if layer_idx < self.num_layers - 2:
-                # pyre-fixme[29]: `Union[Module, Tensor]` is not a function.
                x = self.softplus(x)

        return x
--- a/pytorch3d/implicitron/models/implicit_function/neural_radiance_field.py
+++ b/pytorch3d/implicitron/models/implicit_function/neural_radiance_field.py
@@ -123,10 +123,8 @@ class NeuralRadianceFieldBase(ImplicitFunctionBase, torch.nn.Module):
        # Normalize the ray_directions to unit l2 norm.
        rays_directions_normed = torch.nn.functional.normalize(rays_directions, dim=-1)
        # Obtain the harmonic embedding of the normalized ray directions.
-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        rays_embedding = self.harmonic_embedding_dir(rays_directions_normed)

-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        return self.color_layer((self.intermediate_linear(features), rays_embedding))

    @staticmethod
@@ -197,8 +195,6 @@ class NeuralRadianceFieldBase(ImplicitFunctionBase, torch.nn.Module):
        embeds = create_embeddings_for_implicit_function(
            xyz_world=rays_points_world,
            #  for 2nd param but got `Union[None, torch.Tensor, torch.nn.Module]`.
-            # pyre-fixme[6]: For 2nd argument expected `Optional[(...) -> Any]` but
-            #  got `Union[None, Tensor, Module]`.
            xyz_embedding_function=(
                self.harmonic_embedding_xyz if self.input_xyz else None
            ),
@@ -210,14 +206,12 @@ class NeuralRadianceFieldBase(ImplicitFunctionBase, torch.nn.Module):
        )

        # embeds.shape = [minibatch x n_src x n_rays x n_pts x self.n_harmonic_functions*6+3]
-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        features = self.xyz_encoder(embeds)
        # features.shape = [minibatch x ... x self.n_hidden_neurons_xyz]
        # NNs operate on the flattenned rays; reshaping to the correct spatial size
        # TODO: maybe make the transformer work on non-flattened tensors to avoid this reshape
        features = features.reshape(*rays_points_world.shape[:-1], -1)

-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        raw_densities = self.density_layer(features)
        # raw_densities.shape = [minibatch x ... x 1] in [0-1]

@@ -225,8 +219,6 @@ class NeuralRadianceFieldBase(ImplicitFunctionBase, torch.nn.Module):
            if camera is None:
                raise ValueError("Camera must be given if xyz_ray_dir_in_camera_coords")

-            # pyre-fixme[58]: `@` is not supported for operand types `Tensor` and
-            #  `Union[Tensor, Module]`.
            directions = ray_bundle.directions @ camera.R
        else:
            directions = ray_bundle.directions
--- a/pytorch3d/implicitron/models/implicit_function/scene_representation_networks.py
+++ b/pytorch3d/implicitron/models/implicit_function/scene_representation_networks.py
@@ -103,8 +103,6 @@ class SRNRaymarchFunction(Configurable, torch.nn.Module):

        embeds = create_embeddings_for_implicit_function(
            xyz_world=rays_points_world,
-            # pyre-fixme[6]: For 2nd argument expected `Optional[(...) -> Any]` but
-            #  got `Union[Tensor, Module]`.
            xyz_embedding_function=self._harmonic_embedding,
            global_code=global_code,
            fun_viewpool=fun_viewpool,
@@ -114,7 +112,6 @@ class SRNRaymarchFunction(Configurable, torch.nn.Module):

        # Before running the network, we have to resize embeds to ndims=3,
        # otherwise the SRN layers consume huge amounts of memory.
-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        raymarch_features = self._net(
            embeds.view(embeds.shape[0], -1, embeds.shape[-1])
        )
@@ -169,9 +166,7 @@ class SRNPixelGenerator(Configurable, torch.nn.Module):
        # Normalize the ray_directions to unit l2 norm.
        rays_directions_normed = torch.nn.functional.normalize(rays_directions, dim=-1)
        # Obtain the harmonic embedding of the normalized ray directions.
-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        rays_embedding = self._harmonic_embedding(rays_directions_normed)
-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        return self._color_layer((features, rays_embedding))

    def forward(
@@ -200,7 +195,6 @@ class SRNPixelGenerator(Configurable, torch.nn.Module):
                denoting the color of each ray point.
        """
        # raymarch_features.shape = [minibatch x ... x pts_per_ray x 3]
-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        features = self._net(raymarch_features)
        # features.shape = [minibatch x ... x self.n_hidden_units]

@@ -208,8 +202,6 @@ class SRNPixelGenerator(Configurable, torch.nn.Module):
            if camera is None:
                raise ValueError("Camera must be given if xyz_ray_dir_in_camera_coords")

-            # pyre-fixme[58]: `@` is not supported for operand types `Tensor` and
-            #  `Union[Tensor, Module]`.
            directions = ray_bundle.directions @ camera.R
        else:
            directions = ray_bundle.directions
@@ -217,7 +209,6 @@ class SRNPixelGenerator(Configurable, torch.nn.Module):
        # NNs operate on the flattenned rays; reshaping to the correct spatial size
        features = features.reshape(*raymarch_features.shape[:-1], -1)

-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        raw_densities = self._density_layer(features)

        rays_colors = self._get_colors(features, directions)
@@ -278,7 +269,6 @@ class SRNRaymarchHyperNet(Configurable, torch.nn.Module):
        srn_raymarch_function.
        """

-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        net = self._hypernet(global_code)

        # use the hyper-net generated network to instantiate the raymarch module
@@ -306,6 +296,7 @@ class SRNRaymarchHyperNet(Configurable, torch.nn.Module):
        global_code=None,
        **kwargs,
    ):
+
        if global_code is None:
            raise ValueError("SRN Hypernetwork requires a non-trivial global code.")

@@ -313,8 +304,6 @@ class SRNRaymarchHyperNet(Configurable, torch.nn.Module):
        # across LSTM iterations for the same global_code.
        if self.cached_srn_raymarch_function is None:
            # generate the raymarching network from the hypernet
-            # pyre-fixme[16]: `SRNRaymarchHyperNet` has no attribute
-            #  `cached_srn_raymarch_function`.
            self.cached_srn_raymarch_function = self._run_hypernet(global_code)
        (srn_raymarch_function,) = cast(
            Tuple[SRNRaymarchFunction], self.cached_srn_raymarch_function
@@ -342,7 +331,6 @@ class SRNImplicitFunction(ImplicitFunctionBase, torch.nn.Module):
    def create_raymarch_function(self) -> None:
        self.raymarch_function = SRNRaymarchFunction(
            latent_dim=self.latent_dim,
-            # pyre-fixme[32]: Keyword argument must be a mapping with string keys.
            **self.raymarch_function_args,
        )

@@ -401,7 +389,6 @@ class SRNHyperNetImplicitFunction(ImplicitFunctionBase, torch.nn.Module):
        self.hypernet = SRNRaymarchHyperNet(
            latent_dim=self.latent_dim,
            latent_dim_hypernet=self.latent_dim_hypernet,
-            # pyre-fixme[32]: Keyword argument must be a mapping with string keys.
            **self.hypernet_args,
        )

--- a/pytorch3d/implicitron/models/implicit_function/utils.py
+++ b/pytorch3d/implicitron/models/implicit_function/utils.py
@@ -40,6 +40,7 @@ def create_embeddings_for_implicit_function(
    xyz_embedding_function: Optional[Callable],
    diag_cov: Optional[torch.Tensor] = None,
 ) -> torch.Tensor:
+
    bs, *spatial_size, pts_per_ray, _ = xyz_world.shape

    if xyz_in_camera_coords:
@@ -63,6 +64,7 @@ def create_embeddings_for_implicit_function(
            0,
        )
    else:
+
        embeds = xyz_embedding_function(ray_points_for_embed, diag_cov=diag_cov)
        embeds = embeds.reshape(
            bs,
--- a/pytorch3d/implicitron/models/implicit_function/voxel_grid.py
+++ b/pytorch3d/implicitron/models/implicit_function/voxel_grid.py
@@ -21,6 +21,8 @@ import logging
 import warnings
 from collections.abc import Mapping
 from dataclasses import dataclass, field
+
+from distutils.version import LooseVersion
 from typing import Any, Callable, ClassVar, Dict, Iterator, List, Optional, Tuple, Type

 import torch
@@ -220,8 +222,7 @@ class VoxelGridBase(ReplaceableBase, torch.nn.Module):
                + "| 'bicubic' | 'linear' | 'area' | 'nearest-exact'"
            )

-        # We assume PyTorch 1.11 and newer.
-        interpolate_has_antialias = True
+        interpolate_has_antialias = LooseVersion(torch.__version__) >= "1.11"

        if antialias and not interpolate_has_antialias:
            warnings.warn("Antialiased interpolation requires PyTorch 1.11+; ignoring")
@@ -268,7 +269,6 @@ class VoxelGridBase(ReplaceableBase, torch.nn.Module):
                for name, tensor in vars(grid_values_with_wanted_resolution).items()
            }

-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        return self.values_type(**params), True

    def get_resolution_change_epochs(self) -> Tuple[int, ...]:
@@ -882,7 +882,6 @@ class VoxelGridModule(Configurable, torch.nn.Module):
            torch.Tensor of shape (..., n_features)
        """
        locator = self._get_volume_locator()
-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        grid_values = self.voxel_grid.values_type(**self.params)
        # voxel grids operate with extra n_grids dimension, which we fix to one
        return self.voxel_grid.evaluate_world(points[None], grid_values, locator)[0]
@@ -896,7 +895,6 @@ class VoxelGridModule(Configurable, torch.nn.Module):
                replace current parameters
        """
        if self.hold_voxel_grid_as_parameters:
-            # pyre-fixme[16]: `VoxelGridModule` has no attribute `params`.
            self.params = torch.nn.ParameterDict(
                {
                    k: torch.nn.Parameter(val)
@@ -947,7 +945,6 @@ class VoxelGridModule(Configurable, torch.nn.Module):
        Returns:
            True if parameter change has happened else False.
        """
-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        grid_values = self.voxel_grid.values_type(**self.params)
        grid_values, change = self.voxel_grid.change_resolution(
            grid_values, epoch=epoch
@@ -995,21 +992,16 @@ class VoxelGridModule(Configurable, torch.nn.Module):
        """
        '''
        new_params = {}
-        # pyre-fixme[29]: `Union[(self: Tensor) -> Any, Tensor, Module]` is not a
-        #  function.
        for name in self.params:
            key = prefix + "params." + name
            if key in state_dict:
                new_params[name] = torch.zeros_like(state_dict[key])
-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        self.set_voxel_grid_parameters(self.voxel_grid.values_type(**new_params))

    def get_device(self) -> torch.device:
        """
        Returns torch.device on which module parameters are located
        """
-        # pyre-fixme[29]: `Union[(self: TensorBase) -> Tensor, Tensor, Module]` is
-        #  not a function.
        return next(val for val in self.params.values() if val is not None).device

    def crop_self(self, min_point: torch.Tensor, max_point: torch.Tensor) -> None:
@@ -1026,7 +1018,6 @@ class VoxelGridModule(Configurable, torch.nn.Module):
        """
        locator = self._get_volume_locator()
        #  torch.nn.modules.module.Module]` is not a function.
-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        old_grid_values = self.voxel_grid.values_type(**self.params)
        new_grid_values = self.voxel_grid.crop_world(
            min_point, max_point, old_grid_values, locator
@@ -1034,7 +1025,6 @@ class VoxelGridModule(Configurable, torch.nn.Module):
        grid_values, _ = self.voxel_grid.change_resolution(
            new_grid_values, grid_values_with_wanted_resolution=old_grid_values
        )
-        # pyre-fixme[16]: `VoxelGridModule` has no attribute `params`.
        self.params = torch.nn.ParameterDict(
            {
                k: torch.nn.Parameter(val)
--- a/pytorch3d/implicitron/models/implicit_function/voxel_grid_implicit_function.py
+++ b/pytorch3d/implicitron/models/implicit_function/voxel_grid_implicit_function.py
@@ -192,26 +192,16 @@ class VoxelGridImplicitFunction(ImplicitFunctionBase, torch.nn.Module):

    def __post_init__(self) -> None:
        run_auto_creation(self)
-        # pyre-fixme[16]: `VoxelGridImplicitFunction` has no attribute
-        #  `voxel_grid_scaffold`.
        self.voxel_grid_scaffold = self._create_voxel_grid_scaffold()
-        # pyre-fixme[16]: `VoxelGridImplicitFunction` has no attribute
-        #  `harmonic_embedder_xyz_density`.
        self.harmonic_embedder_xyz_density = HarmonicEmbedding(
            **self.harmonic_embedder_xyz_density_args
        )
-        # pyre-fixme[16]: `VoxelGridImplicitFunction` has no attribute
-        #  `harmonic_embedder_xyz_color`.
        self.harmonic_embedder_xyz_color = HarmonicEmbedding(
            **self.harmonic_embedder_xyz_color_args
        )
-        # pyre-fixme[16]: `VoxelGridImplicitFunction` has no attribute
-        #  `harmonic_embedder_dir_color`.
        self.harmonic_embedder_dir_color = HarmonicEmbedding(
            **self.harmonic_embedder_dir_color_args
        )
-        # pyre-fixme[16]: `VoxelGridImplicitFunction` has no attribute
-        #  `_scaffold_ready`.
        self._scaffold_ready = False

    def forward(
@@ -262,7 +252,6 @@ class VoxelGridImplicitFunction(ImplicitFunctionBase, torch.nn.Module):
        # ########## filter the points using the scaffold ########## #
        if self._scaffold_ready and self.scaffold_filter_points:
            with torch.no_grad():
-                # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
                non_empty_points = self.voxel_grid_scaffold(points)[..., 0] > 0
            points = points[non_empty_points]
            if len(points) == 0:
@@ -374,7 +363,6 @@ class VoxelGridImplicitFunction(ImplicitFunctionBase, torch.nn.Module):
                feature dimensionality which `decoder_density` returns
        """
        embeds_density = self.voxel_grid_density(points)
-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        harmonic_embedding_density = self.harmonic_embedder_xyz_density(embeds_density)
        # shape = [..., density_dim]
        return self.decoder_density(harmonic_embedding_density)
@@ -409,8 +397,6 @@ class VoxelGridImplicitFunction(ImplicitFunctionBase, torch.nn.Module):
        if self.xyz_ray_dir_in_camera_coords:
            if camera is None:
                raise ValueError("Camera must be given if xyz_ray_dir_in_camera_coords")
-            # pyre-fixme[58]: `@` is not supported for operand types `Tensor` and
-            #  `Union[Tensor, Module]`.
            directions = directions @ camera.R

        # ########## get voxel grid output ########## #
@@ -419,13 +405,11 @@ class VoxelGridImplicitFunction(ImplicitFunctionBase, torch.nn.Module):

        # ########## embed with the harmonic function ########## #
        # Obtain the harmonic embedding of the voxel grid output.
-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        harmonic_embedding_color = self.harmonic_embedder_xyz_color(embeds_color)

        # Normalize the ray_directions to unit l2 norm.
        rays_directions_normed = torch.nn.functional.normalize(directions, dim=-1)
        # Obtain the harmonic embedding of the normalized ray directions.
-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        harmonic_embedding_dir = self.harmonic_embedder_dir_color(
            rays_directions_normed
        )
@@ -494,11 +478,8 @@ class VoxelGridImplicitFunction(ImplicitFunctionBase, torch.nn.Module):
            an object inside, else False.
        """
        # find bounding box
-        # pyre-fixme[16]: Item `Tensor` of `Tensor | Module` has no attribute
-        #  `get_grid_points`.
        points = self.voxel_grid_scaffold.get_grid_points(epoch=epoch)
        assert self._scaffold_ready, "Scaffold has to be calculated before cropping."
-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        occupancy = self.voxel_grid_scaffold(points)[..., 0] > 0
        non_zero_idxs = torch.nonzero(occupancy)
        if len(non_zero_idxs) == 0:
@@ -530,8 +511,6 @@ class VoxelGridImplicitFunction(ImplicitFunctionBase, torch.nn.Module):
        """

        planes = []
-        # pyre-fixme[16]: Item `Tensor` of `Tensor | Module` has no attribute
-        #  `get_grid_points`.
        points = self.voxel_grid_scaffold.get_grid_points(epoch=epoch)

        chunk_size = (
@@ -551,10 +530,7 @@ class VoxelGridImplicitFunction(ImplicitFunctionBase, torch.nn.Module):
            stride=1,
        )
        occupancy_cube = density_cube > self.scaffold_empty_space_threshold
-        # pyre-fixme[16]: Item `Tensor` of `Tensor | Module` has no attribute `params`.
        self.voxel_grid_scaffold.params["voxel_grid"] = occupancy_cube.float()
-        # pyre-fixme[16]: `VoxelGridImplicitFunction` has no attribute
-        #  `_scaffold_ready`.
        self._scaffold_ready = True

        return False
@@ -571,8 +547,6 @@ class VoxelGridImplicitFunction(ImplicitFunctionBase, torch.nn.Module):
        decoding function to this value.
        """
        grid_args = self.voxel_grid_density_args
-        # pyre-fixme[6]: For 1st argument expected `DictConfig` but got
-        #  `Union[Tensor, Module]`.
        grid_output_dim = VoxelGridModule.get_output_dim(grid_args)

        embedder_args = self.harmonic_embedder_xyz_density_args
@@ -601,8 +575,6 @@ class VoxelGridImplicitFunction(ImplicitFunctionBase, torch.nn.Module):
        decoding function to this value.
        """
        grid_args = self.voxel_grid_color_args
-        # pyre-fixme[6]: For 1st argument expected `DictConfig` but got
-        #  `Union[Tensor, Module]`.
        grid_output_dim = VoxelGridModule.get_output_dim(grid_args)

        embedder_args = self.harmonic_embedder_xyz_color_args
@@ -636,9 +608,7 @@ class VoxelGridImplicitFunction(ImplicitFunctionBase, torch.nn.Module):
                    `self.voxel_grid_density`
        """
        return VoxelGridModule(
-            # pyre-fixme[29]: `Union[(self: TensorBase, indices: Union[None, slice[An...
            extents=self.voxel_grid_density_args["extents"],
-            # pyre-fixme[29]: `Union[(self: TensorBase, indices: Union[None, slice[An...
            translation=self.voxel_grid_density_args["translation"],
            voxel_grid_class_type="FullResolutionVoxelGrid",
            hold_voxel_grid_as_parameters=False,
--- a/pytorch3d/implicitron/models/metrics.py
+++ b/pytorch3d/implicitron/models/metrics.py
@@ -6,6 +6,7 @@

 # pyre-unsafe

+
 import warnings
 from typing import Any, Dict, Optional

@@ -297,8 +298,9 @@ class ViewMetrics(ViewMetricsBase):
                _rgb_metrics(
                    image_rgb,
                    image_rgb_pred,
-                    masks=fg_probability,
-                    masks_crop=mask_crop,
+                    fg_probability,
+                    fg_probability_pred,
+                    mask_crop,
                )
            )

@@ -308,21 +310,9 @@ class ViewMetrics(ViewMetricsBase):
            metrics["mask_neg_iou"] = utils.neg_iou_loss(
                fg_probability_pred, fg_probability, mask=mask_crop
            )
-            if torch.is_autocast_enabled():
-                # To avoid issues with mixed precision
-                metrics["mask_bce"] = utils.calc_bce(
-                    fg_probability_pred.logit(),
-                    fg_probability,
-                    mask=mask_crop,
-                    pred_logits=True,
-                )
-            else:
-                metrics["mask_bce"] = utils.calc_bce(
-                    fg_probability_pred,
-                    fg_probability,
-                    mask=mask_crop,
-                    pred_logits=False,
-                )
+            metrics["mask_bce"] = utils.calc_bce(
+                fg_probability_pred, fg_probability, mask=mask_crop
+            )

        if depth_map is not None and depth_map_pred is not None:
            assert mask_crop is not None
@@ -334,11 +324,7 @@ class ViewMetrics(ViewMetricsBase):
            if fg_probability is not None:
                mask = fg_probability * mask_crop
                _, abs_ = utils.eval_depth(
-                    depth_map_pred,
-                    depth_map,
-                    get_best_scale=True,
-                    mask=mask,
-                    crop=0,
+                    depth_map_pred, depth_map, get_best_scale=True, mask=mask, crop=0
                )
                metrics["depth_abs_fg"] = abs_.mean()

@@ -360,26 +346,18 @@ class ViewMetrics(ViewMetricsBase):
        return metrics


-def _rgb_metrics(
-    images,
-    images_pred,
-    masks=None,
-    masks_crop=None,
-    huber_scaling: float = 0.03,
-):
+def _rgb_metrics(images, images_pred, masks, masks_pred, masks_crop):
    assert masks_crop is not None
    if images.shape[1] != images_pred.shape[1]:
        raise ValueError(
            f"Network output's RGB images had {images_pred.shape[1]} "
            f"channels. {images.shape[1]} expected."
        )
-    rgb_abs = ((images_pred - images).abs()).mean(dim=1, keepdim=True)
    rgb_squared = ((images_pred - images) ** 2).mean(dim=1, keepdim=True)
-    rgb_loss = utils.huber(rgb_squared, scaling=huber_scaling)
+    rgb_loss = utils.huber(rgb_squared, scaling=0.03)
    crop_mass = masks_crop.sum().clamp(1.0)
    results = {
        "rgb_huber": (rgb_loss * masks_crop).sum() / crop_mass,
-        "rgb_l1": (rgb_abs * masks_crop).sum() / crop_mass,
        "rgb_mse": (rgb_squared * masks_crop).sum() / crop_mass,
        "rgb_psnr": utils.calc_psnr(images_pred, images, mask=masks_crop),
    }
--- a/pytorch3d/implicitron/models/renderer/lstm_renderer.py
+++ b/pytorch3d/implicitron/models/renderer/lstm_renderer.py
@@ -135,7 +135,6 @@ class LSTMRenderer(BaseRenderer, torch.nn.Module):
                break

            # run the lstm marcher
-            # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
            state_h, state_c = self._lstm(
                raymarch_features.view(-1, raymarch_features.shape[-1]),
                states[-1],
@@ -143,7 +142,6 @@ class LSTMRenderer(BaseRenderer, torch.nn.Module):
            if state_h.requires_grad:
                state_h.register_hook(lambda x: x.clamp(min=-10, max=10))
            # predict the next step size
-            # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
            signed_distance = self._out_layer(state_h).view(ray_bundle_t.lengths.shape)
            # log the lstm states
            states.append((state_h, state_c))
--- a/pytorch3d/implicitron/models/renderer/ray_sampler.py
+++ b/pytorch3d/implicitron/models/renderer/ray_sampler.py
@@ -207,7 +207,6 @@ class AbstractMaskRaySampler(RaySamplerBase, torch.nn.Module):
        """
        sample_mask = None
        if (
-            # pyre-fixme[29]: `Union[(self: TensorBase, indices: Union[None, slice[An...
            self._sampling_mode[evaluation_mode] == RenderSamplingMode.MASK_SAMPLE
            and mask is not None
        ):
@@ -224,7 +223,6 @@ class AbstractMaskRaySampler(RaySamplerBase, torch.nn.Module):
            EvaluationMode.EVALUATION: self._evaluation_raysampler,
        }[evaluation_mode]

-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        ray_bundle = raysampler(
            cameras=cameras,
            mask=sample_mask,
@@ -242,8 +240,6 @@ class AbstractMaskRaySampler(RaySamplerBase, torch.nn.Module):
                "Heterogeneous ray bundle is not supported for conical frustum computation yet"
            )
        elif self.cast_ray_bundle_as_cone:
-            # pyre-fixme[9]: pixel_hw has type `Tuple[float, float]`; used as
-            #  `Tuple[Union[Tensor, Module], Union[Tensor, Module]]`.
            pixel_hw: Tuple[float, float] = (self.pixel_height, self.pixel_width)
            pixel_radii_2d = compute_radii(cameras, ray_bundle.xys[..., :2], pixel_hw)
            return ImplicitronRayBundle(
--- a/pytorch3d/implicitron/models/renderer/raymarcher.py
+++ b/pytorch3d/implicitron/models/renderer/raymarcher.py
@@ -179,10 +179,8 @@ class AccumulativeRaymarcherBase(RaymarcherBase, torch.nn.Module):
            rays_densities = torch.relu(rays_densities)

        weighted_densities = deltas * rays_densities
-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        capped_densities = self._capping_function(weighted_densities)

-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        rays_opacities = self._capping_function(
            torch.cumsum(weighted_densities, dim=-1)
        )
@@ -192,7 +190,6 @@ class AccumulativeRaymarcherBase(RaymarcherBase, torch.nn.Module):
        )
        absorption_shifted[..., : self.surface_thickness] = 1.0

-        # pyre-fixme[29]: `Union[Tensor, Module]` is not a function.
        weights = self._weight_function(capped_densities, absorption_shifted)
        features = (weights[..., None] * rays_features).sum(dim=-2)
        depth = (weights * ray_lengths)[..., None].sum(dim=-2)
@@ -200,8 +197,6 @@ class AccumulativeRaymarcherBase(RaymarcherBase, torch.nn.Module):
        alpha = opacities if self.blend_output else 1
        if self._bg_color.shape[-1] not in [1, features.shape[-1]]:
            raise ValueError("Wrong number of background color channels.")
-        # pyre-fixme[58]: `*` is not supported for operand types `int` and
-        #  `Union[Tensor, Module]`.
        features = alpha * features + (1 - opacities) * self._bg_color

        return RendererOutput(
--- a/Show More
+++ b/Show More