Revert "Fix CUDA kernel index data type in vision/fair/pytorch3d/pytorch3d/csrc/compositing/alpha_composite.cu +10"

This reverts commit 3987612062.
2026-04-18 10:55:59 +08:00 · 2025-03-27 05:28:03 -07:00
169 changed files with 1570 additions and 832 deletions
--- a/dev/linter.sh
+++ b/dev/linter.sh
@@ -10,7 +10,7 @@
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 DIR=$(dirname "${DIR}")
-if [[ -f "${DIR}/BUCK" ]]
+if [[ -f "${DIR}/TARGETS" ]]
 then
  pyfmt "${DIR}"
 else
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -19,6 +19,7 @@
 #
 import os
 import sys
 import unittest.mock as mock
 from recommonmark.parser import CommonMarkParser
--- a/docs/modules/implicitron/datasets.rst
+++ b/docs/modules/implicitron/datasets.rst
@@ -3,6 +3,11 @@ pytorch3d.implicitron.dataset specific datasets
 specific datasets
 .. automodule:: pytorch3d.implicitron.dataset.blender_dataset_map_provider
    :members:
    :undoc-members:
    :show-inheritance:
 .. automodule:: pytorch3d.implicitron.dataset.json_index_dataset_map_provider
    :members:
    :undoc-members:
@@ -13,6 +18,11 @@ specific datasets
    :undoc-members:
    :show-inheritance:
 .. automodule:: pytorch3d.implicitron.dataset.llff_dataset_map_provider
    :members:
    :undoc-members:
    :show-inheritance:
 .. automodule:: pytorch3d.implicitron.dataset.rendered_mesh_dataset_map_provider
    :members:
    :undoc-members:
--- a/projects/implicitron_trainer/configs/overfit_singleseq_nerf_blender.yaml
+++ b/projects/implicitron_trainer/configs/overfit_singleseq_nerf_blender.yaml
@@ -0,0 +1,56 @@
 defaults:
 - overfit_singleseq_base
 - _self_
 exp_dir: "./data/overfit_nerf_blender_repro/${oc.env:BLENDER_SINGLESEQ_CLASS}"
 data_source_ImplicitronDataSource_args:
  data_loader_map_provider_SequenceDataLoaderMapProvider_args:
    dataset_length_train: 100
  dataset_map_provider_class_type: BlenderDatasetMapProvider
  dataset_map_provider_BlenderDatasetMapProvider_args:
    base_dir: ${oc.env:BLENDER_DATASET_ROOT}/${oc.env:BLENDER_SINGLESEQ_CLASS}
    n_known_frames_for_test: null
    object_name: ${oc.env:BLENDER_SINGLESEQ_CLASS}
    path_manager_factory_class_type: PathManagerFactory
    path_manager_factory_PathManagerFactory_args:
      silence_logs: true
 model_factory_ImplicitronModelFactory_args:
  model_class_type: "OverfitModel"
  model_OverfitModel_args:
    mask_images: false
    raysampler_class_type: AdaptiveRaySampler
    raysampler_AdaptiveRaySampler_args:
      n_pts_per_ray_training: 64
      n_pts_per_ray_evaluation: 64
      n_rays_per_image_sampled_from_mask: 4096
      stratified_point_sampling_training: true
      stratified_point_sampling_evaluation: false
      scene_extent: 2.0
      scene_center:
      - 0.0
      - 0.0
      - 0.0
    renderer_MultiPassEmissionAbsorptionRenderer_args:
      density_noise_std_train: 0.0
      n_pts_per_ray_fine_training: 128
      n_pts_per_ray_fine_evaluation: 128
      raymarcher_EmissionAbsorptionRaymarcher_args:
        blend_output: false
    loss_weights:
      loss_rgb_mse: 1.0
      loss_prev_stage_rgb_mse: 1.0
      loss_mask_bce: 0.0
      loss_prev_stage_mask_bce: 0.0
      loss_autodecoder_norm: 0.00
 optimizer_factory_ImplicitronOptimizerFactory_args:
  exponential_lr_step_size: 3001
  lr_policy: LinearExponential
  linear_exponential_lr_milestone: 200
 training_loop_ImplicitronTrainingLoop_args:
  max_epochs: 6000
  metric_print_interval: 10
  store_checkpoints_purge: 3
  test_when_finished: true
  validation_interval: 100
--- a/projects/implicitron_trainer/configs/repro_singleseq_nerf_blender.yaml
+++ b/projects/implicitron_trainer/configs/repro_singleseq_nerf_blender.yaml
@@ -0,0 +1,55 @@
 defaults:
 - repro_singleseq_base
 - _self_
 exp_dir: "./data/nerf_blender_repro/${oc.env:BLENDER_SINGLESEQ_CLASS}"
 data_source_ImplicitronDataSource_args:
  data_loader_map_provider_SequenceDataLoaderMapProvider_args:
    dataset_length_train: 100
  dataset_map_provider_class_type: BlenderDatasetMapProvider
  dataset_map_provider_BlenderDatasetMapProvider_args:
    base_dir: ${oc.env:BLENDER_DATASET_ROOT}/${oc.env:BLENDER_SINGLESEQ_CLASS}
    n_known_frames_for_test: null
    object_name: ${oc.env:BLENDER_SINGLESEQ_CLASS}
    path_manager_factory_class_type: PathManagerFactory
    path_manager_factory_PathManagerFactory_args:
      silence_logs: true
 model_factory_ImplicitronModelFactory_args:
  model_GenericModel_args:
    mask_images: false
    raysampler_class_type: AdaptiveRaySampler
    raysampler_AdaptiveRaySampler_args:
      n_pts_per_ray_training: 64
      n_pts_per_ray_evaluation: 64
      n_rays_per_image_sampled_from_mask: 4096
      stratified_point_sampling_training: true
      stratified_point_sampling_evaluation: false
      scene_extent: 2.0
      scene_center:
      - 0.0
      - 0.0
      - 0.0
    renderer_MultiPassEmissionAbsorptionRenderer_args:
      density_noise_std_train: 0.0
      n_pts_per_ray_fine_training: 128
      n_pts_per_ray_fine_evaluation: 128
      raymarcher_EmissionAbsorptionRaymarcher_args:
        blend_output: false
    loss_weights:
      loss_rgb_mse: 1.0
      loss_prev_stage_rgb_mse: 1.0
      loss_mask_bce: 0.0
      loss_prev_stage_mask_bce: 0.0
      loss_autodecoder_norm: 0.00
 optimizer_factory_ImplicitronOptimizerFactory_args:
  exponential_lr_step_size: 3001
  lr_policy: LinearExponential
  linear_exponential_lr_milestone: 200
 training_loop_ImplicitronTrainingLoop_args:
  max_epochs: 6000
  metric_print_interval: 10
  store_checkpoints_purge: 3
  test_when_finished: true
  validation_interval: 100
--- a/projects/implicitron_trainer/experiment.py
+++ b/projects/implicitron_trainer/experiment.py
@@ -48,18 +48,22 @@ The outputs of the experiment are saved and logged in multiple ways:
 import logging
 import os
 import warnings
 from dataclasses import field
 import hydra
 import torch
 from accelerate import Accelerator
 from omegaconf import DictConfig, OmegaConf
 from packaging import version
 from pytorch3d.implicitron.dataset.data_source import (
    DataSourceBase,
    ImplicitronDataSource,
 )
 from pytorch3d.implicitron.models.base_model import ImplicitronModelBase
 from pytorch3d.implicitron.models.renderer.multipass_ea import (
    MultiPassEmissionAbsorptionRenderer,
 )
--- a/projects/implicitron_trainer/impl/model_factory.py
+++ b/projects/implicitron_trainer/impl/model_factory.py
@@ -11,6 +11,7 @@ import os
 from typing import Optional
 import torch.optim
 from accelerate import Accelerator
 from pytorch3d.implicitron.models.base_model import ImplicitronModelBase
 from pytorch3d.implicitron.tools import model_io
--- a/projects/implicitron_trainer/impl/optimizer_factory.py
+++ b/projects/implicitron_trainer/impl/optimizer_factory.py
@@ -14,7 +14,9 @@ from dataclasses import field
 from typing import Any, Dict, List, Optional, Tuple
 import torch.optim
 from accelerate import Accelerator
 from pytorch3d.implicitron.models.base_model import ImplicitronModelBase
 from pytorch3d.implicitron.tools import model_io
 from pytorch3d.implicitron.tools.config import (
--- a/projects/implicitron_trainer/tests/experiment.yaml
+++ b/projects/implicitron_trainer/tests/experiment.yaml
@@ -13,6 +13,13 @@ hydra:
 data_source_ImplicitronDataSource_args:
  dataset_map_provider_class_type: ???
  data_loader_map_provider_class_type: SequenceDataLoaderMapProvider
  dataset_map_provider_BlenderDatasetMapProvider_args:
    base_dir: ???
    object_name: ???
    path_manager_factory_class_type: PathManagerFactory
    n_known_frames_for_test: null
    path_manager_factory_PathManagerFactory_args:
      silence_logs: true
  dataset_map_provider_JsonIndexDatasetMapProvider_args:
    category: ???
    task_str: singlesequence
@@ -84,6 +91,14 @@ data_source_ImplicitronDataSource_args:
      sort_frames: false
    path_manager_factory_PathManagerFactory_args:
      silence_logs: true
  dataset_map_provider_LlffDatasetMapProvider_args:
    base_dir: ???
    object_name: ???
    path_manager_factory_class_type: PathManagerFactory
    n_known_frames_for_test: null
    path_manager_factory_PathManagerFactory_args:
      silence_logs: true
    downscale_factor: 4
  dataset_map_provider_RenderedMeshDatasetMapProvider_args:
    num_views: 40
    data_file: null
--- a/projects/implicitron_trainer/tests/test_experiment.py
+++ b/projects/implicitron_trainer/tests/test_experiment.py
@@ -12,6 +12,7 @@ import unittest
 from pathlib import Path
 import torch
 from hydra import compose, initialize_config_dir
 from omegaconf import OmegaConf
 from projects.implicitron_trainer.impl.optimizer_factory import (
--- a/pytorch3d/init.py
+++ b/pytorch3d/init.py
@@ -6,4 +6,4 @@
 # pyre-unsafe
-__version__ = "0.7.9"
+__version__ = "0.7.8"
--- a/pytorch3d/common/workaround/symeig3x3.py
+++ b/pytorch3d/common/workaround/symeig3x3.py
@@ -82,12 +82,10 @@ class _SymEig3x3(nn.Module):
        q = inputs_trace / 3.0
        # Calculate squared sum of elements outside the main diagonal / 2
-        p1 = (
+        # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
-            torch.square(inputs).sum(dim=(-1, -2)) - torch.square(inputs_diag).sum(-1)
+        p1 = ((inputs**2).sum(dim=(-1, -2)) - (inputs_diag**2).sum(-1)) / 2
-        ) / 2
+        # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
-        p2 = torch.square(inputs_diag - q[..., None]).sum(dim=-1) + 2.0 * p1.clamp(
+        p2 = ((inputs_diag - q[..., None]) ** 2).sum(dim=-1) + 2.0 * p1.clamp(self._eps)
            self._eps
        )
        p = torch.sqrt(p2 / 6.0)
        B = (inputs - q[..., None, None] * self._identity) / p[..., None, None]
@@ -106,9 +104,7 @@ class _SymEig3x3(nn.Module):
        # Soft dispatch between the degenerate case (diagonal A) and general.
        # diag_soft_cond -> 1.0 when p1 < 6 * eps and diag_soft_cond -> 0.0 otherwise.
        # We use 6 * eps to take into account the error accumulated during the p1 summation
-        diag_soft_cond = torch.exp(-torch.square(p1 / (6 * self._eps))).detach()[
+        diag_soft_cond = torch.exp(-((p1 / (6 * self._eps)) ** 2)).detach()[..., None]
            ..., None
        ]
        # Eigenvalues are the ordered elements of main diagonal in the degenerate case
        diag_eigenvals, _ = torch.sort(inputs_diag, dim=-1)
@@ -203,7 +199,8 @@ class _SymEig3x3(nn.Module):
            cross_products[..., :1, :]
        )
-        norms_sq = torch.square(cross_products).sum(dim=-1)
+        # pyre-fixme[58]: `**` is not supported for operand types `Tensor` and `int`.
        norms_sq = (cross_products**2).sum(dim=-1)
        max_norms_index = norms_sq.argmax(dim=-1)
        # Pick only the cross-product with highest squared norm for each input
--- a/pytorch3d/csrc/ball_query/ball_query.cu
+++ b/pytorch3d/csrc/ball_query/ball_query.cu
@@ -32,9 +32,7 @@ __global__ void BallQueryKernel(
    at::PackedTensorAccessor64<int64_t, 3, at::RestrictPtrTraits> idxs,
    at::PackedTensorAccessor64<scalar_t, 3, at::RestrictPtrTraits> dists,
    const int64_t K,
-    const float radius,
+    const float radius2) {
    const float radius2,
    const bool skip_points_outside_cube) {
  const int64_t N = p1.size(0);
  const int64_t chunks_per_cloud = (1 + (p1.size(1) - 1) / blockDim.x);
  const int64_t chunks_to_do = N * chunks_per_cloud;
@@ -53,19 +51,7 @@ __global__ void BallQueryKernel(
    // Iterate over points in p2 until desired count is reached or
    // all points have been considered
    for (int64_t j = 0, count = 0; j < lengths2[n] && count < K; ++j) {
-      if (skip_points_outside_cube) {
+      // Calculate the distance between the points
        bool is_within_radius = true;
        // Filter when any one coordinate is already outside the radius
        for (int d = 0; is_within_radius && d < D; ++d) {
          scalar_t abs_diff = fabs(p1[n][i][d] - p2[n][j][d]);
          is_within_radius = (abs_diff <= radius);
        }
        if (!is_within_radius) {
          continue;
        }
      }
      // Else, calculate the distance between the points and compare
      scalar_t dist2 = 0.0;
      for (int d = 0; d < D; ++d) {
        scalar_t diff = p1[n][i][d] - p2[n][j][d];
@@ -91,8 +77,7 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCuda(
    const at::Tensor& lengths1, // (N,)
    const at::Tensor& lengths2, // (N,)
    int K,
-    float radius,
+    float radius) {
    bool skip_points_outside_cube) {
  // Check inputs are on the same device
  at::TensorArg p1_t{p1, "p1", 1}, p2_t{p2, "p2", 2},
      lengths1_t{lengths1, "lengths1", 3}, lengths2_t{lengths2, "lengths2", 4};
@@ -135,9 +120,7 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCuda(
            idxs.packed_accessor64<int64_t, 3, at::RestrictPtrTraits>(),
            dists.packed_accessor64<float, 3, at::RestrictPtrTraits>(),
            K_64,
-            radius,
+            radius2);
            radius2,
            skip_points_outside_cube);
      }));
  AT_CUDA_CHECK(cudaGetLastError());
--- a/pytorch3d/csrc/ball_query/ball_query.h
+++ b/pytorch3d/csrc/ball_query/ball_query.h
@@ -25,9 +25,6 @@
 //      within the radius
 //    radius: the radius around each point within which the neighbors need to be
 //      located
 //    skip_points_outside_cube: If true, reduce multiplications of float values
 //      by not explicitly calculating distances to points that fall outside the
 //      D-cube with side length (2*radius) centered at each point in p1.
 //
 // Returns:
 //    p1_neighbor_idx: LongTensor of shape (N, P1, K), where
@@ -49,8 +46,7 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCpu(
    const at::Tensor& lengths1,
    const at::Tensor& lengths2,
    const int K,
-    const float radius,
+    const float radius);
    const bool skip_points_outside_cube);
 // CUDA implementation
 std::tuple<at::Tensor, at::Tensor> BallQueryCuda(
@@ -59,8 +55,7 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCuda(
    const at::Tensor& lengths1,
    const at::Tensor& lengths2,
    const int K,
-    const float radius,
+    const float radius);
    const bool skip_points_outside_cube);
 // Implementation which is exposed
 // Note: the backward pass reuses the KNearestNeighborBackward kernel
@@ -70,8 +65,7 @@ inline std::tuple<at::Tensor, at::Tensor> BallQuery(
    const at::Tensor& lengths1,
    const at::Tensor& lengths2,
    int K,
-    float radius,
+    float radius) {
    bool skip_points_outside_cube) {
  if (p1.is_cuda() || p2.is_cuda()) {
 #ifdef WITH_CUDA
    CHECK_CUDA(p1);
@@ -82,20 +76,16 @@ inline std::tuple<at::Tensor, at::Tensor> BallQuery(
        lengths1.contiguous(),
        lengths2.contiguous(),
        K,
-        radius,
+        radius);
        skip_points_outside_cube);
 #else
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(p1);
  CHECK_CPU(p2);
  return BallQueryCpu(
      p1.contiguous(),
      p2.contiguous(),
      lengths1.contiguous(),
      lengths2.contiguous(),
      K,
-      radius,
+      radius);
      skip_points_outside_cube);
 }
--- a/pytorch3d/csrc/ball_query/ball_query_cpu.cpp
+++ b/pytorch3d/csrc/ball_query/ball_query_cpu.cpp
@@ -6,7 +6,6 @@
 * LICENSE file in the root directory of this source tree.
 */
 #include <math.h>
 #include <torch/extension.h>
 #include <tuple>
@@ -16,8 +15,7 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCpu(
    const at::Tensor& lengths1,
    const at::Tensor& lengths2,
    int K,
-    float radius,
+    float radius) {
    bool skip_points_outside_cube) {
  const int N = p1.size(0);
  const int P1 = p1.size(1);
  const int D = p1.size(2);
@@ -39,16 +37,6 @@ std::tuple<at::Tensor, at::Tensor> BallQueryCpu(
    const int64_t length2 = lengths2_a[n];
    for (int64_t i = 0; i < length1; ++i) {
      for (int64_t j = 0, count = 0; j < length2 && count < K; ++j) {
        if (skip_points_outside_cube) {
          bool is_within_radius = true;
          for (int d = 0; is_within_radius && d < D; ++d) {
            float abs_diff = fabs(p1_a[n][i][d] - p2_a[n][j][d]);
            is_within_radius = (abs_diff <= radius);
          }
          if (!is_within_radius) {
            continue;
          }
        }
        float dist2 = 0;
        for (int d = 0; d < D; ++d) {
          float diff = p1_a[n][i][d] - p2_a[n][j][d];
--- a/pytorch3d/csrc/blending/sigmoid_alpha_blend.h
+++ b/pytorch3d/csrc/blending/sigmoid_alpha_blend.h
@@ -98,11 +98,6 @@ at::Tensor SigmoidAlphaBlendBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(distances);
  CHECK_CPU(pix_to_face);
  CHECK_CPU(alphas);
  CHECK_CPU(grad_alphas);
  return SigmoidAlphaBlendBackwardCpu(
      grad_alphas, alphas, distances, pix_to_face, sigma);
 }
--- a/pytorch3d/csrc/compositing/alpha_composite.cu
+++ b/pytorch3d/csrc/compositing/alpha_composite.cu
@@ -33,11 +33,11 @@ __global__ void alphaCompositeCudaForwardKernel(
  const int64_t W = points_idx.size(3);
  // Get the batch and index
-  const auto batch = blockIdx.x;
+  const int batch = blockIdx.x;
  const int num_pixels = C * H * W;
-  const auto num_threads = gridDim.y * blockDim.x;
+  const int num_threads = gridDim.y * blockDim.x;
-  const auto tid = blockIdx.y * blockDim.x + threadIdx.x;
+  const int tid = blockIdx.y * blockDim.x + threadIdx.x;
  // Iterate over each feature in each pixel
  for (int pid = tid; pid < num_pixels; pid += num_threads) {
@@ -83,11 +83,11 @@ __global__ void alphaCompositeCudaBackwardKernel(
  const int64_t W = points_idx.size(3);
  // Get the batch and index
-  const auto batch = blockIdx.x;
+  const int batch = blockIdx.x;
  const int num_pixels = C * H * W;
-  const auto num_threads = gridDim.y * blockDim.x;
+  const int num_threads = gridDim.y * blockDim.x;
-  const auto tid = blockIdx.y * blockDim.x + threadIdx.x;
+  const int tid = blockIdx.y * blockDim.x + threadIdx.x;
  // Parallelize over each feature in each pixel in images of size H * W,
  // for each image in the batch of size batch_size
--- a/pytorch3d/csrc/compositing/alpha_composite.h
+++ b/pytorch3d/csrc/compositing/alpha_composite.h
@@ -74,9 +74,6 @@ torch::Tensor alphaCompositeForward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
    CHECK_CPU(features);
    CHECK_CPU(alphas);
    CHECK_CPU(points_idx);
    return alphaCompositeCpuForward(features, alphas, points_idx);
  }
 }
@@ -104,11 +101,6 @@ std::tuple<torch::Tensor, torch::Tensor> alphaCompositeBackward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
    CHECK_CPU(grad_outputs);
    CHECK_CPU(features);
    CHECK_CPU(alphas);
    CHECK_CPU(points_idx);
    return alphaCompositeCpuBackward(
        grad_outputs, features, alphas, points_idx);
  }
--- a/pytorch3d/csrc/compositing/norm_weighted_sum.cu
+++ b/pytorch3d/csrc/compositing/norm_weighted_sum.cu
@@ -33,11 +33,11 @@ __global__ void weightedSumNormCudaForwardKernel(
  const int64_t W = points_idx.size(3);
  // Get the batch and index
-  const auto batch = blockIdx.x;
+  const int batch = blockIdx.x;
  const int num_pixels = C * H * W;
-  const auto num_threads = gridDim.y * blockDim.x;
+  const int num_threads = gridDim.y * blockDim.x;
-  const auto tid = blockIdx.y * blockDim.x + threadIdx.x;
+  const int tid = blockIdx.y * blockDim.x + threadIdx.x;
  // Parallelize over each feature in each pixel in images of size H * W,
  // for each image in the batch of size batch_size
@@ -96,11 +96,11 @@ __global__ void weightedSumNormCudaBackwardKernel(
  const int64_t W = points_idx.size(3);
  // Get the batch and index
-  const auto batch = blockIdx.x;
+  const int batch = blockIdx.x;
  const int num_pixels = C * W * H;
-  const auto num_threads = gridDim.y * blockDim.x;
+  const int num_threads = gridDim.y * blockDim.x;
-  const auto tid = blockIdx.y * blockDim.x + threadIdx.x;
+  const int tid = blockIdx.y * blockDim.x + threadIdx.x;
  // Parallelize over each feature in each pixel in images of size H * W,
  // for each image in the batch of size batch_size
--- a/pytorch3d/csrc/compositing/norm_weighted_sum.h
+++ b/pytorch3d/csrc/compositing/norm_weighted_sum.h
@@ -73,10 +73,6 @@ torch::Tensor weightedSumNormForward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
    CHECK_CPU(features);
    CHECK_CPU(alphas);
    CHECK_CPU(points_idx);
    return weightedSumNormCpuForward(features, alphas, points_idx);
  }
 }
@@ -104,11 +100,6 @@ std::tuple<torch::Tensor, torch::Tensor> weightedSumNormBackward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
    CHECK_CPU(grad_outputs);
    CHECK_CPU(features);
    CHECK_CPU(alphas);
    CHECK_CPU(points_idx);
    return weightedSumNormCpuBackward(
        grad_outputs, features, alphas, points_idx);
  }
--- a/pytorch3d/csrc/compositing/weighted_sum.cu
+++ b/pytorch3d/csrc/compositing/weighted_sum.cu
@@ -31,11 +31,11 @@ __global__ void weightedSumCudaForwardKernel(
  const int64_t W = points_idx.size(3);
  // Get the batch and index
-  const auto batch = blockIdx.x;
+  const int batch = blockIdx.x;
  const int num_pixels = C * H * W;
-  const auto num_threads = gridDim.y * blockDim.x;
+  const int num_threads = gridDim.y * blockDim.x;
-  const auto tid = blockIdx.y * blockDim.x + threadIdx.x;
+  const int tid = blockIdx.y * blockDim.x + threadIdx.x;
  // Parallelize over each feature in each pixel in images of size H * W,
  // for each image in the batch of size batch_size
@@ -78,11 +78,11 @@ __global__ void weightedSumCudaBackwardKernel(
  const int64_t W = points_idx.size(3);
  // Get the batch and index
-  const auto batch = blockIdx.x;
+  const int batch = blockIdx.x;
  const int num_pixels = C * H * W;
-  const auto num_threads = gridDim.y * blockDim.x;
+  const int num_threads = gridDim.y * blockDim.x;
-  const auto tid = blockIdx.y * blockDim.x + threadIdx.x;
+  const int tid = blockIdx.y * blockDim.x + threadIdx.x;
  // Iterate over each pixel to compute the contribution to the
  // gradient for the features and weights
--- a/pytorch3d/csrc/compositing/weighted_sum.h
+++ b/pytorch3d/csrc/compositing/weighted_sum.h
@@ -72,9 +72,6 @@ torch::Tensor weightedSumForward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
    CHECK_CPU(features);
    CHECK_CPU(alphas);
    CHECK_CPU(points_idx);
    return weightedSumCpuForward(features, alphas, points_idx);
  }
 }
@@ -101,11 +98,6 @@ std::tuple<torch::Tensor, torch::Tensor> weightedSumBackward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
    CHECK_CPU(grad_outputs);
    CHECK_CPU(features);
    CHECK_CPU(alphas);
    CHECK_CPU(points_idx);
    return weightedSumCpuBackward(grad_outputs, features, alphas, points_idx);
  }
 }
--- a/pytorch3d/csrc/ext.cpp
+++ b/pytorch3d/csrc/ext.cpp
@@ -8,6 +8,7 @@
 // clang-format off
 #include "./pulsar/global.h" // Include before <torch/extension.h>.
 #include <torch/extension.h>
 // clang-format on
 #include "./pulsar/pytorch/renderer.h"
 #include "./pulsar/pytorch/tensor_util.h"
@@ -105,16 +106,15 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  py::class_<
      pulsar::pytorch::Renderer,
      std::shared_ptr<pulsar::pytorch::Renderer>>(m, "PulsarRenderer")
-      .def(
+      .def(py::init<
-          py::init<
+           const uint&,
-              const uint&,
+           const uint&,
-              const uint&,
+           const uint&,
-              const uint&,
+           const bool&,
-              const bool&,
+           const bool&,
-              const bool&,
+           const float&,
-              const float&,
+           const uint&,
-              const uint&,
+           const uint&>())
              const uint&>())
      .def(
          "__eq__",
          [](const pulsar::pytorch::Renderer& a,
--- a/pytorch3d/csrc/face_areas_normals/face_areas_normals.h
+++ b/pytorch3d/csrc/face_areas_normals/face_areas_normals.h
@@ -60,8 +60,6 @@ std::tuple<at::Tensor, at::Tensor> FaceAreasNormalsForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(verts);
  CHECK_CPU(faces);
  return FaceAreasNormalsForwardCpu(verts, faces);
 }
@@ -82,9 +80,5 @@ at::Tensor FaceAreasNormalsBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(grad_areas);
  CHECK_CPU(grad_normals);
  CHECK_CPU(verts);
  CHECK_CPU(faces);
  return FaceAreasNormalsBackwardCpu(grad_areas, grad_normals, verts, faces);
 }
--- a/pytorch3d/csrc/gather_scatter/gather_scatter.cu
+++ b/pytorch3d/csrc/gather_scatter/gather_scatter.cu
@@ -20,14 +20,14 @@ __global__ void GatherScatterCudaKernel(
    const size_t V,
    const size_t D,
    const size_t E) {
-  const auto tid = threadIdx.x;
+  const int tid = threadIdx.x;
  // Reverse the vertex order if backward.
  const int v0_idx = backward ? 1 : 0;
  const int v1_idx = backward ? 0 : 1;
  // Edges are split evenly across the blocks.
-  for (auto e = blockIdx.x; e < E; e += gridDim.x) {
+  for (int e = blockIdx.x; e < E; e += gridDim.x) {
    // Get indices of vertices which form the edge.
    const int64_t v0 = edges[2 * e + v0_idx];
    const int64_t v1 = edges[2 * e + v1_idx];
@@ -35,7 +35,7 @@ __global__ void GatherScatterCudaKernel(
    // Split vertex features evenly across threads.
    // This implementation will be quite wasteful when D<128 since there will be
    // a lot of threads doing nothing.
-    for (auto d = tid; d < D; d += blockDim.x) {
+    for (int d = tid; d < D; d += blockDim.x) {
      const float val = input[v1 * D + d];
      float* address = output + v0 * D + d;
      atomicAdd(address, val);
--- a/pytorch3d/csrc/gather_scatter/gather_scatter.h
+++ b/pytorch3d/csrc/gather_scatter/gather_scatter.h
@@ -53,7 +53,5 @@ at::Tensor GatherScatter(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(input);
  CHECK_CPU(edges);
  return GatherScatterCpu(input, edges, directed, backward);
 }
--- a/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.cu
+++ b/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.cu
@@ -20,8 +20,8 @@ __global__ void InterpFaceAttrsForwardKernel(
    const size_t P,
    const size_t F,
    const size_t D) {
-  const auto tid = threadIdx.x + blockIdx.x * blockDim.x;
+  const int tid = threadIdx.x + blockIdx.x * blockDim.x;
-  const auto num_threads = blockDim.x * gridDim.x;
+  const int num_threads = blockDim.x * gridDim.x;
  for (int pd = tid; pd < P * D; pd += num_threads) {
    const int p = pd / D;
    const int d = pd % D;
@@ -93,8 +93,8 @@ __global__ void InterpFaceAttrsBackwardKernel(
    const size_t P,
    const size_t F,
    const size_t D) {
-  const auto tid = threadIdx.x + blockIdx.x * blockDim.x;
+  const int tid = threadIdx.x + blockIdx.x * blockDim.x;
-  const auto num_threads = blockDim.x * gridDim.x;
+  const int num_threads = blockDim.x * gridDim.x;
  for (int pd = tid; pd < P * D; pd += num_threads) {
    const int p = pd / D;
    const int d = pd % D;
--- a/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.h
+++ b/pytorch3d/csrc/interp_face_attrs/interp_face_attrs.h
@@ -57,8 +57,6 @@ at::Tensor InterpFaceAttrsForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(face_attrs);
  CHECK_CPU(barycentric_coords);
  return InterpFaceAttrsForwardCpu(pix_to_face, barycentric_coords, face_attrs);
 }
@@ -108,9 +106,6 @@ std::tuple<at::Tensor, at::Tensor> InterpFaceAttrsBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(face_attrs);
  CHECK_CPU(barycentric_coords);
  CHECK_CPU(grad_pix_attrs);
  return InterpFaceAttrsBackwardCpu(
      pix_to_face, barycentric_coords, face_attrs, grad_pix_attrs);
 }
--- a/pytorch3d/csrc/iou_box3d/iou_box3d.h
+++ b/pytorch3d/csrc/iou_box3d/iou_box3d.h
@@ -44,7 +44,5 @@ inline std::tuple<at::Tensor, at::Tensor> IoUBox3D(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(boxes1);
  CHECK_CPU(boxes2);
  return IoUBox3DCpu(boxes1.contiguous(), boxes2.contiguous());
 }
--- a/pytorch3d/csrc/knn/knn.h
+++ b/pytorch3d/csrc/knn/knn.h
@@ -74,8 +74,6 @@ std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdx(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(p1);
  CHECK_CPU(p2);
  return KNearestNeighborIdxCpu(p1, p2, lengths1, lengths2, norm, K);
 }
@@ -142,8 +140,6 @@ std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(p1);
  CHECK_CPU(p2);
  return KNearestNeighborBackwardCpu(
      p1, p2, lengths1, lengths2, idxs, norm, grad_dists);
 }
--- a/pytorch3d/csrc/marching_cubes/marching_cubes.h
+++ b/pytorch3d/csrc/marching_cubes/marching_cubes.h
@@ -58,6 +58,5 @@ inline std::tuple<at::Tensor, at::Tensor, at::Tensor> MarchingCubes(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(vol);
  return MarchingCubesCpu(vol.contiguous(), isolevel);
 }
--- a/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.h
+++ b/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.h
@@ -88,8 +88,6 @@ at::Tensor PackedToPadded(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(inputs_packed);
  CHECK_CPU(first_idxs);
  return PackedToPaddedCpu(inputs_packed, first_idxs, max_size);
 }
@@ -107,7 +105,5 @@ at::Tensor PaddedToPacked(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(inputs_padded);
  CHECK_CPU(first_idxs);
  return PaddedToPackedCpu(inputs_padded, first_idxs, num_inputs);
 }
--- a/pytorch3d/csrc/point_mesh/point_mesh_cpu.cpp
+++ b/pytorch3d/csrc/point_mesh/point_mesh_cpu.cpp
@@ -174,8 +174,8 @@ std::tuple<at::Tensor, at::Tensor> HullHullDistanceForwardCpu(
  at::Tensor idxs = at::zeros({A_N,}, as_first_idx.options());
  // clang-format on
-  auto as_a = as.accessor<float, H1 == 1 ? 2 : 3>();
+  auto as_a = as.accessor < float, H1 == 1 ? 2 : 3 > ();
-  auto bs_a = bs.accessor<float, H2 == 1 ? 2 : 3>();
+  auto bs_a = bs.accessor < float, H2 == 1 ? 2 : 3 > ();
  auto as_first_idx_a = as_first_idx.accessor<int64_t, 1>();
  auto bs_first_idx_a = bs_first_idx.accessor<int64_t, 1>();
  auto dists_a = dists.accessor<float, 1>();
@@ -230,10 +230,10 @@ std::tuple<at::Tensor, at::Tensor> HullHullDistanceBackwardCpu(
  at::Tensor grad_as = at::zeros_like(as);
  at::Tensor grad_bs = at::zeros_like(bs);
-  auto as_a = as.accessor<float, H1 == 1 ? 2 : 3>();
+  auto as_a = as.accessor < float, H1 == 1 ? 2 : 3 > ();
-  auto bs_a = bs.accessor<float, H2 == 1 ? 2 : 3>();
+  auto bs_a = bs.accessor < float, H2 == 1 ? 2 : 3 > ();
-  auto grad_as_a = grad_as.accessor<float, H1 == 1 ? 2 : 3>();
+  auto grad_as_a = grad_as.accessor < float, H1 == 1 ? 2 : 3 > ();
-  auto grad_bs_a = grad_bs.accessor<float, H2 == 1 ? 2 : 3>();
+  auto grad_bs_a = grad_bs.accessor < float, H2 == 1 ? 2 : 3 > ();
  auto idx_bs_a = idx_bs.accessor<int64_t, 1>();
  auto grad_dists_a = grad_dists.accessor<float, 1>();
--- a/pytorch3d/csrc/point_mesh/point_mesh_cuda.cu
+++ b/pytorch3d/csrc/point_mesh/point_mesh_cuda.cu
@@ -110,7 +110,7 @@ __global__ void DistanceForwardKernel(
    __syncthreads();
    // Perform reduction in shared memory.
-    for (auto s = blockDim.x / 2; s > 32; s >>= 1) {
+    for (int s = blockDim.x / 2; s > 32; s >>= 1) {
      if (tid < s) {
        if (min_dists[tid] > min_dists[tid + s]) {
          min_dists[tid] = min_dists[tid + s];
@@ -502,8 +502,8 @@ __global__ void PointFaceArrayForwardKernel(
  const float3* tris_f3 = (float3*)tris;
  // Parallelize over P * S computations
-  const auto num_threads = gridDim.x * blockDim.x;
+  const int num_threads = gridDim.x * blockDim.x;
-  const auto tid = blockIdx.x * blockDim.x + threadIdx.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
  for (int t_i = tid; t_i < P * T; t_i += num_threads) {
    const int t = t_i / P; // segment index.
@@ -576,8 +576,8 @@ __global__ void PointFaceArrayBackwardKernel(
  const float3* tris_f3 = (float3*)tris;
  // Parallelize over P * S computations
-  const auto num_threads = gridDim.x * blockDim.x;
+  const int num_threads = gridDim.x * blockDim.x;
-  const auto tid = blockIdx.x * blockDim.x + threadIdx.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
  for (int t_i = tid; t_i < P * T; t_i += num_threads) {
    const int t = t_i / P; // triangle index.
@@ -683,8 +683,8 @@ __global__ void PointEdgeArrayForwardKernel(
  float3* segms_f3 = (float3*)segms;
  // Parallelize over P * S computations
-  const auto num_threads = gridDim.x * blockDim.x;
+  const int num_threads = gridDim.x * blockDim.x;
-  const auto tid = blockIdx.x * blockDim.x + threadIdx.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
  for (int t_i = tid; t_i < P * S; t_i += num_threads) {
    const int s = t_i / P; // segment index.
@@ -752,8 +752,8 @@ __global__ void PointEdgeArrayBackwardKernel(
  float3* segms_f3 = (float3*)segms;
  // Parallelize over P * S computations
-  const auto num_threads = gridDim.x * blockDim.x;
+  const int num_threads = gridDim.x * blockDim.x;
-  const auto tid = blockIdx.x * blockDim.x + threadIdx.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
  for (int t_i = tid; t_i < P * S; t_i += num_threads) {
    const int s = t_i / P; // segment index.
--- a/pytorch3d/csrc/point_mesh/point_mesh_cuda.h
+++ b/pytorch3d/csrc/point_mesh/point_mesh_cuda.h
@@ -88,10 +88,6 @@ std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(points);
  CHECK_CPU(points_first_idx);
  CHECK_CPU(tris);
  CHECK_CPU(tris_first_idx);
  return PointFaceDistanceForwardCpu(
      points, points_first_idx, tris, tris_first_idx, min_triangle_area);
 }
@@ -147,10 +143,6 @@ std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(points);
  CHECK_CPU(tris);
  CHECK_CPU(idx_points);
  CHECK_CPU(grad_dists);
  return PointFaceDistanceBackwardCpu(
      points, tris, idx_points, grad_dists, min_triangle_area);
 }
@@ -229,10 +221,6 @@ std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(points);
  CHECK_CPU(points_first_idx);
  CHECK_CPU(tris);
  CHECK_CPU(tris_first_idx);
  return FacePointDistanceForwardCpu(
      points, points_first_idx, tris, tris_first_idx, min_triangle_area);
 }
@@ -289,10 +277,6 @@ std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(points);
  CHECK_CPU(tris);
  CHECK_CPU(idx_tris);
  CHECK_CPU(grad_dists);
  return FacePointDistanceBackwardCpu(
      points, tris, idx_tris, grad_dists, min_triangle_area);
 }
@@ -362,10 +346,6 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(points);
  CHECK_CPU(points_first_idx);
  CHECK_CPU(segms);
  CHECK_CPU(segms_first_idx);
  return PointEdgeDistanceForwardCpu(
      points, points_first_idx, segms, segms_first_idx, max_points);
 }
@@ -416,10 +396,6 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(points);
  CHECK_CPU(segms);
  CHECK_CPU(idx_points);
  CHECK_CPU(grad_dists);
  return PointEdgeDistanceBackwardCpu(points, segms, idx_points, grad_dists);
 }
@@ -488,10 +464,6 @@ std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(points);
  CHECK_CPU(points_first_idx);
  CHECK_CPU(segms);
  CHECK_CPU(segms_first_idx);
  return EdgePointDistanceForwardCpu(
      points, points_first_idx, segms, segms_first_idx, max_segms);
 }
@@ -542,10 +514,6 @@ std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(points);
  CHECK_CPU(segms);
  CHECK_CPU(idx_segms);
  CHECK_CPU(grad_dists);
  return EdgePointDistanceBackwardCpu(points, segms, idx_segms, grad_dists);
 }
@@ -599,8 +567,6 @@ torch::Tensor PointFaceArrayDistanceForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(points);
  CHECK_CPU(tris);
  return PointFaceArrayDistanceForwardCpu(points, tris, min_triangle_area);
 }
@@ -647,9 +613,6 @@ std::tuple<torch::Tensor, torch::Tensor> PointFaceArrayDistanceBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(points);
  CHECK_CPU(tris);
  CHECK_CPU(grad_dists);
  return PointFaceArrayDistanceBackwardCpu(
      points, tris, grad_dists, min_triangle_area);
 }
@@ -698,8 +661,6 @@ torch::Tensor PointEdgeArrayDistanceForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(points);
  CHECK_CPU(segms);
  return PointEdgeArrayDistanceForwardCpu(points, segms);
 }
@@ -742,8 +703,5 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeArrayDistanceBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(points);
  CHECK_CPU(segms);
  CHECK_CPU(grad_dists);
  return PointEdgeArrayDistanceBackwardCpu(points, segms, grad_dists);
 }
--- a/pytorch3d/csrc/points_to_volumes/points_to_volumes.h
+++ b/pytorch3d/csrc/points_to_volumes/points_to_volumes.h
@@ -104,12 +104,6 @@ inline void PointsToVolumesForward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(points_3d);
  CHECK_CPU(points_features);
  CHECK_CPU(volume_densities);
  CHECK_CPU(volume_features);
  CHECK_CPU(grid_sizes);
  CHECK_CPU(mask);
  PointsToVolumesForwardCpu(
      points_3d,
      points_features,
@@ -189,14 +183,6 @@ inline void PointsToVolumesBackward(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(points_3d);
  CHECK_CPU(points_features);
  CHECK_CPU(grid_sizes);
  CHECK_CPU(mask);
  CHECK_CPU(grad_volume_densities);
  CHECK_CPU(grad_volume_features);
  CHECK_CPU(grad_points_3d);
  CHECK_CPU(grad_points_features);
  PointsToVolumesBackwardCpu(
      points_3d,
      points_features,
--- a/pytorch3d/csrc/pulsar/global.h
+++ b/pytorch3d/csrc/pulsar/global.h
@@ -15,8 +15,8 @@
 #endif
 #if defined(_WIN64) || defined(_WIN32)
-using uint = unsigned int;
+#define uint unsigned int
-using ushort = unsigned short;
+#define ushort unsigned short
 #endif
 #include "./logging.h" // <- include before torch/extension.h
--- a/pytorch3d/csrc/pulsar/gpu/commands.h
+++ b/pytorch3d/csrc/pulsar/gpu/commands.h
@@ -417,7 +417,7 @@ __device__ static float atomicMin(float* address, float val) {
      (OUT_PTR),              \
      (NUM_SELECTED_PTR),     \
      (NUM_ITEMS),            \
-      (STREAM));
+      stream = (STREAM));
 #define COPY_HOST_DEV(PTR_D, PTR_H, TYPE, SIZE) \
  HANDLECUDA(cudaMemcpy(                        \
--- a/pytorch3d/csrc/pulsar/host/commands.h
+++ b/pytorch3d/csrc/pulsar/host/commands.h
@@ -357,11 +357,11 @@ void MAX_WS(
 //
 //
 #define END_PARALLEL() \
-  end_parallel:;       \
+  end_parallel :;      \
  }
 #define END_PARALLEL_NORET() }
 #define END_PARALLEL_2D() \
-  end_parallel:;          \
+  end_parallel :;         \
  }                       \
  }
 #define END_PARALLEL_2D_NORET() \
--- a/pytorch3d/csrc/pulsar/include/camera.h
+++ b/pytorch3d/csrc/pulsar/include/camera.h
@@ -70,6 +70,11 @@ struct CamGradInfo {
  float3 pixel_dir_y;
 };
 // TODO: remove once https://github.com/NVlabs/cub/issues/172 is resolved.
 struct IntWrapper {
  int val;
 };
 } // namespace pulsar
 #endif
--- a/pytorch3d/csrc/pulsar/include/math.h
+++ b/pytorch3d/csrc/pulsar/include/math.h
@@ -149,6 +149,11 @@ IHD CamGradInfo operator*(const CamGradInfo& a, const float& b) {
  return res;
 }
 IHD IntWrapper operator+(const IntWrapper& a, const IntWrapper& b) {
  IntWrapper res;
  res.val = a.val + b.val;
  return res;
 }
 } // namespace pulsar
 #endif
--- a/pytorch3d/csrc/pulsar/include/renderer.backward.device.h
+++ b/pytorch3d/csrc/pulsar/include/renderer.backward.device.h
@@ -155,8 +155,8 @@ void backward(
        stream);
    CHECKLAUNCH();
    SUM_WS(
-        self->ids_sorted_d,
+        (IntWrapper*)(self->ids_sorted_d),
-        self->n_grad_contributions_d,
+        (IntWrapper*)(self->n_grad_contributions_d),
        static_cast<int>(num_balls),
        self->workspace_d,
        self->workspace_size,
--- a/pytorch3d/csrc/pulsar/include/renderer.construct.device.h
+++ b/pytorch3d/csrc/pulsar/include/renderer.construct.device.h
@@ -52,7 +52,7 @@ HOST void construct(
  self->cam.film_width = width;
  self->cam.film_height = height;
  self->max_num_balls = max_num_balls;
-  MALLOC(self->result_d, float, width * height * n_channels);
+  MALLOC(self->result_d, float, width* height* n_channels);
  self->cam.orthogonal_projection = orthogonal_projection;
  self->cam.right_handed = right_handed_system;
  self->cam.background_normalization_depth = background_normalization_depth;
@@ -93,7 +93,7 @@ HOST void construct(
  MALLOC(self->di_sorted_d, DrawInfo, max_num_balls);
  MALLOC(self->region_flags_d, char, max_num_balls);
  MALLOC(self->num_selected_d, size_t, 1);
-  MALLOC(self->forw_info_d, float, width * height * (3 + 2 * n_track));
+  MALLOC(self->forw_info_d, float, width* height * (3 + 2 * n_track));
  MALLOC(self->min_max_pixels_d, IntersectInfo, 1);
  MALLOC(self->grad_pos_d, float3, max_num_balls);
  MALLOC(self->grad_col_d, float, max_num_balls* n_channels);
--- a/pytorch3d/csrc/pulsar/include/renderer.destruct.device.h
+++ b/pytorch3d/csrc/pulsar/include/renderer.destruct.device.h
@@ -18,89 +18,68 @@ namespace Renderer {
 template <bool DEV>
 HOST void destruct(Renderer* self) {
-  if (self->result_d != NULL) {
+  if (self->result_d != NULL)
    FREE(self->result_d);
  }
  self->result_d = NULL;
-  if (self->min_depth_d != NULL) {
+  if (self->min_depth_d != NULL)
    FREE(self->min_depth_d);
  }
  self->min_depth_d = NULL;
-  if (self->min_depth_sorted_d != NULL) {
+  if (self->min_depth_sorted_d != NULL)
    FREE(self->min_depth_sorted_d);
  }
  self->min_depth_sorted_d = NULL;
-  if (self->ii_d != NULL) {
+  if (self->ii_d != NULL)
    FREE(self->ii_d);
  }
  self->ii_d = NULL;
-  if (self->ii_sorted_d != NULL) {
+  if (self->ii_sorted_d != NULL)
    FREE(self->ii_sorted_d);
  }
  self->ii_sorted_d = NULL;
-  if (self->ids_d != NULL) {
+  if (self->ids_d != NULL)
    FREE(self->ids_d);
  }
  self->ids_d = NULL;
-  if (self->ids_sorted_d != NULL) {
+  if (self->ids_sorted_d != NULL)
    FREE(self->ids_sorted_d);
  }
  self->ids_sorted_d = NULL;
-  if (self->workspace_d != NULL) {
+  if (self->workspace_d != NULL)
    FREE(self->workspace_d);
  }
  self->workspace_d = NULL;
-  if (self->di_d != NULL) {
+  if (self->di_d != NULL)
    FREE(self->di_d);
  }
  self->di_d = NULL;
-  if (self->di_sorted_d != NULL) {
+  if (self->di_sorted_d != NULL)
    FREE(self->di_sorted_d);
  }
  self->di_sorted_d = NULL;
-  if (self->region_flags_d != NULL) {
+  if (self->region_flags_d != NULL)
    FREE(self->region_flags_d);
  }
  self->region_flags_d = NULL;
-  if (self->num_selected_d != NULL) {
+  if (self->num_selected_d != NULL)
    FREE(self->num_selected_d);
  }
  self->num_selected_d = NULL;
-  if (self->forw_info_d != NULL) {
+  if (self->forw_info_d != NULL)
    FREE(self->forw_info_d);
  }
  self->forw_info_d = NULL;
-  if (self->min_max_pixels_d != NULL) {
+  if (self->min_max_pixels_d != NULL)
    FREE(self->min_max_pixels_d);
  }
  self->min_max_pixels_d = NULL;
-  if (self->grad_pos_d != NULL) {
+  if (self->grad_pos_d != NULL)
    FREE(self->grad_pos_d);
  }
  self->grad_pos_d = NULL;
-  if (self->grad_col_d != NULL) {
+  if (self->grad_col_d != NULL)
    FREE(self->grad_col_d);
  }
  self->grad_col_d = NULL;
-  if (self->grad_rad_d != NULL) {
+  if (self->grad_rad_d != NULL)
    FREE(self->grad_rad_d);
  }
  self->grad_rad_d = NULL;
-  if (self->grad_cam_d != NULL) {
+  if (self->grad_cam_d != NULL)
    FREE(self->grad_cam_d);
  }
  self->grad_cam_d = NULL;
-  if (self->grad_cam_buf_d != NULL) {
+  if (self->grad_cam_buf_d != NULL)
    FREE(self->grad_cam_buf_d);
  }
  self->grad_cam_buf_d = NULL;
-  if (self->grad_opy_d != NULL) {
+  if (self->grad_opy_d != NULL)
    FREE(self->grad_opy_d);
  }
  self->grad_opy_d = NULL;
-  if (self->n_grad_contributions_d != NULL) {
+  if (self->n_grad_contributions_d != NULL)
    FREE(self->n_grad_contributions_d);
  }
  self->n_grad_contributions_d = NULL;
 }
--- a/pytorch3d/csrc/pulsar/include/renderer.h
+++ b/pytorch3d/csrc/pulsar/include/renderer.h
@@ -255,7 +255,7 @@ GLOBAL void calc_signature(
 * for every iteration through the loading loop every thread could add a
 * 'hit' to the buffer.
 */
-#define RENDER_BUFFER_SIZE RENDER_BLOCK_SIZE * RENDER_BLOCK_SIZE * 2
+#define RENDER_BUFFER_SIZE RENDER_BLOCK_SIZE* RENDER_BLOCK_SIZE * 2
 /**
 * The threshold after which the spheres that are in the render buffer
 * are rendered and the buffer is flushed.
--- a/pytorch3d/csrc/pulsar/include/renderer.norm_sphere_gradients.device.h
+++ b/pytorch3d/csrc/pulsar/include/renderer.norm_sphere_gradients.device.h
@@ -64,9 +64,8 @@ GLOBAL void norm_sphere_gradients(Renderer renderer, const int num_balls) {
  // The sphere only contributes to the camera gradients if it is
  // large enough in screen space.
  if (renderer.ids_sorted_d[idx] > 0 && ii.max.x >= ii.min.x + 3 &&
-      ii.max.y >= ii.min.y + 3) {
+      ii.max.y >= ii.min.y + 3)
    renderer.ids_sorted_d[idx] = 1;
  }
  END_PARALLEL_NORET();
 };
--- a/pytorch3d/csrc/pulsar/include/renderer.render.device.h
+++ b/pytorch3d/csrc/pulsar/include/renderer.render.device.h
@@ -139,9 +139,8 @@ GLOBAL void render(
      coord_y < cam_norm.film_border_top + cam_norm.film_height) {
    // Initialize the result.
    if (mode == 0u) {
-      for (uint c_id = 0; c_id < cam_norm.n_channels; ++c_id) {
+      for (uint c_id = 0; c_id < cam_norm.n_channels; ++c_id)
        result[c_id] = bg_col[c_id];
      }
    } else {
      result[0] = 0.f;
    }
@@ -191,22 +190,20 @@ GLOBAL void render(
            "render|found intersection with sphere %u.\n",
            sphere_id_l[write_idx]);
      }
-      if (ii.min.x == MAX_USHORT) {
+      if (ii.min.x == MAX_USHORT)
        // This is an invalid sphere (out of image). These spheres have
        // maximum depth. Since we ordered the spheres by earliest possible
        // intersection depth we re certain that there will no other sphere
        // that is relevant after this one.
        loading_done = true;
      }
    }
    // Reset n_pixels_done.
    n_pixels_done = 0;
    thread_block.sync(); // Make sure n_loaded is updated.
    if (n_loaded > RENDER_BUFFER_LOAD_THRESH) {
      // The load buffer is full enough. Draw.
-      if (thread_block.thread_rank() == 0) {
+      if (thread_block.thread_rank() == 0)
        n_balls_loaded += n_loaded;
      }
      max_closest_possible_intersection = 0.f;
      // This excludes threads outside of the image boundary. Also, it reduces
      // block artifacts.
@@ -293,9 +290,8 @@ GLOBAL void render(
      uint warp_done = thread_warp.ballot(done);
      int warp_done_bit_cnt = POPC(warp_done);
 #endif //__CUDACC__ && __HIP_PLATFORM_AMD__
-      if (thread_warp.thread_rank() == 0) {
+      if (thread_warp.thread_rank() == 0)
        ATOMICADD_B(&n_pixels_done, warp_done_bit_cnt);
      }
      // This sync is necessary to keep n_loaded until all threads are done with
      // painting.
      thread_block.sync();
@@ -303,9 +299,8 @@ GLOBAL void render(
    }
    thread_block.sync();
  }
-  if (thread_block.thread_rank() == 0) {
+  if (thread_block.thread_rank() == 0)
    n_balls_loaded += n_loaded;
  }
  PULSAR_LOG_DEV_PIX(
      PULSAR_LOG_RENDER_PIX,
      "render|loaded %d balls in total.\n",
@@ -391,9 +386,8 @@ GLOBAL void render(
            static_cast<float>(tracker.get_n_hits());
  } else {
    float sm_d_normfac = FRCP(FMAX(sm_d, FEPS));
-    for (uint c_id = 0; c_id < cam_norm.n_channels; ++c_id) {
+    for (uint c_id = 0; c_id < cam_norm.n_channels; ++c_id)
      result[c_id] *= sm_d_normfac;
    }
    int write_loc = (coord_y - cam_norm.film_border_top) * cam_norm.film_width *
            (3 + 2 * n_track) +
        (coord_x - cam_norm.film_border_left) * (3 + 2 * n_track);
--- a/pytorch3d/csrc/pulsar/pytorch/renderer.cpp
+++ b/pytorch3d/csrc/pulsar/pytorch/renderer.cpp
@@ -860,9 +860,8 @@ std::tuple<torch::Tensor, torch::Tensor> Renderer::forward(
            ? (cudaStream_t) nullptr
 #endif
            : (cudaStream_t) nullptr);
-    if (mode == 1) {
+    if (mode == 1)
      results[batch_i] = results[batch_i].slice(2, 0, 1, 1);
    }
    forw_infos[batch_i] = from_blob(
        this->renderer_vec[batch_i].forw_info_d,
        {this->renderer_vec[0].cam.film_height,
--- a/pytorch3d/csrc/pulsar/pytorch/renderer.h
+++ b/pytorch3d/csrc/pulsar/pytorch/renderer.h
@@ -128,9 +128,8 @@ struct Renderer {
    stream << "pulsar::Renderer[";
    // Device info.
    stream << self.device_type;
-    if (self.device_index != -1) {
+    if (self.device_index != -1)
      stream << ", ID " << self.device_index;
    }
    stream << "]";
    return stream;
  }
--- a/pytorch3d/csrc/pulsar/warnings.cpp
+++ b/pytorch3d/csrc/pulsar/warnings.cpp
@@ -6,6 +6,9 @@
 * LICENSE file in the root directory of this source tree.
 */
 #include "./global.h"
 #include "./logging.h"
 /**
 * A compilation unit to provide warnings about the code and avoid
 * repeated messages.
--- a/pytorch3d/csrc/rasterize_coarse/bitmask.cuh
+++ b/pytorch3d/csrc/rasterize_coarse/bitmask.cuh
@@ -25,7 +25,7 @@ class BitMask {
  // Use all threads in the current block to clear all bits of this BitMask
  __device__ void block_clear() {
-    for (auto i = threadIdx.x; i < H * W * D; i += blockDim.x) {
+    for (int i = threadIdx.x; i < H * W * D; i += blockDim.x) {
      data[i] = 0;
    }
    __syncthreads();
--- a/pytorch3d/csrc/rasterize_coarse/rasterize_coarse.cu
+++ b/pytorch3d/csrc/rasterize_coarse/rasterize_coarse.cu
@@ -23,8 +23,8 @@ __global__ void TriangleBoundingBoxKernel(
    const float blur_radius,
    float* bboxes, // (4, F)
    bool* skip_face) { // (F,)
-  const auto tid = blockIdx.x * blockDim.x + threadIdx.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-  const auto num_threads = blockDim.x * gridDim.x;
+  const int num_threads = blockDim.x * gridDim.x;
  const float sqrt_radius = sqrt(blur_radius);
  for (int f = tid; f < F; f += num_threads) {
    const float v0x = face_verts[f * 9 + 0 * 3 + 0];
@@ -56,8 +56,8 @@ __global__ void PointBoundingBoxKernel(
    const int P,
    float* bboxes, // (4, P)
    bool* skip_points) {
-  const auto tid = blockIdx.x * blockDim.x + threadIdx.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
-  const auto num_threads = blockDim.x * gridDim.x;
+  const int num_threads = blockDim.x * gridDim.x;
  for (int p = tid; p < P; p += num_threads) {
    const float x = points[p * 3 + 0];
    const float y = points[p * 3 + 1];
@@ -113,7 +113,7 @@ __global__ void RasterizeCoarseCudaKernel(
  const int chunks_per_batch = 1 + (E - 1) / chunk_size;
  const int num_chunks = N * chunks_per_batch;
-  for (auto chunk = blockIdx.x; chunk < num_chunks; chunk += gridDim.x) {
+  for (int chunk = blockIdx.x; chunk < num_chunks; chunk += gridDim.x) {
    const int batch_idx = chunk / chunks_per_batch; // batch index
    const int chunk_idx = chunk % chunks_per_batch;
    const int elem_chunk_start_idx = chunk_idx * chunk_size;
@@ -123,7 +123,7 @@ __global__ void RasterizeCoarseCudaKernel(
    const int64_t elem_stop_idx = elem_start_idx + elems_per_batch[batch_idx];
    // Have each thread handle a different face within the chunk
-    for (auto e = threadIdx.x; e < chunk_size; e += blockDim.x) {
+    for (int e = threadIdx.x; e < chunk_size; e += blockDim.x) {
      const int e_idx = elem_chunk_start_idx + e;
      // Check that we are still within the same element of the batch
@@ -170,7 +170,7 @@ __global__ void RasterizeCoarseCudaKernel(
    // Now we have processed every elem in the current chunk. We need to
    // count the number of elems in each bin so we can write the indices
    // out to global memory. We have each thread handle a different bin.
-    for (auto byx = threadIdx.x; byx < num_bins_y * num_bins_x;
+    for (int byx = threadIdx.x; byx < num_bins_y * num_bins_x;
         byx += blockDim.x) {
      const int by = byx / num_bins_x;
      const int bx = byx % num_bins_x;
--- a/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.cu
+++ b/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.cu
@@ -260,8 +260,8 @@ __global__ void RasterizeMeshesNaiveCudaKernel(
    float* pix_dists,
    float* bary) {
  // Simple version: One thread per output pixel
-  auto num_threads = gridDim.x * blockDim.x;
+  int num_threads = gridDim.x * blockDim.x;
-  auto tid = blockDim.x * blockIdx.x + threadIdx.x;
+  int tid = blockDim.x * blockIdx.x + threadIdx.x;
  for (int i = tid; i < N * H * W; i += num_threads) {
    // Convert linear index to 3D index
@@ -446,8 +446,8 @@ __global__ void RasterizeMeshesBackwardCudaKernel(
  // Parallelize over each pixel in images of
  // size H * W, for each image in the batch of size N.
-  const auto num_threads = gridDim.x * blockDim.x;
+  const int num_threads = gridDim.x * blockDim.x;
-  const auto tid = blockIdx.x * blockDim.x + threadIdx.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
  for (int t_i = tid; t_i < N * H * W; t_i += num_threads) {
    // Convert linear index to 3D index
@@ -650,8 +650,8 @@ __global__ void RasterizeMeshesFineCudaKernel(
 ) {
  // This can be more than H * W if H or W are not divisible by bin_size.
  int num_pixels = N * BH * BW * bin_size * bin_size;
-  auto num_threads = gridDim.x * blockDim.x;
+  int num_threads = gridDim.x * blockDim.x;
-  auto tid = blockIdx.x * blockDim.x + threadIdx.x;
+  int tid = blockIdx.x * blockDim.x + threadIdx.x;
  for (int pid = tid; pid < num_pixels; pid += num_threads) {
    // Convert linear index into bin and pixel indices. We make the within
--- a/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.h
+++ b/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.h
@@ -138,9 +138,6 @@ RasterizeMeshesNaive(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
    CHECK_CPU(face_verts);
    CHECK_CPU(mesh_to_face_first_idx);
    CHECK_CPU(num_faces_per_mesh);
    return RasterizeMeshesNaiveCpu(
        face_verts,
        mesh_to_face_first_idx,
@@ -235,11 +232,6 @@ torch::Tensor RasterizeMeshesBackward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
    CHECK_CPU(face_verts);
    CHECK_CPU(pix_to_face);
    CHECK_CPU(grad_zbuf);
    CHECK_CPU(grad_bary);
    CHECK_CPU(grad_dists);
    return RasterizeMeshesBackwardCpu(
        face_verts,
        pix_to_face,
@@ -314,9 +306,6 @@ torch::Tensor RasterizeMeshesCoarse(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
    CHECK_CPU(face_verts);
    CHECK_CPU(mesh_to_face_first_idx);
    CHECK_CPU(num_faces_per_mesh);
    return RasterizeMeshesCoarseCpu(
        face_verts,
        mesh_to_face_first_idx,
@@ -434,8 +423,6 @@ RasterizeMeshesFine(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
    CHECK_CPU(face_verts);
    CHECK_CPU(bin_faces);
    AT_ERROR("NOT IMPLEMENTED");
  }
 }
--- a/pytorch3d/csrc/rasterize_meshes/rasterize_meshes_cpu.cpp
+++ b/pytorch3d/csrc/rasterize_meshes/rasterize_meshes_cpu.cpp
@@ -106,8 +106,6 @@ auto ComputeFaceAreas(const torch::Tensor& face_verts) {
  return face_areas;
 }
 namespace {
 // Helper function to use with std::find_if to find the index of any
 // values in the top k struct which match a given idx.
 struct IsNeighbor {
@@ -120,6 +118,7 @@ struct IsNeighbor {
  int neighbor_idx;
 };
 namespace {
 void RasterizeMeshesNaiveCpu_worker(
    const int start_yi,
    const int end_yi,
--- a/pytorch3d/csrc/rasterize_points/rasterize_points.cu
+++ b/pytorch3d/csrc/rasterize_points/rasterize_points.cu
@@ -97,8 +97,8 @@ __global__ void RasterizePointsNaiveCudaKernel(
    float* zbuf, // (N, H, W, K)
    float* pix_dists) { // (N, H, W, K)
  // Simple version: One thread per output pixel
-  const auto num_threads = gridDim.x * blockDim.x;
+  const int num_threads = gridDim.x * blockDim.x;
-  const auto tid = blockDim.x * blockIdx.x + threadIdx.x;
+  const int tid = blockDim.x * blockIdx.x + threadIdx.x;
  for (int i = tid; i < N * H * W; i += num_threads) {
    // Convert linear index to 3D index
    const int n = i / (H * W); // Batch index
@@ -237,8 +237,8 @@ __global__ void RasterizePointsFineCudaKernel(
    float* pix_dists) { // (N, H, W, K)
  // This can be more than H * W if H or W are not divisible by bin_size.
  const int num_pixels = N * BH * BW * bin_size * bin_size;
-  const auto num_threads = gridDim.x * blockDim.x;
+  const int num_threads = gridDim.x * blockDim.x;
-  const auto tid = blockIdx.x * blockDim.x + threadIdx.x;
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
  for (int pid = tid; pid < num_pixels; pid += num_threads) {
    // Convert linear index into bin and pixel indices. We make the within
@@ -376,8 +376,8 @@ __global__ void RasterizePointsBackwardCudaKernel(
    float* grad_points) { // (P, 3)
  // Parallelized over each of K points per pixel, for each pixel in images of
  // size H * W, for each image in the batch of size N.
-  auto num_threads = gridDim.x * blockDim.x;
+  int num_threads = gridDim.x * blockDim.x;
-  auto tid = blockIdx.x * blockDim.x + threadIdx.x;
+  int tid = blockIdx.x * blockDim.x + threadIdx.x;
  for (int i = tid; i < N * H * W * K; i += num_threads) {
    // const int n = i / (H * W * K); // batch index (not needed).
    const int yxk = i % (H * W * K);
--- a/pytorch3d/csrc/rasterize_points/rasterize_points.h
+++ b/pytorch3d/csrc/rasterize_points/rasterize_points.h
@@ -91,10 +91,6 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> RasterizePointsNaive(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
    CHECK_CPU(points);
    CHECK_CPU(cloud_to_packed_first_idx);
    CHECK_CPU(num_points_per_cloud);
    CHECK_CPU(radius);
    return RasterizePointsNaiveCpu(
        points,
        cloud_to_packed_first_idx,
@@ -170,10 +166,6 @@ torch::Tensor RasterizePointsCoarse(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
    CHECK_CPU(points);
    CHECK_CPU(cloud_to_packed_first_idx);
    CHECK_CPU(num_points_per_cloud);
    CHECK_CPU(radius);
    return RasterizePointsCoarseCpu(
        points,
        cloud_to_packed_first_idx,
@@ -240,8 +232,6 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> RasterizePointsFine(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
    CHECK_CPU(points);
    CHECK_CPU(bin_points);
    AT_ERROR("NOT IMPLEMENTED");
  }
 }
@@ -294,10 +284,6 @@ torch::Tensor RasterizePointsBackward(
    AT_ERROR("Not compiled with GPU support");
 #endif
  } else {
    CHECK_CPU(points);
    CHECK_CPU(idxs);
    CHECK_CPU(grad_zbuf);
    CHECK_CPU(grad_dists);
    return RasterizePointsBackwardCpu(points, idxs, grad_zbuf, grad_dists);
  }
 }
--- a/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.cu
+++ b/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.cu
@@ -107,8 +107,7 @@ at::Tensor FarthestPointSamplingCuda(
    const at::Tensor& points, // (N, P, 3)
    const at::Tensor& lengths, // (N,)
    const at::Tensor& K, // (N,)
-    const at::Tensor& start_idxs,
+    const at::Tensor& start_idxs) {
    const int64_t max_K_known = -1) {
  // Check inputs are on the same device
  at::TensorArg p_t{points, "points", 1}, lengths_t{lengths, "lengths", 2},
      k_t{K, "K", 3}, start_idxs_t{start_idxs, "start_idxs", 4};
@@ -130,12 +129,7 @@ at::Tensor FarthestPointSamplingCuda(
  const int64_t N = points.size(0);
  const int64_t P = points.size(1);
-  int64_t max_K;
+  const int64_t max_K = at::max(K).item<int64_t>();
  if (max_K_known > 0) {
    max_K = max_K_known;
  } else {
    max_K = at::max(K).item<int64_t>();
  }
  // Initialize the output tensor with the sampled indices
  auto idxs = at::full({N, max_K}, -1, lengths.options());
--- a/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.h
+++ b/pytorch3d/csrc/sample_farthest_points/sample_farthest_points.h
@@ -43,8 +43,7 @@ at::Tensor FarthestPointSamplingCuda(
    const at::Tensor& points,
    const at::Tensor& lengths,
    const at::Tensor& K,
-    const at::Tensor& start_idxs,
+    const at::Tensor& start_idxs);
    const int64_t max_K_known = -1);
 at::Tensor FarthestPointSamplingCpu(
    const at::Tensor& points,
@@ -57,23 +56,17 @@ at::Tensor FarthestPointSampling(
    const at::Tensor& points,
    const at::Tensor& lengths,
    const at::Tensor& K,
-    const at::Tensor& start_idxs,
+    const at::Tensor& start_idxs) {
    const int64_t max_K_known = -1) {
  if (points.is_cuda() || lengths.is_cuda() || K.is_cuda()) {
 #ifdef WITH_CUDA
    CHECK_CUDA(points);
    CHECK_CUDA(lengths);
    CHECK_CUDA(K);
    CHECK_CUDA(start_idxs);
-    return FarthestPointSamplingCuda(
+    return FarthestPointSamplingCuda(points, lengths, K, start_idxs);
        points, lengths, K, start_idxs, max_K_known);
 #else
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(points);
  CHECK_CPU(lengths);
  CHECK_CPU(K);
  CHECK_CPU(start_idxs);
  return FarthestPointSamplingCpu(points, lengths, K, start_idxs);
 }
--- a/pytorch3d/csrc/sample_pdf/sample_pdf.h
+++ b/pytorch3d/csrc/sample_pdf/sample_pdf.h
@@ -71,8 +71,6 @@ inline void SamplePdf(
    AT_ERROR("Not compiled with GPU support.");
 #endif
  }
  CHECK_CPU(weights);
  CHECK_CPU(outputs);
  CHECK_CONTIGUOUS(outputs);
  SamplePdfCpu(bins, weights, outputs, eps);
 }
--- a/pytorch3d/csrc/utils/dispatch.cuh
+++ b/pytorch3d/csrc/utils/dispatch.cuh
@@ -99,7 +99,8 @@ namespace {
 // and increment it via template recursion until it is equal to the run-time
 // argument N.
 template <
-    template <typename, int64_t> class Kernel,
+    template <typename, int64_t>
    class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
@@ -123,7 +124,8 @@ struct DispatchKernelHelper1D {
 // 1D dispatch: Specialization when curN == maxN
 // We need this base case to avoid infinite template recursion.
 template <
-    template <typename, int64_t> class Kernel,
+    template <typename, int64_t>
    class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
@@ -143,7 +145,8 @@ struct DispatchKernelHelper1D<Kernel, T, minN, maxN, maxN, Args...> {
 // the run-time values of N and M, at which point we dispatch to the run
 // method of the kernel.
 template <
-    template <typename, int64_t, int64_t> class Kernel,
+    template <typename, int64_t, int64_t>
    class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
@@ -200,7 +203,8 @@ struct DispatchKernelHelper2D {
 // 2D dispatch, specialization for curN == maxN
 template <
-    template <typename, int64_t, int64_t> class Kernel,
+    template <typename, int64_t, int64_t>
    class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
@@ -239,7 +243,8 @@ struct DispatchKernelHelper2D<
 // 2D dispatch, specialization for curM == maxM
 template <
-    template <typename, int64_t, int64_t> class Kernel,
+    template <typename, int64_t, int64_t>
    class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
@@ -278,7 +283,8 @@ struct DispatchKernelHelper2D<
 // 2D dispatch, specialization for curN == maxN, curM == maxM
 template <
-    template <typename, int64_t, int64_t> class Kernel,
+    template <typename, int64_t, int64_t>
    class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
@@ -307,7 +313,8 @@ struct DispatchKernelHelper2D<
 // This is the function we expect users to call to dispatch to 1D functions
 template <
-    template <typename, int64_t> class Kernel,
+    template <typename, int64_t>
    class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
@@ -323,7 +330,8 @@ void DispatchKernel1D(const int64_t N, Args... args) {
 // This is the function we expect users to call to dispatch to 2D functions
 template <
-    template <typename, int64_t, int64_t> class Kernel,
+    template <typename, int64_t, int64_t>
    class Kernel,
    typename T,
    int64_t minN,
    int64_t maxN,
--- a/pytorch3d/csrc/utils/pytorch3d_cutils.h
+++ b/pytorch3d/csrc/utils/pytorch3d_cutils.h
@@ -15,7 +15,3 @@
 #define CHECK_CONTIGUOUS_CUDA(x) \
  CHECK_CUDA(x);                 \
  CHECK_CONTIGUOUS(x)
 #define CHECK_CPU(x)                    \
  TORCH_CHECK(                          \
      x.device().type() == torch::kCPU, \
      "Cannot use CPU implementation: " #x " not on CPU.")
--- a/pytorch3d/csrc/utils/vec2.h
+++ b/pytorch3d/csrc/utils/vec2.h
@@ -19,7 +19,7 @@ template <
        std::is_same<T, double>::value || std::is_same<T, float>::value>>
 struct vec2 {
  T x, y;
-  using scalar_t = T;
+  typedef T scalar_t;
  vec2(T x, T y) : x(x), y(y) {}
 };
--- a/pytorch3d/csrc/utils/vec3.h
+++ b/pytorch3d/csrc/utils/vec3.h
@@ -18,7 +18,7 @@ template <
        std::is_same<T, double>::value || std::is_same<T, float>::value>>
 struct vec3 {
  T x, y, z;
-  using scalar_t = T;
+  typedef T scalar_t;
  vec3(T x, T y, T z) : x(x), y(y), z(z) {}
 };
--- a/pytorch3d/implicitron/dataset/blender_dataset_map_provider.py
+++ b/pytorch3d/implicitron/dataset/blender_dataset_map_provider.py
@@ -0,0 +1,55 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 # pyre-unsafe
 import torch
 from pytorch3d.implicitron.tools.config import registry
 from .load_blender import load_blender_data
 from .single_sequence_dataset import (
    _interpret_blender_cameras,
    SingleSceneDatasetMapProviderBase,
 )
@registry.register
 class BlenderDatasetMapProvider(SingleSceneDatasetMapProviderBase):
     """
     Dataset map provider for a single scene of the Blender synthetic dataset.
     The scene is read with load_blender_data from load_blender.py.
     Members:
         base_dir: directory holding the data for the scene.
         object_name: The name of the scene (e.g. "lego"). This is just used as a label.
             It will typically be equal to the name of the directory self.base_dir.
         path_manager_factory: Creates path manager which may be used for
             interpreting paths.
         n_known_frames_for_test: If set, training frames are included in the val
             and test datasets, and this many random training frames are added to
             each test batch. If not set, test batches each contain just a single
             testing frame.
     """
     def _load_data(self) -> None:
         """Read the scene and set self.poses, self.images, self.fg_probabilities and self.i_split."""
         loaded = load_blender_data(
             self.base_dir,
             testskip=1,
             path_manager=self.path_manager_factory.get(),
         )
         # Only the focal length is needed out of the [H, W, focal] triple.
         raw_images, raw_poses, _, (_, _, focal), split_indices = loaded
         # Move the last axis of the image array to position 1.
         channels_first = torch.from_numpy(raw_images).permute(0, 3, 1, 2)
         # pyre-ignore[16]
         self.poses = _interpret_blender_cameras(raw_poses, focal)
         # The first three channels are kept as the image ...
         # pyre-ignore[16]
         self.images = channels_first[:, :3]
         # ... and the fourth one as the foreground probability.
         # pyre-ignore[16]
         self.fg_probabilities = channels_first[:, 3:4]
         # pyre-ignore[16]
         self.i_split = split_indices
--- a/pytorch3d/implicitron/dataset/data_source.py
+++ b/pytorch3d/implicitron/dataset/data_source.py
@@ -64,12 +64,16 @@ class ImplicitronDataSource(DataSourceBase):
    def pre_expand(cls) -> None:
        # use try/finally to bypass cinder's lazy imports
        try:
            from .blender_dataset_map_provider import (  # noqa: F401
                BlenderDatasetMapProvider,
            )
            from .json_index_dataset_map_provider import (  # noqa: F401
                JsonIndexDatasetMapProvider,
            )
            from .json_index_dataset_map_provider_v2 import (  # noqa: F401
                JsonIndexDatasetMapProviderV2,
            )
            from .llff_dataset_map_provider import LlffDatasetMapProvider  # noqa: F401
            from .rendered_mesh_dataset_map_provider import (  # noqa: F401
                RenderedMeshDatasetMapProvider,
            )
--- a/pytorch3d/implicitron/dataset/dataset_base.py
+++ b/pytorch3d/implicitron/dataset/dataset_base.py
@@ -21,6 +21,7 @@ from typing import (
 )
 import torch
 from pytorch3d.implicitron.dataset.frame_data import FrameData
 from pytorch3d.implicitron.dataset.utils import GenericWorkaround
--- a/pytorch3d/implicitron/dataset/frame_data.py
+++ b/pytorch3d/implicitron/dataset/frame_data.py
@@ -25,6 +25,7 @@ from typing import (
 import numpy as np
 import torch
 from pytorch3d.implicitron.dataset import orm_types, types
 from pytorch3d.implicitron.dataset.utils import (
    adjust_camera_to_bbox_crop_,
--- a/pytorch3d/implicitron/dataset/json_index_dataset.py
+++ b/pytorch3d/implicitron/dataset/json_index_dataset.py
@@ -38,6 +38,7 @@ from pytorch3d.implicitron.dataset.utils import is_known_frame_scalar
 from pytorch3d.implicitron.tools.config import registry, ReplaceableBase
 from pytorch3d.renderer.camera_utils import join_cameras_as_batch
 from pytorch3d.renderer.cameras import CamerasBase
 from tqdm import tqdm
@@ -326,9 +327,9 @@ class JsonIndexDataset(DatasetBase, ReplaceableBase):
                assert os.path.normpath(
                    # pyre-ignore[16]
                    self.frame_annots[idx]["frame_annotation"].image.path
-                ) == os.path.normpath(path), (
+                ) == os.path.normpath(
-                    f"Inconsistent frame indices {seq_name, frame_no, path}."
+                    path
-                )
+                ), f"Inconsistent frame indices {seq_name, frame_no, path}."
            return idx
        dataset_idx = [
--- a/pytorch3d/implicitron/dataset/json_index_dataset_map_provider.py
+++ b/pytorch3d/implicitron/dataset/json_index_dataset_map_provider.py
@@ -21,6 +21,7 @@ from pytorch3d.renderer.cameras import CamerasBase
 from .dataset_map_provider import DatasetMap, DatasetMapProviderBase, PathManagerFactory
 from .json_index_dataset import JsonIndexDataset
 from .utils import (
    DATASET_TYPE_KNOWN,
    DATASET_TYPE_TEST,
--- a/pytorch3d/implicitron/dataset/json_index_dataset_map_provider_v2.py
+++ b/pytorch3d/implicitron/dataset/json_index_dataset_map_provider_v2.py
@@ -18,6 +18,7 @@ from typing import Dict, List, Optional, Tuple, Type, Union
 import numpy as np
 from iopath.common.file_io import PathManager
 from omegaconf import DictConfig
 from pytorch3d.implicitron.dataset.dataset_map_provider import (
    DatasetMap,
@@ -30,6 +31,7 @@ from pytorch3d.implicitron.tools.config import (
    registry,
    run_auto_creation,
 )
 from pytorch3d.renderer.cameras import CamerasBase
 from tqdm import tqdm
--- a/pytorch3d/implicitron/dataset/llff_dataset_map_provider.py
+++ b/pytorch3d/implicitron/dataset/llff_dataset_map_provider.py
@@ -0,0 +1,69 @@
 # Copyright (c) Meta Platforms, Inc. and affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 # pyre-unsafe
 import numpy as np
 import torch
 from pytorch3d.implicitron.tools.config import registry
 from .load_llff import load_llff_data
 from .single_sequence_dataset import (
    _interpret_blender_cameras,
    SingleSceneDatasetMapProviderBase,
 )
@registry.register
 class LlffDatasetMapProvider(SingleSceneDatasetMapProviderBase):
     """
     Dataset map provider for a single scene of the LLFF dataset.
     Members:
         base_dir: directory holding the data for the scene.
         object_name: The name of the scene (e.g. "fern"). This is just used as a label.
             It will typically be equal to the name of the directory self.base_dir.
         path_manager_factory: Creates path manager which may be used for
             interpreting paths.
         n_known_frames_for_test: If set, training frames are included in the val
             and test datasets, and this many random training frames are added to
             each test batch. If not set, test batches each contain just a single
             testing frame.
         downscale_factor: determines image sizes.
     """
     downscale_factor: int = 4
     def _load_data(self) -> None:
         """Read the scene and set self.poses, self.images, self.fg_probabilities and self.i_split."""
         # Every llffhold-th frame, starting with frame 0, is held out for val/test.
         llffhold = 8
         raw_images, raw_poses, _ = load_llff_data(
             self.base_dir,
             factor=self.downscale_factor,
             path_manager=self.path_manager_factory.get(),
         )
         # The last column of the first pose carries (H, W, focal).
         height, width, focal = raw_poses[0, :3, -1]
         camera_matrices = raw_poses[:, :3, :4]
         frame_ids = np.arange(raw_images.shape[0])
         i_test = frame_ids[::llffhold]
         held_out = set(i_test.tolist())
         i_train = np.array([i for i in frame_ids if i not in held_out])
         focal_ndc = 2 * focal / min(height, width)
         # pyre-ignore[16]
         self.poses = _interpret_blender_cameras(
             torch.from_numpy(camera_matrices), focal_ndc
         )
         # Move the last axis of the image array to position 1.
         # pyre-ignore[16]
         self.images = torch.from_numpy(raw_images).permute(0, 3, 1, 2)
         # pyre-ignore[16]
         self.fg_probabilities = None
         # The held-out frames serve as both the val and the test split.
         # pyre-ignore[16]
         self.i_split = (i_train, i_test, i_test)
--- a/pytorch3d/implicitron/dataset/load_blender.py
+++ b/pytorch3d/implicitron/dataset/load_blender.py
@@ -0,0 +1,143 @@
 # @lint-ignore-every LICENSELINT
 # Adapted from https://github.com/bmild/nerf/blob/master/load_blender.py
 # Copyright (c) 2020 bmild
 # pyre-unsafe
 import json
 import os
 import numpy as np
 import torch
 from PIL import Image
 def translate_by_t_along_z(t):
     """Return a 4x4 float32 identity matrix whose entry [2, 3] is set to t."""
     matrix = np.eye(4, dtype=np.float32)
     matrix[2, 3] = t
     return matrix
 def rotate_by_phi_along_x(phi):
     """Return the 4x4 float32 homogeneous rotation by phi (radians) about the x axis."""
     cos_phi = np.cos(phi)
     sin_phi = np.sin(phi)
     matrix = np.eye(4, dtype=np.float32)
     matrix[1, 1] = cos_phi
     matrix[2, 2] = cos_phi
     matrix[1, 2] = -sin_phi
     # Mirror the already-stored float32 entry so both off-diagonals match exactly.
     matrix[2, 1] = -matrix[1, 2]
     return matrix
 def rotate_by_theta_along_y(theta):
     """Return the 4x4 float32 homogeneous matrix with cos(theta) at [0, 0] and [2, 2], -sin(theta) at [0, 2] and sin(theta) at [2, 0]."""
     cos_theta = np.cos(theta)
     sin_theta = np.sin(theta)
     matrix = np.eye(4, dtype=np.float32)
     matrix[0, 0] = cos_theta
     matrix[2, 2] = cos_theta
     matrix[0, 2] = -sin_theta
     # Mirror the already-stored float32 entry so both off-diagonals match exactly.
     matrix[2, 0] = -matrix[0, 2]
     return matrix
 def pose_spherical(theta, phi, radius):
     """
     Build a 4x4 camera-to-world matrix from two angles in degrees and a radius.
     The matrix is the product, right to left, of a translation by radius along z,
     a rotation by phi about x, a rotation by theta about y and a fixed axis
     permutation that negates x and swaps y with z.
     """
     axis_permutation = np.array(
         [[-1, 0, 0, 0], [0, 0, 1, 0], [0, 1, 0, 0], [0, 0, 0, 1]]
     )
     shift_z = translate_by_t_along_z(radius)
     rot_x = rotate_by_phi_along_x(phi / 180.0 * np.pi)
     rot_y = rotate_by_theta_along_y(theta / 180 * np.pi)
     return axis_permutation @ (rot_y @ (rot_x @ shift_z))
 def _local_path(path_manager, path):
     """Return path itself when path_manager is None, else path_manager.get_local_path(path)."""
     return path if path_manager is None else path_manager.get_local_path(path)
 def load_blender_data(
     basedir,
     half_res=False,
     testskip=1,
     debug=False,
     path_manager=None,
     focal_length_in_screen_space=False,
 ):
     """
     Load the images and camera poses of one Blender synthetic scene.
     Reads transforms_train.json, transforms_val.json and transforms_test.json
     from basedir together with the PNG files their frames reference.
     Args:
         basedir: directory holding the transforms_*.json files and the images.
         half_res: if True (and debug is False), halve the image height and width.
         testskip: stride used to subsample the frames of the val and test splits;
             0 is treated like 1. The train split is never subsampled.
         debug: if True, shrink the images by a factor of 32. Takes precedence
             over half_res.
         path_manager: optional object whose get_local_path() maps each path to
             a local file. If None, the paths are opened directly.
         focal_length_in_screen_space: if True, the focal length is
             0.5 * W / tan(0.5 * camera_angle_x) and is rescaled together with
             the images; otherwise it is 1 / tan(0.5 * camera_angle_x).
     Returns:
         imgs: float32 images of the train, val and test splits concatenated in
             that order, pixel values divided by 255. A numpy array, or a torch
             tensor when the images were resized (debug or half_res).
         poses: float32 torch tensor of the frames' "transform_matrix" entries,
             in the same order as imgs.
         render_poses: torch tensor stacking pose_spherical(angle, -30.0, 4.0)
             for 40 evenly spaced angles in [-180, 180).
         [H, W, focal]: image height, width and focal length after any resizing.
         i_split: list of three index arrays (train, val, test) into imgs and poses.
     """
     splits = ["train", "val", "test"]
     metas = {}
     for s in splits:
         path = os.path.join(basedir, f"transforms_{s}.json")
         with open(_local_path(path_manager, path)) as fp:
             metas[s] = json.load(fp)
     all_imgs = []
     all_poses = []
     counts = [0]
     for s in splits:
         skip = 1 if s == "train" or testskip == 0 else testskip
         imgs = []
         poses = []
         for frame in metas[s]["frames"][::skip]:
             fname = os.path.join(basedir, frame["file_path"] + ".png")
             imgs.append(np.array(Image.open(_local_path(path_manager, fname))))
             poses.append(np.array(frame["transform_matrix"]))
         imgs = (np.array(imgs) / 255.0).astype(np.float32)
         poses = np.array(poses).astype(np.float32)
         counts.append(counts[-1] + imgs.shape[0])
         all_imgs.append(imgs)
         all_poses.append(poses)
     i_split = [np.arange(counts[i], counts[i + 1]) for i in range(3)]
     imgs = np.concatenate(all_imgs, 0)
     poses = torch.from_numpy(np.concatenate(all_poses, 0))
     H, W = imgs[0].shape[:2]
     # The field of view is taken from the last split that was read.
     camera_angle_x = float(metas[splits[-1]]["camera_angle_x"])
     if focal_length_in_screen_space:
         focal = 0.5 * W / np.tan(0.5 * camera_angle_x)
     else:
         focal = 1 / np.tan(0.5 * camera_angle_x)
     render_poses = torch.stack(
         [
             torch.from_numpy(pose_spherical(angle, -30.0, 4.0))
             for angle in np.linspace(-180, 180, 40 + 1)[:-1]
         ],
         0,
     )
     # In debug mode, return extremely tiny images; debug wins over half_res.
     downscale = 32 if debug else (2 if half_res else 1)
     if downscale != 1:
         import cv2
         H = H // downscale
         W = W // downscale
         if focal_length_in_screen_space:
             focal = focal / float(downscale)
         # Resize to the reported (W, H) rather than to a hard-coded size, so the
         # images always agree with the returned H and W. For the usual 800x800
         # inputs this is the same 25x25 / 400x400 as before.
         # cv2.resize takes dsize as (width, height).
         imgs = torch.stack(
             [
                 torch.from_numpy(
                     cv2.resize(img, dsize=(W, H), interpolation=cv2.INTER_AREA)
                 )
                 for img in imgs
             ],
             0,
         )
     return imgs, poses, render_poses, [H, W, focal], i_split
--- a/pytorch3d/implicitron/dataset/load_llff.py
+++ b/pytorch3d/implicitron/dataset/load_llff.py
@@ -0,0 +1,336 @@
 # @lint-ignore-every LICENSELINT
 # Adapted from https://github.com/bmild/nerf/blob/master/load_llff.py
 # Copyright (c) 2020 bmild
 # pyre-unsafe
 import logging
 import os
 import warnings
 import numpy as np
 from PIL import Image
 # Slightly modified version of LLFF data loading code
 #  see https://github.com/Fyusion/LLFF for original
 logger = logging.getLogger(__name__)
 def _minify(basedir, path_manager, factors=(), resolutions=()):
     """
     Create the downscaled image directories of basedir that do not exist yet.
     For every entry r of factors a directory images_{r} is expected, and for
     every entry (height, width) of resolutions a directory
     images_{width}x{height}. If all of them exist, nothing is done. Otherwise
     each missing directory is created as a copy of basedir/images and resized
     in place to PNG with ImageMagick's mogrify.
     Args:
         basedir: scene directory containing the "images" directory.
         path_manager: optional path manager used for the existence checks.
             Creating directories is only supported when it is None.
         factors: sequence of integer downscale factors.
         resolutions: sequence of (height, width) pairs.
     Raises:
         AssertionError: if a directory is missing and path_manager is not None.
     """
     needtoload = False
     for r in factors:
         imgdir = os.path.join(basedir, "images_{}".format(r))
         if not _exists(path_manager, imgdir):
             needtoload = True
     for r in resolutions:
         imgdir = os.path.join(basedir, "images_{}x{}".format(r[1], r[0]))
         if not _exists(path_manager, imgdir):
             needtoload = True
     if not needtoload:
         return
     # The commands below work on the local file system only.
     assert path_manager is None
     from subprocess import check_output
     imgdir_orig = os.path.join(basedir, "images")
     imgs = [
         os.path.join(imgdir_orig, f) for f in sorted(_ls(path_manager, imgdir_orig))
     ]
     # str.endswith accepts several suffixes only when they are given as a tuple.
     imgs = [f for f in imgs if f.endswith(("JPG", "jpg", "png", "jpeg", "PNG"))]
     # Callers pass lists while the defaults are tuples, so normalise both
     # before concatenating them.
     for r in list(factors) + list(resolutions):
         if isinstance(r, int):
             name = "images_{}".format(r)
             resizearg = "{}%".format(100.0 / r)
         else:
             name = "images_{}x{}".format(r[1], r[0])
             resizearg = "{}x{}".format(r[1], r[0])
         imgdir = os.path.join(basedir, name)
         if os.path.exists(imgdir):
             continue
         logger.info(f"Minifying {r}, {basedir}")
         os.makedirs(imgdir)
         # NOTE(review): the shell commands interpolate paths unquoted, so
         # basedir must not contain spaces or shell metacharacters.
         check_output("cp {}/* {}".format(imgdir_orig, imgdir), shell=True)
         ext = imgs[0].split(".")[-1]
         args = " ".join(
             ["mogrify", "-resize", resizearg, "-format", "png", "*.{}".format(ext)]
         )
         logger.info(args)
         # Run mogrify inside imgdir without changing the working directory of
         # this process, so a failing command cannot leave it changed.
         check_output(args, shell=True, cwd=imgdir)
         if ext != "png":
             check_output("rm {}/*.{}".format(imgdir, ext), shell=True)
             logger.info("Removed duplicates")
         logger.info("Done")
 def _load_data(
     basedir, factor=None, width=None, height=None, load_imgs=True, path_manager=None
 ):
     """
     Load the poses, depth bounds and (optionally) images of one LLFF scene.
     Exactly one of factor, height and width selects the image directory, in
     that order of precedence; if none is given, basedir/images is used.
     Args:
         basedir: scene directory with poses_bounds.npy and the image directories.
         factor: downscale factor; images are read from images_{factor}.
         width: target image width; the height is derived from the aspect ratio.
         height: target image height; the width is derived from the aspect ratio.
         load_imgs: if False, only poses and bounds are returned.
         path_manager: optional path manager used to list and localise files.
     Returns:
         poses: array of shape (3, 5, N). Rows 0 and 1 of column 4 are overwritten
             with the height and width of the first loaded image, and row 2 of
             column 4 is divided by the downscale factor.
         bds: array of shape (2, N) holding the last two values of every row of
             poses_bounds.npy.
         imgs: only if load_imgs; the first three channels of every image divided
             by 255, stacked along a new last axis.
     Raises:
         ValueError: if the image directory does not exist or the number of
             images differs from the number of poses.
     """
     suffixes = ("JPG", "jpg", "png")
     poses_arr = np.load(
         _local_path(path_manager, os.path.join(basedir, "poses_bounds.npy"))
     )
     poses = poses_arr[:, :-2].reshape([-1, 3, 5]).transpose([1, 2, 0])
     bds = poses_arr[:, -2:].transpose([1, 0])
     # The first full-resolution image provides the reference size.
     img0 = [
         os.path.join(basedir, "images", f)
         for f in sorted(_ls(path_manager, os.path.join(basedir, "images")))
         if f.endswith(suffixes)
     ][0]
     def imread(f):
         return np.array(Image.open(f))
     sh = imread(_local_path(path_manager, img0)).shape
     sfx = ""
     if factor is not None:
         sfx = "_{}".format(factor)
         _minify(basedir, path_manager, factors=[factor])
     elif height is not None:
         factor = sh[0] / float(height)
         width = int(sh[1] / factor)
         _minify(basedir, path_manager, resolutions=[[height, width]])
         sfx = "_{}x{}".format(width, height)
     elif width is not None:
         factor = sh[1] / float(width)
         height = int(sh[0] / factor)
         _minify(basedir, path_manager, resolutions=[[height, width]])
         sfx = "_{}x{}".format(width, height)
     else:
         factor = 1
     imgdir = os.path.join(basedir, "images" + sfx)
     if not _exists(path_manager, imgdir):
         raise ValueError(f"{imgdir} does not exist, returning")
     imgfiles = [
         _local_path(path_manager, os.path.join(imgdir, f))
         for f in sorted(_ls(path_manager, imgdir))
         if f.endswith(suffixes)
     ]
     if poses.shape[-1] != len(imgfiles):
         raise ValueError(
             "Mismatch between imgs {} and poses {} !!!!".format(
                 len(imgfiles), poses.shape[-1]
             )
         )
     # Store the actual image size, and rescale row 2 of column 4 to match.
     sh = imread(imgfiles[0]).shape
     poses[:2, 4, :] = np.array(sh[:2]).reshape([2, 1])
     poses[2, 4, :] = poses[2, 4, :] * 1.0 / factor
     if not load_imgs:
         return poses, bds
     imgs = np.stack([imread(f)[..., :3] / 255.0 for f in imgfiles], -1)
     logger.info(f"Loaded image data, shape {imgs.shape}")
     return poses, bds, imgs
 def normalize(x):
     """Return x divided by its norm, warning when that norm is below 0.001."""
     length = np.linalg.norm(x)
     if length < 0.001:
         warnings.warn("unsafe normalize()")
     return x / length
 def viewmatrix(z, up, pos):
     """
     Build a 3x4 matrix whose columns are three unit axes followed by pos.
     The third axis is z normalised, the first is the normalised cross product
     of up with it, and the second is the normalised cross product of the third
     with the first.
     """
     axis2 = normalize(z)
     axis0 = normalize(np.cross(up, axis2))
     axis1 = normalize(np.cross(axis2, axis0))
     return np.stack([axis0, axis1, axis2, pos], 1)
 def ptstocam(pts, c2w):
     """Subtract column 3 of c2w from pts and multiply by the transpose of its top-left 3x3 block."""
     rotation_t = c2w[:3, :3].T
     offsets = pts - c2w[:3, 3]
     # Treat every offset as a column vector for the batched product.
     return np.matmul(rotation_t, offsets[..., np.newaxis])[..., 0]
 def poses_avg(poses):
     """
     Combine a batch of poses into one 3x5 pose.
     The position is the mean of column 3, the viewing axis the normalised sum
     of column 2 and the up vector the sum of column 1. The last column of the
     first pose is appended unchanged.
     """
     last_column = poses[0, :3, -1:]
     mean_position = poses[:, :3, 3].mean(0)
     summed_z = normalize(poses[:, :3, 2].sum(0))
     summed_up = poses[:, :3, 1].sum(0)
     average = viewmatrix(summed_z, summed_up, mean_position)
     return np.concatenate([average, last_column], 1)
 def render_path_spiral(c2w, up, rads, focal, zdelta, zrate, rots, N):
     """
     Return a list of N 3x5 poses sampled along a spiral around c2w.
     The angle runs over N evenly spaced values in [0, 2 * pi * rots). For each
     one, the position is c2w[:3, :4] applied to (cos, -sin, -sin(angle * zrate), 1)
     scaled by rads, and the viewing axis points from c2w[:3, :4] applied to
     (0, 0, -focal, 1) towards that position. Column 4 of c2w is appended to
     every pose. zdelta is accepted but not used.
     """
     pose34 = c2w[:3, :4]
     last_column = c2w[:, 4:5]
     scales = np.array(list(rads) + [1.0])
     spiral = []
     for angle in np.linspace(0.0, 2.0 * np.pi * rots, N + 1)[:-1]:
         offset = np.array(
             [np.cos(angle), -np.sin(angle), -np.sin(angle * zrate), 1.0]
         )
         position = np.dot(pose34, offset * scales)
         target = np.dot(pose34, np.array([0, 0, -focal, 1.0]))
         direction = normalize(position - target)
         spiral.append(
             np.concatenate([viewmatrix(direction, up, position), last_column], 1)
         )
     return spiral
 def recenter_poses(poses):
     """
     Left-multiply the poses by the inverse of their average pose.
     Only the leading 3x4 part of every pose is replaced; any further columns
     are copied from the input. The input array is left unmodified.
     """
     recentered = poses + 0
     last_row = np.reshape([0, 0, 0, 1.0], [1, 4])
     # Homogenise the average pose and every input pose to 4x4.
     average = np.concatenate([poses_avg(poses)[:3, :4], last_row], -2)
     last_rows = np.tile(np.reshape(last_row, [1, 1, 4]), [poses.shape[0], 1, 1])
     homogeneous = np.concatenate([poses[:, :3, :4], last_rows], -2)
     transformed = np.linalg.inv(average) @ homogeneous
     recentered[:, :3, :4] = transformed[:, :3, :4]
     return recentered
 def spherify_poses(poses, bds):
     """
     Re-express the poses in a frame centred on a common point and rescale them.
     Args:
         poses: batch of poses; columns 0-3 of the first three rows are used as
             the 3x4 transforms and the last column of the first pose is
             appended to every returned pose.
         bds: bounds, multiplied IN PLACE by the same scale as the positions
             (assumes a numpy array -- TODO confirm with callers).
     Returns:
         poses_reset: the input poses in the new frame, with positions scaled so
             that their root-mean-square distance from the origin is 1.
         new_poses: 120 generated poses on a circle at the mean height of the
             rescaled positions.
         bds: the rescaled bounds.
     """
     def add_row_to_homogenize_transform(p):
         r"""Add the last row to homogenize 3 x 4 transformation matrices."""
         return np.concatenate(
             [p, np.tile(np.reshape(np.eye(4)[-1, :], [1, 1, 4]), [p.shape[0], 1, 1])], 1
         )
     # p34_to_44 = lambda p: np.concatenate(
     #     [p, np.tile(np.reshape(np.eye(4)[-1, :], [1, 1, 4]), [p.shape[0], 1, 1])], 1
     # )
     p34_to_44 = add_row_to_homogenize_transform
     # Column 2 of every pose is used as the ray direction, column 3 as its origin.
     rays_d = poses[:, :3, 2:3]
     rays_o = poses[:, :3, 3:4]
     def min_line_dist(rays_o, rays_d):
         # NOTE(review): presumably the least-squares point closest to all the
         # rays, judging by the name -- confirm against the LLFF reference code.
         A_i = np.eye(3) - rays_d * np.transpose(rays_d, [0, 2, 1])
         b_i = -A_i @ rays_o
         pt_mindist = np.squeeze(
             -np.linalg.inv((np.transpose(A_i, [0, 2, 1]) @ A_i).mean(0)) @ (b_i).mean(0)
         )
         return pt_mindist
     pt_mindist = min_line_dist(rays_o, rays_d)
     center = pt_mindist
     # The new third axis is the mean offset of the positions from the centre.
     up = (poses[:, :3, 3] - center).mean(0)
     vec0 = normalize(up)
     # [0.1, 0.2, 0.3] is an arbitrary vector used only to complete the basis.
     vec1 = normalize(np.cross([0.1, 0.2, 0.3], vec0))
     vec2 = normalize(np.cross(vec0, vec1))
     pos = center
     c2w = np.stack([vec1, vec2, vec0, pos], 1)
     # Express every pose in the frame defined by c2w.
     poses_reset = np.linalg.inv(p34_to_44(c2w[None])) @ p34_to_44(poses[:, :3, :4])
     # Root-mean-square distance of the positions from the new origin.
     rad = np.sqrt(np.mean(np.sum(np.square(poses_reset[:, :3, 3]), -1)))
     sc = 1.0 / rad
     poses_reset[:, :3, 3] *= sc
     # Augmented assignment: this modifies the caller's bds as well.
     bds *= sc
     rad *= sc
     centroid = np.mean(poses_reset[:, :3, 3], 0)
     zh = centroid[2]
     # Radius of the circle at height zh on the sphere of radius rad.
     radcircle = np.sqrt(rad**2 - zh**2)
     new_poses = []
     # linspace includes both end points, so the first and last angle coincide.
     for th in np.linspace(0.0, 2.0 * np.pi, 120):
         camorigin = np.array([radcircle * np.cos(th), radcircle * np.sin(th), zh])
         up = np.array([0, 0, -1.0])
         vec2 = normalize(camorigin)
         vec0 = normalize(np.cross(vec2, up))
         vec1 = normalize(np.cross(vec2, vec0))
         pos = camorigin
         p = np.stack([vec0, vec1, vec2, pos], 1)
         new_poses.append(p)
     new_poses = np.stack(new_poses, 0)
     # Append the last column of the first input pose to every generated pose ...
     new_poses = np.concatenate(
         [new_poses, np.broadcast_to(poses[0, :3, -1:], new_poses[:, :3, -1:].shape)], -1
     )
     # ... and to every re-expressed input pose.
     poses_reset = np.concatenate(
         [
             poses_reset[:, :3, :4],
             np.broadcast_to(poses[0, :3, -1:], poses_reset[:, :3, -1:].shape),
         ],
         -1,
     )
     return poses_reset, new_poses, bds
 def _local_path(path_manager, path):
     """Return path itself when path_manager is None, else path_manager.get_local_path(path)."""
     return path if path_manager is None else path_manager.get_local_path(path)
 def _ls(path_manager, path):
     """List the entries of path with os.listdir, or with path_manager.ls when a path manager is given."""
     return os.listdir(path) if path_manager is None else path_manager.ls(path)
 def _exists(path_manager, path):
     """Check path with os.path.exists, or with path_manager.exists when a path manager is given."""
     return os.path.exists(path) if path_manager is None else path_manager.exists(path)
 def load_llff_data(
     basedir,
     factor=8,
     recenter=True,
     bd_factor=0.75,
     spherify=False,
     path_zflat=False,
     path_manager=None,
 ):
     """
     Load one LLFF scene and bring its poses into a normalised frame.
     Args:
         basedir: scene directory, passed on to _load_data.
         factor: image downscale factor, passed on to _load_data.
         recenter: if True, apply recenter_poses to the poses.
         bd_factor: if not None, positions and bounds are multiplied by
             1 / (smallest bound * bd_factor).
         spherify: if True, apply spherify_poses to the poses and bounds.
         path_zflat: accepted but not used.
         path_manager: optional path manager, passed on to _load_data.
     Returns:
         A tuple (images, poses, bds) of float32 arrays whose first axis indexes
         the frames.
     """
     raw_poses, raw_bds, raw_imgs = _load_data(
         basedir, factor=factor, path_manager=path_manager
     )  # factor=8 downsamples original imgs by 8x
     logger.info(f"Loaded {basedir}, {raw_bds.min()}, {raw_bds.max()}")
     # Correct rotation matrix ordering and move variable dim to axis 0
     reordered = np.concatenate(
         [raw_poses[:, 1:2, :], -raw_poses[:, 0:1, :], raw_poses[:, 2:, :]], 1
     )
     poses = np.moveaxis(reordered, -1, 0).astype(np.float32)
     images = np.moveaxis(raw_imgs, -1, 0).astype(np.float32)
     bds = np.moveaxis(raw_bds, -1, 0).astype(np.float32)
     # Rescale if bd_factor is provided
     if bd_factor is None:
         scale = 1.0
     else:
         scale = 1.0 / (bds.min() * bd_factor)
     poses[:, :3, 3] *= scale
     bds *= scale
     if recenter:
         poses = recenter_poses(poses)
     if spherify:
         # The generated render poses are not part of the result.
         poses, _, bds = spherify_poses(poses, bds)
     return images.astype(np.float32), poses.astype(np.float32), bds
--- a/pytorch3d/implicitron/dataset/orm_types.py
+++ b/pytorch3d/implicitron/dataset/orm_types.py
@@ -13,6 +13,7 @@ import struct
 from typing import Optional, Tuple
 import numpy as np
 from pytorch3d.implicitron.dataset.types import (
    DepthAnnotation,
    ImageAnnotation,
@@ -21,6 +22,7 @@ from pytorch3d.implicitron.dataset.types import (
    VideoAnnotation,
    ViewpointAnnotation,
 )
 from sqlalchemy import LargeBinary
 from sqlalchemy.orm import (
    composite,
--- a/pytorch3d/implicitron/dataset/single_sequence_dataset.py
+++ b/pytorch3d/implicitron/dataset/single_sequence_dataset.py
@@ -85,7 +85,7 @@ class SingleSceneDataset(DatasetBase, Configurable):
 class SingleSceneDatasetMapProviderBase(DatasetMapProviderBase):
    """
-    Base for provider of data for one scene.
+    Base for provider of data for one scene from LLFF or blender datasets.
    Members:
        base_dir: directory holding the data for the scene.
@@ -171,3 +171,40 @@ class SingleSceneDatasetMapProviderBase(DatasetMapProviderBase):
        # pyre-ignore[16]
        cameras = [self.poses[i] for i in self.i_split[0]]
        return join_cameras_as_batch(cameras)
 def _interpret_blender_cameras(
     poses: torch.Tensor, focal: float
 ) -> List[PerspectiveCameras]:
     """
     Convert camera matrices in blender format into PyTorch3D cameras.
     Args:
         poses: batch of camera matrices; only the leading 3 x 4 part of each
             matrix is used.
         focal: ndc space focal length
     Returns:
         One PerspectiveCameras object, holding a single camera, per input pose.
     """
     def _to_camera(pose: torch.Tensor) -> PerspectiveCameras:
         pose34 = pose[:3, :4]
         transform = torch.eye(4, dtype=pose34.dtype)
         # The rotation goes in transposed and the translation into the last row.
         transform[:3, :3] = pose34[:3, :3].t()
         transform[3, :3] = pose34[:, 3]
         transform = transform.inverse()
         # flip the XZ coordinates.
         transform[:, [0, 2]] *= -1.0
         # The first three rows form the rotation, the fourth the translation.
         rotation, translation = transform[:, :3].split([3, 1], dim=0)
         return PerspectiveCameras(
             focal_length=torch.FloatTensor([[focal, focal]]),
             principal_point=torch.FloatTensor([[0.0, 0.0]]),
             R=rotation[None],
             T=translation,
         )
     return [_to_camera(pose) for pose in poses]
--- a/pytorch3d/implicitron/dataset/sql_dataset.py
+++ b/pytorch3d/implicitron/dataset/sql_dataset.py
@@ -10,6 +10,7 @@ import hashlib
 import json
 import logging
 import os
 import urllib
 from dataclasses import dataclass, Field, field
 from typing import (
@@ -31,11 +32,13 @@ import pandas as pd
 import sqlalchemy as sa
 import torch
 from pytorch3d.implicitron.dataset.dataset_base import DatasetBase
 from pytorch3d.implicitron.dataset.frame_data import (
    FrameData,
    FrameDataBuilder,  # noqa
    FrameDataBuilderBase,
 )
 from pytorch3d.implicitron.tools.config import (
    registry,
    ReplaceableBase,
@@ -483,10 +486,9 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
            *self._get_pick_filters(),
            *self._get_exclude_filters(),
        ]
-        if pick_sequences_sql_clause := self.pick_sequences_sql_clause:
+        if self.pick_sequences_sql_clause:
            print("Applying the custom SQL clause.")
-            # pyre-ignore[6]: TextClause is compatible with where conditions
+            where_conditions.append(sa.text(self.pick_sequences_sql_clause))
            where_conditions.append(sa.text(pick_sequences_sql_clause))
        def add_where(stmt):
            return stmt.where(*where_conditions) if where_conditions else stmt
@@ -506,7 +508,6 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
            subquery = add_where(subquery).subquery()
            stmt = sa.select(subquery.c.sequence_name).where(
                # pyre-ignore[6]: SQLAlchemy column comparison returns ColumnElement, not bool
                subquery.c.row_number <= self.limit_sequences_per_category_to
            )
@@ -635,10 +636,9 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
                    )
                )
-        if pick_frames_sql_clause := self.pick_frames_sql_clause:
+        if self.pick_frames_sql_clause:
            logger.info("Applying the custom SQL clause.")
-            # pyre-ignore[6]: TextClause is compatible with where conditions
+            pick_frames_criteria.append(sa.text(self.pick_frames_sql_clause))
            pick_frames_criteria.append(sa.text(pick_frames_sql_clause))
        if pick_frames_criteria:
            index = self._pick_frames_by_criteria(index, pick_frames_criteria)
@@ -701,10 +701,9 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
                )
            )
-        if pick_frames_sql_clause := self.pick_frames_sql_clause:
+        if self.pick_frames_sql_clause:
            logger.info("  applying custom SQL clause")
-            # pyre-ignore[6]: TextClause is compatible with where conditions
+            where_conditions.append(sa.text(self.pick_frames_sql_clause))
            where_conditions.append(sa.text(pick_frames_sql_clause))
        if where_conditions:
            stmt = stmt.where(*where_conditions)
@@ -756,7 +755,7 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
        if pick_sequences:
            old_len = len(eval_batches)
            eval_batches = [b for b in eval_batches if b[0][0] in pick_sequences]
-            logger.warning(
+            logger.warn(
                f"Picked eval batches by sequence/cat: {old_len} -> {len(eval_batches)}"
            )
@@ -764,7 +763,7 @@ class SqlIndexDataset(DatasetBase, ReplaceableBase):
            old_len = len(eval_batches)
            exclude_sequences = set(self.exclude_sequences)
            eval_batches = [b for b in eval_batches if b[0][0] not in exclude_sequences]
-            logger.warning(
+            logger.warn(
                f"Excluded eval batches by sequence: {old_len} -> {len(eval_batches)}"
            )
--- a/pytorch3d/implicitron/dataset/sql_dataset_provider.py
+++ b/pytorch3d/implicitron/dataset/sql_dataset_provider.py
@@ -12,7 +12,9 @@ import os
 from typing import List, Optional, Tuple, Type
 import numpy as np
 from omegaconf import DictConfig, OmegaConf
 from pytorch3d.implicitron.dataset.dataset_map_provider import (
    DatasetMap,
    DatasetMapProviderBase,
--- a/pytorch3d/implicitron/dataset/train_eval_data_loader_provider.py
+++ b/pytorch3d/implicitron/dataset/train_eval_data_loader_provider.py
@@ -18,6 +18,7 @@ from pytorch3d.implicitron.dataset.dataset_base import DatasetBase
 from pytorch3d.implicitron.dataset.dataset_map_provider import DatasetMap
 from pytorch3d.implicitron.dataset.frame_data import FrameData
 from pytorch3d.implicitron.tools.config import registry, run_auto_creation
 from torch.utils.data import DataLoader
 logger = logging.getLogger(__name__)
--- a/pytorch3d/implicitron/dataset/utils.py
+++ b/pytorch3d/implicitron/dataset/utils.py
@@ -15,6 +15,7 @@ from typing import List, Optional, Tuple, TypeVar, Union
 import numpy as np
 import torch
 from PIL import Image
 from pytorch3d.io import IO
 from pytorch3d.renderer.cameras import PerspectiveCameras
 from pytorch3d.structures.pointclouds import Pointclouds
--- a/pytorch3d/implicitron/evaluation/evaluator.py
+++ b/pytorch3d/implicitron/evaluation/evaluator.py
@@ -14,6 +14,7 @@ import warnings
 from typing import Any, Dict, List, Optional, Tuple
 import torch
 import tqdm
 from pytorch3d.implicitron.evaluation import evaluate_new_view_synthesis as evaluate
 from pytorch3d.implicitron.models.base_model import EvaluationMode, ImplicitronModelBase
--- a/pytorch3d/implicitron/models/base_model.py
+++ b/pytorch3d/implicitron/models/base_model.py
@@ -10,6 +10,7 @@ from dataclasses import dataclass, field
 from typing import Any, Dict, List, Optional
 import torch
 from pytorch3d.implicitron.models.renderer.base import EvaluationMode
 from pytorch3d.implicitron.tools.config import ReplaceableBase
 from pytorch3d.renderer.cameras import CamerasBase
--- a/pytorch3d/implicitron/models/generic_model.py
+++ b/pytorch3d/implicitron/models/generic_model.py
@@ -16,6 +16,7 @@ from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING, Union
 import torch
 from omegaconf import DictConfig
 from pytorch3d.implicitron.models.base_model import (
    ImplicitronModelBase,
    ImplicitronRender,
@@ -27,6 +28,7 @@ from pytorch3d.implicitron.models.metrics import (
    RegularizationMetricsBase,
    ViewMetricsBase,
 )
 from pytorch3d.implicitron.models.renderer.base import (
    BaseRenderer,
    EvaluationMode,
@@ -36,6 +38,7 @@ from pytorch3d.implicitron.models.renderer.base import (
    RenderSamplingMode,
 )
 from pytorch3d.implicitron.models.renderer.ray_sampler import RaySamplerBase
 from pytorch3d.implicitron.models.utils import (
    apply_chunked,
    chunk_generator,
@@ -50,6 +53,7 @@ from pytorch3d.implicitron.tools.config import (
    registry,
    run_auto_creation,
 )
 from pytorch3d.implicitron.tools.rasterize_mc import rasterize_sparse_ray_bundle
 from pytorch3d.renderer import utils as rend_utils
 from pytorch3d.renderer.cameras import CamerasBase
--- a/pytorch3d/implicitron/models/implicit_function/base.py
+++ b/pytorch3d/implicitron/models/implicit_function/base.py
@@ -10,6 +10,7 @@ from abc import ABC, abstractmethod
 from typing import Optional
 from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
 from pytorch3d.implicitron.tools.config import ReplaceableBase
 from pytorch3d.renderer.cameras import CamerasBase
--- a/pytorch3d/implicitron/models/implicit_function/decoding_functions.py
+++ b/pytorch3d/implicitron/models/implicit_function/decoding_functions.py
@@ -16,11 +16,14 @@ This file contains
 import logging
 from dataclasses import field
 from enum import Enum
 from typing import Dict, Optional, Tuple
 import torch
 from omegaconf import DictConfig
 from pytorch3d.implicitron.tools.config import (
    Configurable,
    registry,
--- a/pytorch3d/implicitron/models/implicit_function/idr_feature_field.py
+++ b/pytorch3d/implicitron/models/implicit_function/idr_feature_field.py
@@ -11,6 +11,7 @@ import torch
 from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
 from pytorch3d.implicitron.tools.config import registry
 from pytorch3d.renderer.implicit import HarmonicEmbedding
 from torch import nn
 from .base import ImplicitFunctionBase
--- a/pytorch3d/implicitron/models/implicit_function/neural_radiance_field.py
+++ b/pytorch3d/implicitron/models/implicit_function/neural_radiance_field.py
@@ -21,6 +21,7 @@ from pytorch3d.renderer.implicit import HarmonicEmbedding
 from pytorch3d.renderer.implicit.utils import ray_bundle_to_ray_points
 from .base import ImplicitFunctionBase
 from .decoding_functions import (  # noqa
    _xavier_init,
    MLPWithInputSkips,
--- a/pytorch3d/implicitron/models/implicit_function/utils.py
+++ b/pytorch3d/implicitron/models/implicit_function/utils.py
@@ -9,6 +9,7 @@
 from typing import Callable, Optional
 import torch
 import torch.nn.functional as F
 from pytorch3d.common.compat import prod
 from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
--- a/pytorch3d/implicitron/models/implicit_function/voxel_grid.py
+++ b/pytorch3d/implicitron/models/implicit_function/voxel_grid.py
@@ -21,6 +21,8 @@ import logging
 import warnings
 from collections.abc import Mapping
 from dataclasses import dataclass, field
 from distutils.version import LooseVersion
 from typing import Any, Callable, ClassVar, Dict, Iterator, List, Optional, Tuple, Type
 import torch
@@ -220,8 +222,7 @@ class VoxelGridBase(ReplaceableBase, torch.nn.Module):
                + "| 'bicubic' | 'linear' | 'area' | 'nearest-exact'"
            )
-        # We assume PyTorch 1.11 and newer.
+        interpolate_has_antialias = LooseVersion(torch.__version__) >= "1.11"
-        interpolate_has_antialias = True
        if antialias and not interpolate_has_antialias:
            warnings.warn("Antialiased interpolation requires PyTorch 1.11+; ignoring")
--- a/pytorch3d/implicitron/models/implicit_function/voxel_grid_implicit_function.py
+++ b/pytorch3d/implicitron/models/implicit_function/voxel_grid_implicit_function.py
@@ -13,7 +13,9 @@ from dataclasses import fields
 from typing import Callable, Dict, Optional, Tuple
 import torch
 from omegaconf import DictConfig
 from pytorch3d.implicitron.models.implicit_function.base import ImplicitFunctionBase
 from pytorch3d.implicitron.models.implicit_function.decoding_functions import (
    DecoderFunctionBase,
--- a/pytorch3d/implicitron/models/overfit_model.py
+++ b/pytorch3d/implicitron/models/overfit_model.py
@@ -17,6 +17,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, TYPE_CHECKING, Un
 import torch
 from omegaconf import DictConfig
 from pytorch3d.implicitron.models.base_model import (
    ImplicitronModelBase,
    ImplicitronRender,
@@ -27,6 +28,7 @@ from pytorch3d.implicitron.models.metrics import (
    RegularizationMetricsBase,
    ViewMetricsBase,
 )
 from pytorch3d.implicitron.models.renderer.base import (
    BaseRenderer,
    EvaluationMode,
@@ -48,6 +50,7 @@ from pytorch3d.implicitron.tools.config import (
    registry,
    run_auto_creation,
 )
 from pytorch3d.implicitron.tools.rasterize_mc import rasterize_sparse_ray_bundle
 from pytorch3d.renderer import utils as rend_utils
 from pytorch3d.renderer.cameras import CamerasBase
--- a/pytorch3d/implicitron/models/renderer/ray_point_refiner.py
+++ b/pytorch3d/implicitron/models/renderer/ray_point_refiner.py
@@ -11,6 +11,7 @@ import copy
 import torch
 from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
 from pytorch3d.implicitron.tools.config import Configurable, expand_args_fields
 from pytorch3d.renderer.implicit.sample_pdf import sample_pdf
--- a/pytorch3d/implicitron/models/renderer/rgb_net.py
+++ b/pytorch3d/implicitron/models/renderer/rgb_net.py
@@ -12,6 +12,7 @@ import torch
 from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
 from pytorch3d.implicitron.tools.config import enable_get_default_args
 from pytorch3d.renderer.implicit import HarmonicEmbedding
 from torch import nn
--- a/pytorch3d/implicitron/models/utils.py
+++ b/pytorch3d/implicitron/models/utils.py
@@ -17,8 +17,11 @@ from typing import Any, Dict, Optional, Tuple
 import torch
 import tqdm
 from pytorch3d.common.compat import prod
 from pytorch3d.implicitron.models.renderer.base import ImplicitronRayBundle
 from pytorch3d.implicitron.tools import image_utils
 from pytorch3d.implicitron.tools.utils import cat_dataclass
@@ -80,9 +83,9 @@ def preprocess_input(
    if mask_depths and fg_mask is not None and depth_map is not None:
        # mask the depths
-        assert mask_threshold > 0.0, (
+        assert (
-            "Depths should be masked only with thresholded masks"
+            mask_threshold > 0.0
-        )
+        ), "Depths should be masked only with thresholded masks"
        warnings.warn("Masking depths!")
        depth_map = depth_map * fg_mask
--- a/pytorch3d/implicitron/models/visualization/render_flyaround.py
+++ b/pytorch3d/implicitron/models/visualization/render_flyaround.py
@@ -304,11 +304,11 @@ def _show_predictions(
    assert isinstance(preds, list)
    pred_all = []
-    # Randomly choose a subset of the rendered images, sort by order in the sequence
+    # Randomly choose a subset of the rendered images, sort by ordr in the sequence
    n_samples = min(n_samples, len(preds))
    pred_idx = sorted(random.sample(list(range(len(preds))), n_samples))
    for predi in pred_idx:
-        # Make the concatenation for the same camera vertically
+        # Make the concatentation for the same camera vertically
        pred_all.append(
            torch.cat(
                [
@@ -359,7 +359,7 @@ def _generate_prediction_videos(
    vws = {}
    for k in predicted_keys:
        if k not in preds[0]:
-            logger.warning(f"Cannot generate video for prediction key '{k}'")
+            logger.warn(f"Cannot generate video for prediction key '{k}'")
            continue
        cache_dir = (
            None
--- a/pytorch3d/implicitron/tools/rasterize_mc.py
+++ b/pytorch3d/implicitron/tools/rasterize_mc.py
@@ -10,6 +10,7 @@ import math
 from typing import Optional, Tuple
 import pytorch3d
 import torch
 from pytorch3d.ops import packed_to_padded
 from pytorch3d.renderer import PerspectiveCameras
--- a/pytorch3d/implicitron/tools/stats.py
+++ b/pytorch3d/implicitron/tools/stats.py
@@ -499,7 +499,7 @@ class StatsJSONEncoder(json.JSONEncoder):
            return enc
        else:
            raise TypeError(
-                f"Object of type {o.__class__.__name__} is not JSON serializable"
+                f"Object of type {o.__class__.__name__} " f"is not JSON serializable"
            )
--- a/pytorch3d/implicitron/tools/video_writer.py
+++ b/pytorch3d/implicitron/tools/video_writer.py
@@ -17,6 +17,7 @@ import matplotlib
 import matplotlib.pyplot as plt
 import numpy as np
 import torch
 from PIL import Image
 _NO_TORCHVISION = False
--- a/pytorch3d/io/obj_io.py
+++ b/pytorch3d/io/obj_io.py
@@ -796,7 +796,7 @@ def save_obj(
        # Create .mtl file with the material name and texture map filename
        # TODO: enable material properties to also be saved.
        with _open_file(mtl_path, path_manager, "w") as f_mtl:
-            lines = f"newmtl mesh\nmap_Kd {output_path.stem}.png\n"
+            lines = f"newmtl mesh\n" f"map_Kd {output_path.stem}.png\n"
            f_mtl.write(lines)
--- a/pytorch3d/loss/__init__.py
+++ b/pytorch3d/loss/__init__.py
@@ -8,8 +8,11 @@
 from .chamfer import chamfer_distance
 from .mesh_edge_loss import mesh_edge_loss
 from .mesh_laplacian_smoothing import mesh_laplacian_smoothing
 from .mesh_normal_consistency import mesh_normal_consistency
 from .point_mesh_distance import point_mesh_edge_distance, point_mesh_face_distance
--- a/Show More
+++ b/Show More
@@ -6,4 +6,4 @@
 # pyre-unsafe
-__version__ = "0.7.9"
+__version__ = "0.7.8"