Melvin He 3aee2a6005 Fixes bus error hard crashes on Apple Silicon MPS devices
Summary:
Fixes hard crashes (bus errors) when using an MPS device (Apple Silicon) by adding CPU device checks throughout the files in the csrc subdirectories, verifying that the input tensors are on a CPU device before a CPU implementation is used.

Note that this is the fourth and final part of a larger change spanning multiple files and directories.

Reviewed By: bottler

Differential Revision: D77698176

fbshipit-source-id: 5bc9e3c5cea61afd486aed7396f390d92775ec6d
2025-07-03 12:34:37 -07:00

563 lines
25 KiB
C++

/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <torch/extension.h>
#include <cstdio>
#include <tuple>
#include "rasterize_coarse/rasterize_coarse.h"
#include "utils/pytorch3d_cutils.h"
// ****************************************************************************
// * FORWARD PASS *
// ****************************************************************************
// CPU implementation of naive mesh rasterization. Only the declaration lives
// in this header. RasterizeMeshesNaive forwards its arguments unchanged to
// this function when face_verts is not a CUDA tensor.
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
RasterizeMeshesNaiveCpu(
const torch::Tensor& face_verts,
const torch::Tensor& mesh_to_face_first_idx,
const torch::Tensor& num_faces_per_mesh,
const torch::Tensor& clipped_faces_neighbor_idx,
const std::tuple<int, int> image_size,
const float blur_radius,
const int faces_per_pixel,
const bool perspective_correct,
const bool clip_barycentric_coords,
const bool cull_backfaces);
#ifdef WITH_CUDA
// CUDA implementation of naive mesh rasterization. It is declared only when
// WITH_CUDA is defined. RasterizeMeshesNaive calls it when face_verts is a
// CUDA tensor and passes its faces_per_pixel argument in the num_closest
// position.
std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor>
RasterizeMeshesNaiveCuda(
const at::Tensor& face_verts,
const at::Tensor& mesh_to_face_first_idx,
const at::Tensor& num_faces_per_mesh,
const torch::Tensor& clipped_faces_neighbor_idx,
const std::tuple<int, int> image_size,
const float blur_radius,
const int num_closest,
const bool perspective_correct,
const bool clip_barycentric_coords,
const bool cull_backfaces);
#endif
// Forward pass for rasterizing a batch of meshes.
//
// Args:
// face_verts: Tensor of shape (F, 3, 3) giving (packed) vertex positions for
// faces in all the meshes in the batch. Concretely,
// face_verts[f, i] = [x, y, z] gives the coordinates for the
// ith vertex of the fth face. These vertices are expected to be
// in NDC coordinates in the range [-1, 1].
// mesh_to_face_first_idx: LongTensor of shape (N) giving the index in
// face_verts of the first face in each mesh in
// the batch where N is the batch size.
// num_faces_per_mesh: LongTensor of shape (N) giving the number of faces
// for each mesh in the batch.
// clipped_faces_neighbor_idx: LongTensor of shape (F,) giving the
// index of the neighboring face for each face which was clipped to a
// quadrilateral and then divided into two triangles.
// e.g. for a face f partially behind the image plane which is split into
// two triangles (t1, t2): clipped_faces_neighbor_idx[t1_idx] = t2_idx
// Faces which are not clipped and subdivided are set to -1.
// image_size: Tuple (H, W) giving the size in pixels of the output
// image to be rasterized.
// blur_radius: float distance in NDC coordinates used to expand the face
// bounding boxes for the rasterization. Set to 0.0 if no blur
// is required.
// faces_per_pixel: the number of closest faces to rasterize per pixel.
// perspective_correct: Whether to apply perspective correction when
// computing barycentric coordinates. If this is True,
// then this function returns world-space barycentric
// coordinates for each pixel; if this is False then
// this function instead returns screen-space
// barycentric coordinates for each pixel.
// clip_barycentric_coords: Whether, after any perspective correction
// is applied but before the depth is calculated (e.g. for
// z clipping), to "correct" a location outside the face (i.e. with
// a negative barycentric coordinate) to a position on the edge of the
// face.
// cull_backfaces: Bool, Whether to only rasterize mesh faces which are
// visible to the camera. This assumes that vertices of
// front-facing triangles are ordered in an anti-clockwise
// fashion, and triangles that face away from the camera are
// in a clockwise order relative to the current view
// direction. NOTE: This will only work if the mesh faces are
// consistently defined with counter-clockwise ordering when
// viewed from the outside.
//
// Returns:
// A 4 element tuple of:
// pix_to_face: int64 tensor of shape (N, H, W, K) giving the face index of
// each of the closest faces to the pixel in the rasterized
// image, or -1 for pixels that are not covered by any face.
// zbuf: float32 Tensor of shape (N, H, W, K) giving the depth of each of
// the closest faces for each pixel.
// barycentric_coords: float tensor of shape (N, H, W, K, 3) giving
// barycentric coordinates of the pixel with respect to
// each of the closest faces along the z axis, padded
// with -1 for pixels hit by fewer than
// faces_per_pixel faces.
// dists: float tensor of shape (N, H, W, K) giving the euclidean distance
// in the (NDC) x/y plane between each pixel and its K closest
// faces along the z axis padded with -1 for pixels hit by fewer than
// faces_per_pixel faces.
inline std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
RasterizeMeshesNaive(
    const torch::Tensor& face_verts,
    const torch::Tensor& mesh_to_face_first_idx,
    const torch::Tensor& num_faces_per_mesh,
    const torch::Tensor& clipped_faces_neighbor_idx,
    const std::tuple<int, int> image_size,
    const float blur_radius,
    const int faces_per_pixel,
    const bool perspective_correct,
    const bool clip_barycentric_coords,
    const bool cull_backfaces) {
  // The backend is selected from the device of face_verts alone. Every tensor
  // that is forwarded to the backend is then validated against that choice so
  // that a tensor living on a different device is rejected with an error here
  // instead of being handed to an implementation that cannot use it.
  // TODO: Better type checking.
  if (face_verts.is_cuda()) {
#ifdef WITH_CUDA
    CHECK_CUDA(face_verts);
    CHECK_CUDA(mesh_to_face_first_idx);
    CHECK_CUDA(num_faces_per_mesh);
    // clipped_faces_neighbor_idx is forwarded too, so it gets the same check.
    CHECK_CUDA(clipped_faces_neighbor_idx);
    return RasterizeMeshesNaiveCuda(
        face_verts,
        mesh_to_face_first_idx,
        num_faces_per_mesh,
        clipped_faces_neighbor_idx,
        image_size,
        blur_radius,
        faces_per_pixel,
        perspective_correct,
        clip_barycentric_coords,
        cull_backfaces);
#else
    // face_verts is on a CUDA device but no CUDA kernels were compiled in.
    AT_ERROR("Not compiled with GPU support");
#endif
  } else {
    // Not CUDA does not imply CPU, so each tensor is checked explicitly
    // before the CPU implementation is allowed to touch it.
    CHECK_CPU(face_verts);
    CHECK_CPU(mesh_to_face_first_idx);
    CHECK_CPU(num_faces_per_mesh);
    CHECK_CPU(clipped_faces_neighbor_idx);
    return RasterizeMeshesNaiveCpu(
        face_verts,
        mesh_to_face_first_idx,
        num_faces_per_mesh,
        clipped_faces_neighbor_idx,
        image_size,
        blur_radius,
        faces_per_pixel,
        perspective_correct,
        clip_barycentric_coords,
        cull_backfaces);
  }
}
// ****************************************************************************
// * BACKWARD PASS *
// ****************************************************************************
// CPU implementation of the rasterization backward pass. Only the declaration
// lives in this header. RasterizeMeshesBackward forwards its arguments
// unchanged to this function when face_verts is not a CUDA tensor.
torch::Tensor RasterizeMeshesBackwardCpu(
const torch::Tensor& face_verts,
const torch::Tensor& pix_to_face,
const torch::Tensor& grad_zbuf,
const torch::Tensor& grad_bary,
const torch::Tensor& grad_dists,
const bool perspective_correct,
const bool clip_barycentric_coords);
#ifdef WITH_CUDA
// CUDA implementation of the rasterization backward pass. It is declared only
// when WITH_CUDA is defined. RasterizeMeshesBackward calls it when face_verts
// is a CUDA tensor.
torch::Tensor RasterizeMeshesBackwardCuda(
const torch::Tensor& face_verts,
const torch::Tensor& pix_to_face,
const torch::Tensor& grad_zbuf,
const torch::Tensor& grad_bary,
const torch::Tensor& grad_dists,
const bool perspective_correct,
const bool clip_barycentric_coords);
#endif
// Args:
// face_verts: float32 Tensor of shape (F, 3, 3) (from forward pass) giving
// (packed) vertex positions for faces in all the meshes in
// the batch.
// pix_to_face: int64 tensor of shape (N, H, W, K) giving the face index of
// each of the closest faces to the pixel in the rasterized
// image, or -1 for pixels that are not covered by any face.
// grad_zbuf: Tensor of shape (N, H, W, K) giving upstream gradients
// d(loss)/d(zbuf) of the zbuf tensor from the forward pass.
// grad_bary: Tensor of shape (N, H, W, K, 3) giving upstream gradients
// d(loss)/d(bary) of the barycentric_coords tensor returned by
// the forward pass.
// grad_dists: Tensor of shape (N, H, W, K) giving upstream gradients
// d(loss)/d(dists) of the dists tensor from the forward pass.
// perspective_correct: Whether to apply perspective correction when
// computing barycentric coordinates. If this is True,
// then this function returns world-space barycentric
// coordinates for each pixel; if this is False then
// this function instead returns screen-space
// barycentric coordinates for each pixel.
// clip_barycentric_coords: Whether, after any perspective correction
// is applied but before the depth is calculated (e.g. for
// z clipping), to "correct" a location outside the face (i.e. with
// a negative barycentric coordinate) to a position on the edge of the
// face.
//
// Returns:
// grad_face_verts: float32 Tensor of shape (F, 3, 3) giving downstream
// gradients for the face vertices.
// Defined in a header, so it is marked inline: without it, including this
// header from more than one translation unit yields duplicate definitions.
inline torch::Tensor RasterizeMeshesBackward(
    const torch::Tensor& face_verts,
    const torch::Tensor& pix_to_face,
    const torch::Tensor& grad_zbuf,
    const torch::Tensor& grad_bary,
    const torch::Tensor& grad_dists,
    const bool perspective_correct,
    const bool clip_barycentric_coords) {
  // The backend is selected from the device of face_verts; all other tensor
  // arguments are then required to match that choice.
  if (face_verts.is_cuda()) {
#ifdef WITH_CUDA
    CHECK_CUDA(face_verts);
    CHECK_CUDA(pix_to_face);
    CHECK_CUDA(grad_zbuf);
    CHECK_CUDA(grad_bary);
    CHECK_CUDA(grad_dists);
    return RasterizeMeshesBackwardCuda(
        face_verts,
        pix_to_face,
        grad_zbuf,
        grad_bary,
        grad_dists,
        perspective_correct,
        clip_barycentric_coords);
#else
    // face_verts is on a CUDA device but no CUDA kernels were compiled in.
    AT_ERROR("Not compiled with GPU support");
#endif
  } else {
    // Not CUDA does not imply CPU, so each tensor is checked explicitly
    // before the CPU implementation is allowed to touch it.
    CHECK_CPU(face_verts);
    CHECK_CPU(pix_to_face);
    CHECK_CPU(grad_zbuf);
    CHECK_CPU(grad_bary);
    CHECK_CPU(grad_dists);
    return RasterizeMeshesBackwardCpu(
        face_verts,
        pix_to_face,
        grad_zbuf,
        grad_bary,
        grad_dists,
        perspective_correct,
        clip_barycentric_coords);
  }
}
// ****************************************************************************
// * COARSE RASTERIZATION *
// ****************************************************************************
// RasterizeMeshesCoarseCuda in rasterize_coarse/rasterize_coarse.h
// CPU implementation of coarse rasterization. Only the declaration lives in
// this header. RasterizeMeshesCoarse forwards its arguments unchanged to this
// function when face_verts is not a CUDA tensor.
torch::Tensor RasterizeMeshesCoarseCpu(
const torch::Tensor& face_verts,
const at::Tensor& mesh_to_face_first_idx,
const at::Tensor& num_faces_per_mesh,
const std::tuple<int, int> image_size,
const float blur_radius,
const int bin_size,
const int max_faces_per_bin);
// Args:
// face_verts: Tensor of shape (F, 3, 3) giving (packed) vertex positions for
// faces in all the meshes in the batch. Concretely,
// face_verts[f, i] = [x, y, z] gives the coordinates for the
// ith vertex of the fth face. These vertices are expected to be
// in NDC coordinates in the range [-1, 1].
// mesh_to_face_first_idx: LongTensor of shape (N) giving the index in
// face_verts of the first face in each mesh in
// the batch where N is the batch size.
// num_faces_per_mesh: LongTensor of shape (N) giving the number of faces
// for each mesh in the batch.
// image_size: Tuple (H, W) giving the size in pixels of the output
// image to be rasterized.
// blur_radius: float distance in NDC coordinates used to expand the face
// bounding boxes for the rasterization. Set to 0.0 if no blur
// is required.
// bin_size: Size of each bin within the image (in pixels)
// max_faces_per_bin: Maximum number of faces to count in each bin.
//
// Returns:
// bin_face_idxs: Tensor of shape (N, num_bins, num_bins, K) giving the
// indices of faces that fall into each bin.
// Defined in a header, so it is marked inline: without it, including this
// header from more than one translation unit yields duplicate definitions.
inline torch::Tensor RasterizeMeshesCoarse(
    const torch::Tensor& face_verts,
    const torch::Tensor& mesh_to_face_first_idx,
    const torch::Tensor& num_faces_per_mesh,
    const std::tuple<int, int> image_size,
    const float blur_radius,
    const int bin_size,
    const int max_faces_per_bin) {
  // The backend is selected from the device of face_verts; all other tensor
  // arguments are then required to match that choice.
  if (face_verts.is_cuda()) {
#ifdef WITH_CUDA
    CHECK_CUDA(face_verts);
    CHECK_CUDA(mesh_to_face_first_idx);
    CHECK_CUDA(num_faces_per_mesh);
    return RasterizeMeshesCoarseCuda(
        face_verts,
        mesh_to_face_first_idx,
        num_faces_per_mesh,
        image_size,
        blur_radius,
        bin_size,
        max_faces_per_bin);
#else
    // face_verts is on a CUDA device but no CUDA kernels were compiled in.
    AT_ERROR("Not compiled with GPU support");
#endif
  } else {
    // Not CUDA does not imply CPU, so each tensor is checked explicitly
    // before the CPU implementation is allowed to touch it.
    CHECK_CPU(face_verts);
    CHECK_CPU(mesh_to_face_first_idx);
    CHECK_CPU(num_faces_per_mesh);
    return RasterizeMeshesCoarseCpu(
        face_verts,
        mesh_to_face_first_idx,
        num_faces_per_mesh,
        image_size,
        blur_radius,
        bin_size,
        max_faces_per_bin);
  }
}
// ****************************************************************************
// * FINE RASTERIZATION *
// ****************************************************************************
#ifdef WITH_CUDA
// CUDA implementation of fine rasterization. It is declared only when
// WITH_CUDA is defined. RasterizeMeshesFine calls it when face_verts is a CUDA
// tensor. This header declares no CPU counterpart: for non-CUDA inputs
// RasterizeMeshesFine raises a "NOT IMPLEMENTED" error instead.
std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
RasterizeMeshesFineCuda(
const torch::Tensor& face_verts,
const torch::Tensor& bin_faces,
const torch::Tensor& clipped_faces_neighbor_idx,
const std::tuple<int, int> image_size,
const float blur_radius,
const int bin_size,
const int faces_per_pixel,
const bool perspective_correct,
const bool clip_barycentric_coords,
const bool cull_backfaces);
#endif
// Args:
// face_verts: Tensor of shape (F, 3, 3) giving (packed) vertex positions for
// faces in all the meshes in the batch. Concretely,
// face_verts[f, i] = [x, y, z] gives the coordinates for the
// ith vertex of the fth face. These vertices are expected to be
// in NDC coordinates in the range [-1, 1].
// bin_faces: int32 Tensor of shape (N, B, B, M) giving the indices of faces
// that fall into each bin (output from coarse rasterization).
// clipped_faces_neighbor_idx: LongTensor of shape (F,) giving the
// index of the neighboring face for each face which was clipped to a
// quadrilateral and then divided into two triangles.
// e.g. for a face f partially behind the image plane which is split into
// two triangles (t1, t2): clipped_faces_neighbor_idx[t1_idx] = t2_idx
// Faces which are not clipped and subdivided are set to -1.
// image_size: Tuple (H, W) giving the size in pixels of the output
// image to be rasterized.
// blur_radius: float distance in NDC coordinates used to expand the face
// bounding boxes for the rasterization. Set to 0.0 if no blur
// is required.
// bin_size: Size of each bin within the image (in pixels)
// faces_per_pixel: the number of closest faces to rasterize per pixel.
// perspective_correct: Whether to apply perspective correction when
// computing barycentric coordinates. If this is True,
// then this function returns world-space barycentric
// coordinates for each pixel; if this is False then
// this function instead returns screen-space
// barycentric coordinates for each pixel.
// clip_barycentric_coords: Whether, after any perspective correction
// is applied but before the depth is calculated (e.g. for
// z clipping), to "correct" a location outside the face (i.e. with
// a negative barycentric coordinate) to a position on the edge of the
// face.
// cull_backfaces: Bool, Whether to only rasterize mesh faces which are
// visible to the camera. This assumes that vertices of
// front-facing triangles are ordered in an anti-clockwise
// fashion, and triangles that face away from the camera are
// in a clockwise order relative to the current view
// direction. NOTE: This will only work if the mesh faces are
// consistently defined with counter-clockwise ordering when
// viewed from the outside.
//
// Returns (same as rasterize_meshes):
// A 4 element tuple of:
// pix_to_face: int64 tensor of shape (N, H, W, K) giving the face index of
// each of the closest faces to the pixel in the rasterized
// image, or -1 for pixels that are not covered by any face.
// zbuf: float32 Tensor of shape (N, H, W, K) giving the depth of each of
// the closest faces for each pixel.
// barycentric_coords: float tensor of shape (N, H, W, K, 3) giving
// barycentric coordinates of the pixel with respect to
// each of the closest faces along the z axis, padded
// with -1 for pixels hit by fewer than
// faces_per_pixel faces.
// dists: float tensor of shape (N, H, W, K) giving the euclidean distance
// in the (NDC) x/y plane between each pixel and its K closest
// faces along the z axis padded with -1 for pixels hit by fewer than
// faces_per_pixel faces.
// Defined in a header, so it is marked inline: without it, including this
// header from more than one translation unit yields duplicate definitions.
inline std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
RasterizeMeshesFine(
    const torch::Tensor& face_verts,
    const torch::Tensor& bin_faces,
    const torch::Tensor& clipped_faces_neighbor_idx,
    const std::tuple<int, int> image_size,
    const float blur_radius,
    const int bin_size,
    const int faces_per_pixel,
    const bool perspective_correct,
    const bool clip_barycentric_coords,
    const bool cull_backfaces) {
  // The backend is selected from the device of face_verts; all other tensor
  // arguments are then required to match that choice.
  if (face_verts.is_cuda()) {
#ifdef WITH_CUDA
    CHECK_CUDA(face_verts);
    CHECK_CUDA(bin_faces);
    // clipped_faces_neighbor_idx is forwarded too, so it gets the same check.
    CHECK_CUDA(clipped_faces_neighbor_idx);
    return RasterizeMeshesFineCuda(
        face_verts,
        bin_faces,
        clipped_faces_neighbor_idx,
        image_size,
        blur_radius,
        bin_size,
        faces_per_pixel,
        perspective_correct,
        clip_barycentric_coords,
        cull_backfaces);
#else
    // face_verts is on a CUDA device but no CUDA kernels were compiled in.
    AT_ERROR("Not compiled with GPU support");
#endif
  } else {
    // Device mismatches are reported first; inputs that really are on the CPU
    // then hit the unconditional error because fine rasterization has no CPU
    // implementation in this header.
    CHECK_CPU(face_verts);
    CHECK_CPU(bin_faces);
    AT_ERROR("NOT IMPLEMENTED");
  }
}
// ****************************************************************************
// * MAIN ENTRY POINT *
// ****************************************************************************
// This is the main entry point for the forward pass of the mesh rasterizer;
// it uses either naive or coarse-to-fine rasterization based on bin_size.
//
// Args:
// face_verts: Tensor of shape (F, 3, 3) giving (packed) vertex positions for
// faces in all the meshes in the batch. Concretely,
// face_verts[f, i] = [x, y, z] gives the coordinates for the
// ith vertex of the fth face. These vertices are expected to be
// in NDC coordinates in the range [-1, 1].
// mesh_to_face_first_idx: LongTensor of shape (N) giving the index in
// face_verts of the first face in each mesh in
// the batch where N is the batch size.
// num_faces_per_mesh: LongTensor of shape (N) giving the number of faces
// for each mesh in the batch.
// clipped_faces_neighbor_idx: LongTensor of shape (F,) giving the
// index of the neighboring face for each face which was clipped to a
// quadrilateral and then divided into two triangles.
// e.g. for a face f partially behind the image plane which is split into
// two triangles (t1, t2): clipped_faces_neighbor_idx[t1_idx] = t2_idx
// Faces which are not clipped and subdivided are set to -1.
// image_size: Tuple (H, W) giving the size in pixels of the output
// image to be rasterized.
// blur_radius: float distance in NDC coordinates used to expand the face
// bounding boxes for the rasterization. Set to 0.0 if no blur
// is required.
// faces_per_pixel: the number of closest faces to rasterize per pixel.
// bin_size: Bin size (in pixels) for coarse-to-fine rasterization. Setting
// bin_size=0 uses naive rasterization instead.
// max_faces_per_bin: The maximum number of faces allowed to fall into each
// bin when using coarse-to-fine rasterization.
// perspective_correct: Whether to apply perspective correction when
// computing barycentric coordinates. If this is True,
// then this function returns world-space barycentric
// coordinates for each pixel; if this is False then
// this function instead returns screen-space
// barycentric coordinates for each pixel.
// clip_barycentric_coords: Whether, after any perspective correction
// is applied but before the depth is calculated (e.g. for
// z clipping), to "correct" a location outside the face (i.e. with
// a negative barycentric coordinate) to a position on the edge of the
// face.
// cull_backfaces: Bool, Whether to only rasterize mesh faces which are
// visible to the camera. This assumes that vertices of
// front-facing triangles are ordered in an anti-clockwise
// fashion, and triangles that face away from the camera are
// in a clockwise order relative to the current view
// direction. NOTE: This will only work if the mesh faces are
// consistently defined with counter-clockwise ordering when
// viewed from the outside.
//
// Returns:
// A 4 element tuple of:
// pix_to_face: int64 tensor of shape (N, H, W, K) giving the face index of
// each of the closest faces to the pixel in the rasterized
// image, or -1 for pixels that are not covered by any face.
// zbuf: float32 Tensor of shape (N, H, W, K) giving the depth of each of
// the closest faces for each pixel.
// barycentric_coords: float tensor of shape (N, H, W, K, 3) giving
// barycentric coordinates of the pixel with respect to
// each of the closest faces along the z axis, padded
// with -1 for pixels hit by fewer than
// faces_per_pixel faces.
// dists: float tensor of shape (N, H, W, K) giving the euclidean distance
// in the (NDC) x/y plane between each pixel and its K closest
// faces along the z axis padded with -1 for pixels hit by fewer than
// faces_per_pixel faces.
// Defined in a header, so it is marked inline: without it, including this
// header from more than one translation unit yields duplicate definitions.
inline std::tuple<torch::Tensor, torch::Tensor, torch::Tensor, torch::Tensor>
RasterizeMeshes(
    const torch::Tensor& face_verts,
    const torch::Tensor& mesh_to_face_first_idx,
    const torch::Tensor& num_faces_per_mesh,
    const torch::Tensor& clipped_faces_neighbor_idx,
    const std::tuple<int, int> image_size,
    const float blur_radius,
    const int faces_per_pixel,
    const int bin_size,
    const int max_faces_per_bin,
    const bool perspective_correct,
    const bool clip_barycentric_coords,
    const bool cull_backfaces) {
  // Coarse-to-fine is used only when both binning parameters are positive;
  // any other combination falls back to the naive per-pixel path.
  if (bin_size > 0 && max_faces_per_bin > 0) {
    // Stage 1: assign faces to image bins.
    at::Tensor bin_faces = RasterizeMeshesCoarse(
        face_verts,
        mesh_to_face_first_idx,
        num_faces_per_mesh,
        image_size,
        blur_radius,
        bin_size,
        max_faces_per_bin);
    // Stage 2: rasterize each pixel against the faces of its bin.
    return RasterizeMeshesFine(
        face_verts,
        bin_faces,
        clipped_faces_neighbor_idx,
        image_size,
        blur_radius,
        bin_size,
        faces_per_pixel,
        perspective_correct,
        clip_barycentric_coords,
        cull_backfaces);
  } else {
    // Use the naive per-pixel implementation
    return RasterizeMeshesNaive(
        face_verts,
        mesh_to_face_first_idx,
        num_faces_per_mesh,
        clipped_faces_neighbor_idx,
        image_size,
        blur_radius,
        faces_per_pixel,
        perspective_correct,
        clip_barycentric_coords,
        cull_backfaces);
  }
}