Files
pytorch3d/pytorch3d/csrc/point_mesh/point_mesh_cuda.h
Melvin He c5ea8fa49e Adds CHECK_CPU macros checks for tensors not on CPU
Summary:
Adds CHECK_CPU macros that checks if a tensor is on the CPU device throughout csrc directories and subdir up to `pulsar`.

Note that this is the third part of a larger change, and to keep diffs better organized, subsequent diffs will update the remaining directories.

Reviewed By: bottler

Differential Revision: D77696998

fbshipit-source-id: 470ca65b23d9965483b5bdd30c712da8e1131787
2025-07-03 08:29:36 -07:00

750 lines
25 KiB
C++

/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <torch/extension.h>
#include <cstdio>
#include <tuple>
#include "utils/pytorch3d_cutils.h"
// ****************************************************************************
// * PointFaceDistance *
// ****************************************************************************
// Computes the squared euclidean distance of each p in points to it closest
// triangular face belonging to the corresponding mesh example in the batch of
// size N.
//
// Args:
// points: FloatTensor of shape (P, 3)
// points_first_idx: LongTensor of shape (N,) indicating the first point
// index for each example in the batch
// tris: FloatTensor of shape (T, 3, 3) of the triangular faces. The t-th
// triangular face is spanned by (tris[t, 0], tris[t, 1], tris[t, 2])
// tris_first_idx: LongTensor of shape (N,) indicating the first face
// index for each example in the batch
// max_points: Scalar equal to max(P_i) for i in [0, N - 1] containing
// the maximum number of points in the batch and is used to set
// the block dimensions in the CUDA implementation.
// min_triangle_area: triangles less than this size are considered
// points/lines.
//
// Returns:
// dists: FloatTensor of shape (P,), where dists[p] is the minimum
// squared euclidean distance of points[p] to the faces in the same
// example in the batch.
// idxs: LongTensor of shape (P,), where idxs[p] is the index of the closest
// face in the batch.
// So, dists[p] = d(points[p], tris[idxs[p], 0], tris[idxs[p], 1],
// tris[idxs[p], 2]) where d(u, v0, v1, v2) is the distance of u from the
// face spanned by (v0, v1, v2)
//
//
#ifdef WITH_CUDA
std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceForwardCuda(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& tris,
const torch::Tensor& tris_first_idx,
const int64_t max_points,
const double min_triangle_area);
#endif
std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceForwardCpu(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& tris,
const torch::Tensor& tris_first_idx,
const double min_triangle_area);
std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceForward(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& tris,
const torch::Tensor& tris_first_idx,
const int64_t max_points,
const double min_triangle_area) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(points_first_idx);
CHECK_CUDA(tris);
CHECK_CUDA(tris_first_idx);
return PointFaceDistanceForwardCuda(
points,
points_first_idx,
tris,
tris_first_idx,
max_points,
min_triangle_area);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(points_first_idx);
CHECK_CPU(tris);
CHECK_CPU(tris_first_idx);
return PointFaceDistanceForwardCpu(
points, points_first_idx, tris, tris_first_idx, min_triangle_area);
}
// Backward pass for PointFaceDistance.
//
// Args:
// points: FloatTensor of shape (P, 3)
// tris: FloatTensor of shape (T, 3, 3)
// idx_points: LongTensor of shape (P,) containing the indices
// of the closest face in the example in the batch.
// This is computed by the forward pass
// grad_dists: FloatTensor of shape (P,)
// min_triangle_area: triangles less than this size are considered
// points/lines.
//
// Returns:
// grad_points: FloatTensor of shape (P, 3)
// grad_tris: FloatTensor of shape (T, 3, 3)
//
#ifdef WITH_CUDA
std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceBackwardCuda(
const torch::Tensor& points,
const torch::Tensor& tris,
const torch::Tensor& idx_points,
const torch::Tensor& grad_dists,
const double min_triangle_area);
#endif
std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceBackwardCpu(
const torch::Tensor& points,
const torch::Tensor& tris,
const torch::Tensor& idx_points,
const torch::Tensor& grad_dists,
const double min_triangle_area);
std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceBackward(
const torch::Tensor& points,
const torch::Tensor& tris,
const torch::Tensor& idx_points,
const torch::Tensor& grad_dists,
const double min_triangle_area) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(tris);
CHECK_CUDA(idx_points);
CHECK_CUDA(grad_dists);
return PointFaceDistanceBackwardCuda(
points, tris, idx_points, grad_dists, min_triangle_area);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(tris);
CHECK_CPU(idx_points);
CHECK_CPU(grad_dists);
return PointFaceDistanceBackwardCpu(
points, tris, idx_points, grad_dists, min_triangle_area);
}
// ****************************************************************************
// * FacePointDistance *
// ****************************************************************************
// Computes the squared euclidean distance of each triangular face to its
// closest point belonging to the corresponding example in the batch of size N.
//
// Args:
// points: FloatTensor of shape (P, 3)
// points_first_idx: LongTensor of shape (N,) indicating the first point
// index for each example in the batch
// tris: FloatTensor of shape (T, 3, 3) of the triangular faces. The t-th
// triangular face is spanned by (tris[t, 0], tris[t, 1], tris[t, 2])
// tris_first_idx: LongTensor of shape (N,) indicating the first face
// index for each example in the batch
// max_tris: Scalar equal to max(T_i) for i in [0, N - 1] containing
// the maximum number of faces in the batch and is used to set
// the block dimensions in the CUDA implementation.
// min_triangle_area: triangles less than this size are considered
// points/lines.
//
// Returns:
// dists: FloatTensor of shape (T,), where dists[t] is the minimum squared
// euclidean distance of t-th triangular face from the closest point in
// the batch.
// idxs: LongTensor of shape (T,), where idxs[t] is the index of the closest
// point in the batch.
// So, dists[t] = d(points[idxs[t]], tris[t, 0], tris[t, 1], tris[t, 2])
// where d(u, v0, v1, v2) is the distance of u from the triangular face
// spanned by (v0, v1, v2)
//
#ifdef WITH_CUDA
std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceForwardCuda(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& tris,
const torch::Tensor& tris_first_idx,
const int64_t max_tris,
const double min_triangle_area);
#endif
std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceForwardCpu(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& tris,
const torch::Tensor& tris_first_idx,
const double min_triangle_area);
std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceForward(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& tris,
const torch::Tensor& tris_first_idx,
const int64_t max_tris,
const double min_triangle_area) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(points_first_idx);
CHECK_CUDA(tris);
CHECK_CUDA(tris_first_idx);
return FacePointDistanceForwardCuda(
points,
points_first_idx,
tris,
tris_first_idx,
max_tris,
min_triangle_area);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(points_first_idx);
CHECK_CPU(tris);
CHECK_CPU(tris_first_idx);
return FacePointDistanceForwardCpu(
points, points_first_idx, tris, tris_first_idx, min_triangle_area);
}
// Backward pass for FacePointDistance.
//
// Args:
// points: FloatTensor of shape (P, 3)
// tris: FloatTensor of shape (T, 3, 3)
// idx_tris: LongTensor of shape (T,) containing the indices
// of the closest point in the example in the batch.
// This is computed by the forward pass
// grad_dists: FloatTensor of shape (T,)
// min_triangle_area: triangles less than this size are considered
// points/lines.
//
// Returns:
// grad_points: FloatTensor of shape (P, 3)
// grad_tris: FloatTensor of shape (T, 3, 3)
//
#ifdef WITH_CUDA
std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceBackwardCuda(
const torch::Tensor& points,
const torch::Tensor& tris,
const torch::Tensor& idx_tris,
const torch::Tensor& grad_dists,
const double min_triangle_area);
#endif
std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceBackwardCpu(
const torch::Tensor& points,
const torch::Tensor& tris,
const torch::Tensor& idx_tris,
const torch::Tensor& grad_dists,
const double min_triangle_area);
std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceBackward(
const torch::Tensor& points,
const torch::Tensor& tris,
const torch::Tensor& idx_tris,
const torch::Tensor& grad_dists,
const double min_triangle_area) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(tris);
CHECK_CUDA(idx_tris);
CHECK_CUDA(grad_dists);
return FacePointDistanceBackwardCuda(
points, tris, idx_tris, grad_dists, min_triangle_area);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(tris);
CHECK_CPU(idx_tris);
CHECK_CPU(grad_dists);
return FacePointDistanceBackwardCpu(
points, tris, idx_tris, grad_dists, min_triangle_area);
}
// ****************************************************************************
// * PointEdgeDistance *
// ****************************************************************************
// Computes the squared euclidean distance of each p in points to the closest
// mesh edge belonging to the corresponding example in the batch of size N.
//
// Args:
// points: FloatTensor of shape (P, 3)
// points_first_idx: LongTensor of shape (N,) indicating the first point
// index for each example in the batch
// segms: FloatTensor of shape (S, 2, 3) of edge segments. The s-th edge
// segment is spanned by (segms[s, 0], segms[s, 1])
// segms_first_idx: LongTensor of shape (N,) indicating the first edge
// index for each example in the batch
// max_points: Scalar equal to max(P_i) for i in [0, N - 1] containing
// the maximum number of points in the batch and is used to set
// the grid dimensions in the CUDA implementation.
//
// Returns:
// dists: FloatTensor of shape (P,), where dists[p] is the squared euclidean
// distance of points[p] to the closest edge in the same example in the
// batch.
// idxs: LongTensor of shape (P,), where idxs[p] is the index of the closest
// edge in the batch.
// So, dists[p] = d(points[p], segms[idxs[p], 0], segms[idxs[p], 1]),
// where d(u, v0, v1) is the distance of u from the segment spanned by
// (v0, v1).
//
#ifdef WITH_CUDA
std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceForwardCuda(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& segms,
const torch::Tensor& segms_first_idx,
const int64_t max_points);
#endif
std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceForwardCpu(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& segms,
const torch::Tensor& segms_first_idx,
const int64_t max_points);
std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceForward(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& segms,
const torch::Tensor& segms_first_idx,
const int64_t max_points) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(points_first_idx);
CHECK_CUDA(segms);
CHECK_CUDA(segms_first_idx);
return PointEdgeDistanceForwardCuda(
points, points_first_idx, segms, segms_first_idx, max_points);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(points_first_idx);
CHECK_CPU(segms);
CHECK_CPU(segms_first_idx);
return PointEdgeDistanceForwardCpu(
points, points_first_idx, segms, segms_first_idx, max_points);
}
// Backward pass for PointEdgeDistance.
//
// Args:
// points: FloatTensor of shape (P, 3)
// segms: FloatTensor of shape (S, 2, 3)
// idx_points: LongTensor of shape (P,) containing the indices
// of the closest edge in the example in the batch.
// This is computed by the forward pass.
// grad_dists: FloatTensor of shape (P,)
//
// Returns:
// grad_points: FloatTensor of shape (P, 3)
// grad_segms: FloatTensor of shape (S, 2, 3)
//
#ifdef WITH_CUDA
std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceBackwardCuda(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& idx_points,
const torch::Tensor& grad_dists);
#endif
std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceBackwardCpu(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& idx_points,
const torch::Tensor& grad_dists);
std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceBackward(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& idx_points,
const torch::Tensor& grad_dists) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(segms);
CHECK_CUDA(idx_points);
CHECK_CUDA(grad_dists);
return PointEdgeDistanceBackwardCuda(points, segms, idx_points, grad_dists);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(segms);
CHECK_CPU(idx_points);
CHECK_CPU(grad_dists);
return PointEdgeDistanceBackwardCpu(points, segms, idx_points, grad_dists);
}
// ****************************************************************************
// * EdgePointDistance *
// ****************************************************************************
// Computes the squared euclidean distance of each edge segment to the closest
// point belonging to the corresponding example in the batch of size N.
//
// Args:
// points: FloatTensor of shape (P, 3)
// points_first_idx: LongTensor of shape (N,) indicating the first point
// index for each example in the batch
// segms: FloatTensor of shape (S, 2, 3) of edge segments. The s-th edge
// segment is spanned by (segms[s, 0], segms[s, 1])
// segms_first_idx: LongTensor of shape (N,) indicating the first edge
// index for each example in the batch
// max_segms: Scalar equal to max(S_i) for i in [0, N - 1] containing
// the maximum number of edges in the batch and is used to set
// the block dimensions in the CUDA implementation.
//
// Returns:
// dists: FloatTensor of shape (S,), where dists[s] is the squared
// euclidean distance of s-th edge to the closest point in the
// corresponding example in the batch.
// idxs: LongTensor of shape (S,), where idxs[s] is the index of the closest
// point in the example in the batch.
// So, dists[s] = d(points[idxs[s]], segms[s, 0], segms[s, 1]), where
// d(u, v0, v1) is the distance of u from the segment spanned by (v0, v1)
//
//
#ifdef WITH_CUDA
std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceForwardCuda(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& segms,
const torch::Tensor& segms_first_idx,
const int64_t max_segms);
#endif
std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceForwardCpu(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& segms,
const torch::Tensor& segms_first_idx,
const int64_t max_segms);
std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceForward(
const torch::Tensor& points,
const torch::Tensor& points_first_idx,
const torch::Tensor& segms,
const torch::Tensor& segms_first_idx,
const int64_t max_segms) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(points_first_idx);
CHECK_CUDA(segms);
CHECK_CUDA(segms_first_idx);
return EdgePointDistanceForwardCuda(
points, points_first_idx, segms, segms_first_idx, max_segms);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(points_first_idx);
CHECK_CPU(segms);
CHECK_CPU(segms_first_idx);
return EdgePointDistanceForwardCpu(
points, points_first_idx, segms, segms_first_idx, max_segms);
}
// Backward pass for EdgePointDistance.
//
// Args:
// points: FloatTensor of shape (P, 3)
// segms: FloatTensor of shape (S, 2, 3)
// idx_segms: LongTensor of shape (S,) containing the indices
// of the closest point in the example in the batch.
// This is computed by the forward pass
// grad_dists: FloatTensor of shape (S,)
//
// Returns:
// grad_points: FloatTensor of shape (P, 3)
// grad_segms: FloatTensor of shape (S, 2, 3)
//
#ifdef WITH_CUDA
std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceBackwardCuda(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& idx_segms,
const torch::Tensor& grad_dists);
#endif
std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceBackwardCpu(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& idx_segms,
const torch::Tensor& grad_dists);
std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceBackward(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& idx_segms,
const torch::Tensor& grad_dists) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(segms);
CHECK_CUDA(idx_segms);
CHECK_CUDA(grad_dists);
return EdgePointDistanceBackwardCuda(points, segms, idx_segms, grad_dists);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(segms);
CHECK_CPU(idx_segms);
CHECK_CPU(grad_dists);
return EdgePointDistanceBackwardCpu(points, segms, idx_segms, grad_dists);
}
// ****************************************************************************
// * PointFaceArrayDistance *
// ****************************************************************************
// Computes the squared euclidean distance of each p in points to each
// triangular face spanned by (v0, v1, v2) in tris.
//
// Args:
// points: FloatTensor of shape (P, 3)
// tris: FloatTensor of shape (T, 3, 3) of the triangular faces. The t-th
// triangular face is spanned by (tris[t, 0], tris[t, 1], tris[t, 2])
// min_triangle_area: triangles less than this size are considered
// points/lines.
//
// Returns:
// dists: FloatTensor of shape (P, T), where dists[p, t] is the squared
// euclidean distance of points[p] to the face spanned by (v0, v1, v2)
// where v0 = tris[t, 0], v1 = tris[t, 1] and v2 = tris[t, 2]
//
// For pointcloud and meshes of batch size N, this function requires N
// computations. The memory occupied is O(NPT) which can become quite large.
// For example, a medium sized batch with N = 32 with P = 10000 and T = 5000
// will require for the forward pass 5.8G of memory to store dists.
#ifdef WITH_CUDA
torch::Tensor PointFaceArrayDistanceForwardCuda(
const torch::Tensor& points,
const torch::Tensor& tris,
const double min_triangle_area);
#endif
torch::Tensor PointFaceArrayDistanceForwardCpu(
const torch::Tensor& points,
const torch::Tensor& tris,
const double min_triangle_area);
torch::Tensor PointFaceArrayDistanceForward(
const torch::Tensor& points,
const torch::Tensor& tris,
const double min_triangle_area) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(tris);
return PointFaceArrayDistanceForwardCuda(points, tris, min_triangle_area);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(tris);
return PointFaceArrayDistanceForwardCpu(points, tris, min_triangle_area);
}
// Backward pass for PointFaceArrayDistance.
//
// Args:
// points: FloatTensor of shape (P, 3)
// tris: FloatTensor of shape (T, 3, 3)
// grad_dists: FloatTensor of shape (P, T)
// min_triangle_area: triangles less than this size are considered
// points/lines.
//
// Returns:
// grad_points: FloatTensor of shape (P, 3)
// grad_tris: FloatTensor of shape (T, 3, 3)
//
#ifdef WITH_CUDA
std::tuple<torch::Tensor, torch::Tensor> PointFaceArrayDistanceBackwardCuda(
const torch::Tensor& points,
const torch::Tensor& tris,
const torch::Tensor& grad_dists,
const double min_triangle_area);
#endif
std::tuple<torch::Tensor, torch::Tensor> PointFaceArrayDistanceBackwardCpu(
const torch::Tensor& points,
const torch::Tensor& tris,
const torch::Tensor& grad_dists,
const double min_triangle_area);
std::tuple<torch::Tensor, torch::Tensor> PointFaceArrayDistanceBackward(
const torch::Tensor& points,
const torch::Tensor& tris,
const torch::Tensor& grad_dists,
const double min_triangle_area) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(tris);
CHECK_CUDA(grad_dists);
return PointFaceArrayDistanceBackwardCuda(
points, tris, grad_dists, min_triangle_area);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(tris);
CHECK_CPU(grad_dists);
return PointFaceArrayDistanceBackwardCpu(
points, tris, grad_dists, min_triangle_area);
}
// ****************************************************************************
// * PointEdgeArrayDistance *
// ****************************************************************************
// Computes the squared euclidean distance of each p in points to each edge
// segment in segms.
//
// Args:
// points: FloatTensor of shape (P, 3)
// segms: FloatTensor of shape (S, 2, 3) of edge segments. The s-th
// edge segment is spanned by (segms[s, 0], segms[s, 1])
//
// Returns:
// dists: FloatTensor of shape (P, S), where dists[p, s] is the squared
// euclidean distance of points[p] to the segment spanned by
// (segms[s, 0], segms[s, 1])
//
// For pointcloud and meshes of batch size N, this function requires N
// computations. The memory occupied is O(NPS) which can become quite large.
// For example, a medium sized batch with N = 32 with P = 10000 and S = 5000
// will require for the forward pass 5.8G of memory to store dists.
#ifdef WITH_CUDA
torch::Tensor PointEdgeArrayDistanceForwardCuda(
const torch::Tensor& points,
const torch::Tensor& segms);
#endif
torch::Tensor PointEdgeArrayDistanceForwardCpu(
const torch::Tensor& points,
const torch::Tensor& segms);
torch::Tensor PointEdgeArrayDistanceForward(
const torch::Tensor& points,
const torch::Tensor& segms) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(segms);
return PointEdgeArrayDistanceForwardCuda(points, segms);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(segms);
return PointEdgeArrayDistanceForwardCpu(points, segms);
}
// Backward pass for PointEdgeArrayDistance.
//
// Args:
// points: FloatTensor of shape (P, 3)
// segms: FloatTensor of shape (S, 2, 3)
// grad_dists: FloatTensor of shape (P, S)
//
// Returns:
// grad_points: FloatTensor of shape (P, 3)
// grad_segms: FloatTensor of shape (S, 2, 3)
//
#ifdef WITH_CUDA
std::tuple<torch::Tensor, torch::Tensor> PointEdgeArrayDistanceBackwardCuda(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& grad_dists);
#endif
std::tuple<torch::Tensor, torch::Tensor> PointEdgeArrayDistanceBackwardCpu(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& grad_dists);
std::tuple<torch::Tensor, torch::Tensor> PointEdgeArrayDistanceBackward(
const torch::Tensor& points,
const torch::Tensor& segms,
const torch::Tensor& grad_dists) {
if (points.is_cuda()) {
#ifdef WITH_CUDA
CHECK_CUDA(points);
CHECK_CUDA(segms);
CHECK_CUDA(grad_dists);
return PointEdgeArrayDistanceBackwardCuda(points, segms, grad_dists);
#else
AT_ERROR("Not compiled with GPU support.");
#endif
}
CHECK_CPU(points);
CHECK_CPU(segms);
CHECK_CPU(grad_dists);
return PointEdgeArrayDistanceBackwardCpu(points, segms, grad_dists);
}