mirror of
https://github.com/facebookresearch/pytorch3d.git
synced 2026-03-03 18:55:59 +08:00
Make CUDA tensors contiguous in the host function and remove the contiguous checks
Summary: Update the CUDA kernels to:
- remove the contiguous checks for the grad tensors and for the CPU functions which use accessors
- for the CUDA implementations, call `.contiguous()` on all tensors in the host function before invoking the kernel

Reviewed By: gkioxari

Differential Revision: D21598008

fbshipit-source-id: 9b97bda4582fd4269c8a00999874d4552a1aea2d
This commit is contained in:
committed by
Facebook GitHub Bot
parent
a8377f1f06
commit
3fef506895
@@ -144,15 +144,16 @@ std::tuple<at::Tensor, at::Tensor> PointEdgeDistanceForwardCuda(
|
||||
size_t shared_size = threads * sizeof(size_t) + threads * sizeof(int64_t);
|
||||
|
||||
PointEdgeForwardKernel<<<blocks, threads, shared_size, stream>>>(
|
||||
points.data_ptr<float>(),
|
||||
points_first_idx.data_ptr<int64_t>(),
|
||||
segms.data_ptr<float>(),
|
||||
segms_first_idx.data_ptr<int64_t>(),
|
||||
points.contiguous().data_ptr<float>(),
|
||||
points_first_idx.contiguous().data_ptr<int64_t>(),
|
||||
segms.contiguous().data_ptr<float>(),
|
||||
segms_first_idx.contiguous().data_ptr<int64_t>(),
|
||||
dists.data_ptr<float>(),
|
||||
idxs.data_ptr<int64_t>(),
|
||||
B,
|
||||
P,
|
||||
S);
|
||||
|
||||
AT_CUDA_CHECK(cudaGetLastError());
|
||||
return std::make_tuple(dists, idxs);
|
||||
}
|
||||
@@ -240,10 +241,10 @@ std::tuple<at::Tensor, at::Tensor> PointEdgeDistanceBackwardCuda(
|
||||
const int threads = 512;
|
||||
|
||||
PointEdgeBackwardKernel<<<blocks, threads, 0, stream>>>(
|
||||
points.data_ptr<float>(),
|
||||
segms.data_ptr<float>(),
|
||||
idx_points.data_ptr<int64_t>(),
|
||||
grad_dists.data_ptr<float>(),
|
||||
points.contiguous().data_ptr<float>(),
|
||||
segms.contiguous().data_ptr<float>(),
|
||||
idx_points.contiguous().data_ptr<int64_t>(),
|
||||
grad_dists.contiguous().data_ptr<float>(),
|
||||
grad_points.data_ptr<float>(),
|
||||
grad_segms.data_ptr<float>(),
|
||||
P);
|
||||
@@ -386,10 +387,10 @@ std::tuple<at::Tensor, at::Tensor> EdgePointDistanceForwardCuda(
|
||||
size_t shared_size = threads * sizeof(size_t) + threads * sizeof(int64_t);
|
||||
|
||||
EdgePointForwardKernel<<<blocks, threads, shared_size, stream>>>(
|
||||
points.data_ptr<float>(),
|
||||
points_first_idx.data_ptr<int64_t>(),
|
||||
segms.data_ptr<float>(),
|
||||
segms_first_idx.data_ptr<int64_t>(),
|
||||
points.contiguous().data_ptr<float>(),
|
||||
points_first_idx.contiguous().data_ptr<int64_t>(),
|
||||
segms.contiguous().data_ptr<float>(),
|
||||
segms_first_idx.contiguous().data_ptr<int64_t>(),
|
||||
dists.data_ptr<float>(),
|
||||
idxs.data_ptr<int64_t>(),
|
||||
B,
|
||||
@@ -478,10 +479,10 @@ std::tuple<at::Tensor, at::Tensor> EdgePointDistanceBackwardCuda(
|
||||
const int threads = 512;
|
||||
|
||||
EdgePointBackwardKernel<<<blocks, threads, 0, stream>>>(
|
||||
points.data_ptr<float>(),
|
||||
segms.data_ptr<float>(),
|
||||
idx_segms.data_ptr<int64_t>(),
|
||||
grad_dists.data_ptr<float>(),
|
||||
points.contiguous().data_ptr<float>(),
|
||||
segms.contiguous().data_ptr<float>(),
|
||||
idx_segms.contiguous().data_ptr<int64_t>(),
|
||||
grad_dists.contiguous().data_ptr<float>(),
|
||||
grad_points.data_ptr<float>(),
|
||||
grad_segms.data_ptr<float>(),
|
||||
S);
|
||||
@@ -550,8 +551,8 @@ at::Tensor PointEdgeArrayDistanceForwardCuda(
|
||||
const size_t threads = 64;
|
||||
|
||||
PointEdgeArrayForwardKernel<<<blocks, threads, 0, stream>>>(
|
||||
points.data_ptr<float>(),
|
||||
segms.data_ptr<float>(),
|
||||
points.contiguous().data_ptr<float>(),
|
||||
segms.contiguous().data_ptr<float>(),
|
||||
dists.data_ptr<float>(),
|
||||
P,
|
||||
S);
|
||||
@@ -638,9 +639,9 @@ std::tuple<at::Tensor, at::Tensor> PointEdgeArrayDistanceBackwardCuda(
|
||||
const size_t threads = 64;
|
||||
|
||||
PointEdgeArrayBackwardKernel<<<blocks, threads, 0, stream>>>(
|
||||
points.data_ptr<float>(),
|
||||
segms.data_ptr<float>(),
|
||||
grad_dists.data_ptr<float>(),
|
||||
points.contiguous().data_ptr<float>(),
|
||||
segms.contiguous().data_ptr<float>(),
|
||||
grad_dists.contiguous().data_ptr<float>(),
|
||||
grad_points.data_ptr<float>(),
|
||||
grad_segms.data_ptr<float>(),
|
||||
P,
|
||||
|
||||
@@ -54,10 +54,10 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceForward(
|
||||
const int64_t max_points) {
|
||||
if (points.is_cuda()) {
|
||||
#ifdef WITH_CUDA
|
||||
CHECK_CONTIGUOUS_CUDA(points);
|
||||
CHECK_CONTIGUOUS_CUDA(points_first_idx);
|
||||
CHECK_CONTIGUOUS_CUDA(segms);
|
||||
CHECK_CONTIGUOUS_CUDA(segms_first_idx);
|
||||
CHECK_CUDA(points);
|
||||
CHECK_CUDA(points_first_idx);
|
||||
CHECK_CUDA(segms);
|
||||
CHECK_CUDA(segms_first_idx);
|
||||
return PointEdgeDistanceForwardCuda(
|
||||
points, points_first_idx, segms, segms_first_idx, max_points);
|
||||
#else
|
||||
@@ -98,10 +98,10 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceBackward(
|
||||
const torch::Tensor& grad_dists) {
|
||||
if (points.is_cuda()) {
|
||||
#ifdef WITH_CUDA
|
||||
CHECK_CONTIGUOUS_CUDA(points);
|
||||
CHECK_CONTIGUOUS_CUDA(segms);
|
||||
CHECK_CONTIGUOUS_CUDA(idx_points);
|
||||
CHECK_CONTIGUOUS_CUDA(grad_dists);
|
||||
CHECK_CUDA(points);
|
||||
CHECK_CUDA(segms);
|
||||
CHECK_CUDA(idx_points);
|
||||
CHECK_CUDA(grad_dists);
|
||||
return PointEdgeDistanceBackwardCuda(points, segms, idx_points, grad_dists);
|
||||
#else
|
||||
AT_ERROR("Not compiled with GPU support.");
|
||||
@@ -158,10 +158,10 @@ std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceForward(
|
||||
const int64_t max_segms) {
|
||||
if (points.is_cuda()) {
|
||||
#ifdef WITH_CUDA
|
||||
CHECK_CONTIGUOUS_CUDA(points);
|
||||
CHECK_CONTIGUOUS_CUDA(points_first_idx);
|
||||
CHECK_CONTIGUOUS_CUDA(segms);
|
||||
CHECK_CONTIGUOUS_CUDA(segms_first_idx);
|
||||
CHECK_CUDA(points);
|
||||
CHECK_CUDA(points_first_idx);
|
||||
CHECK_CUDA(segms);
|
||||
CHECK_CUDA(segms_first_idx);
|
||||
return EdgePointDistanceForwardCuda(
|
||||
points, points_first_idx, segms, segms_first_idx, max_segms);
|
||||
#else
|
||||
@@ -202,10 +202,10 @@ std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceBackward(
|
||||
const torch::Tensor& grad_dists) {
|
||||
if (points.is_cuda()) {
|
||||
#ifdef WITH_CUDA
|
||||
CHECK_CONTIGUOUS_CUDA(points);
|
||||
CHECK_CONTIGUOUS_CUDA(segms);
|
||||
CHECK_CONTIGUOUS_CUDA(idx_segms);
|
||||
CHECK_CONTIGUOUS_CUDA(grad_dists);
|
||||
CHECK_CUDA(points);
|
||||
CHECK_CUDA(segms);
|
||||
CHECK_CUDA(idx_segms);
|
||||
CHECK_CUDA(grad_dists);
|
||||
return EdgePointDistanceBackwardCuda(points, segms, idx_segms, grad_dists);
|
||||
#else
|
||||
AT_ERROR("Not compiled with GPU support.");
|
||||
@@ -247,8 +247,8 @@ torch::Tensor PointEdgeArrayDistanceForward(
|
||||
const torch::Tensor& segms) {
|
||||
if (points.is_cuda()) {
|
||||
#ifdef WITH_CUDA
|
||||
CHECK_CONTIGUOUS_CUDA(points);
|
||||
CHECK_CONTIGUOUS_CUDA(segms);
|
||||
CHECK_CUDA(points);
|
||||
CHECK_CUDA(segms);
|
||||
return PointEdgeArrayDistanceForwardCuda(points, segms);
|
||||
#else
|
||||
AT_ERROR("Not compiled with GPU support.");
|
||||
@@ -283,9 +283,9 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeArrayDistanceBackward(
|
||||
const torch::Tensor& grad_dists) {
|
||||
if (points.is_cuda()) {
|
||||
#ifdef WITH_CUDA
|
||||
CHECK_CONTIGUOUS_CUDA(points);
|
||||
CHECK_CONTIGUOUS_CUDA(segms);
|
||||
CHECK_CONTIGUOUS_CUDA(grad_dists);
|
||||
CHECK_CUDA(points);
|
||||
CHECK_CUDA(segms);
|
||||
CHECK_CUDA(grad_dists);
|
||||
return PointEdgeArrayDistanceBackwardCuda(points, segms, grad_dists);
|
||||
#else
|
||||
AT_ERROR("Not compiled with GPU support.");
|
||||
|
||||
@@ -145,10 +145,10 @@ std::tuple<at::Tensor, at::Tensor> PointFaceDistanceForwardCuda(
|
||||
size_t shared_size = threads * sizeof(size_t) + threads * sizeof(int64_t);
|
||||
|
||||
PointFaceForwardKernel<<<blocks, threads, shared_size, stream>>>(
|
||||
points.data_ptr<float>(),
|
||||
points_first_idx.data_ptr<int64_t>(),
|
||||
tris.data_ptr<float>(),
|
||||
tris_first_idx.data_ptr<int64_t>(),
|
||||
points.contiguous().data_ptr<float>(),
|
||||
points_first_idx.contiguous().data_ptr<int64_t>(),
|
||||
tris.contiguous().data_ptr<float>(),
|
||||
tris_first_idx.contiguous().data_ptr<int64_t>(),
|
||||
dists.data_ptr<float>(),
|
||||
idxs.data_ptr<int64_t>(),
|
||||
B,
|
||||
@@ -249,10 +249,10 @@ std::tuple<at::Tensor, at::Tensor> PointFaceDistanceBackwardCuda(
|
||||
const int threads = 512;
|
||||
|
||||
PointFaceBackwardKernel<<<blocks, threads, 0, stream>>>(
|
||||
points.data_ptr<float>(),
|
||||
tris.data_ptr<float>(),
|
||||
idx_points.data_ptr<int64_t>(),
|
||||
grad_dists.data_ptr<float>(),
|
||||
points.contiguous().data_ptr<float>(),
|
||||
tris.contiguous().data_ptr<float>(),
|
||||
idx_points.contiguous().data_ptr<int64_t>(),
|
||||
grad_dists.contiguous().data_ptr<float>(),
|
||||
grad_points.data_ptr<float>(),
|
||||
grad_tris.data_ptr<float>(),
|
||||
P);
|
||||
@@ -396,10 +396,10 @@ std::tuple<at::Tensor, at::Tensor> FacePointDistanceForwardCuda(
|
||||
size_t shared_size = threads * sizeof(size_t) + threads * sizeof(int64_t);
|
||||
|
||||
FacePointForwardKernel<<<blocks, threads, shared_size, stream>>>(
|
||||
points.data_ptr<float>(),
|
||||
points_first_idx.data_ptr<int64_t>(),
|
||||
tris.data_ptr<float>(),
|
||||
tris_first_idx.data_ptr<int64_t>(),
|
||||
points.contiguous().data_ptr<float>(),
|
||||
points_first_idx.contiguous().data_ptr<int64_t>(),
|
||||
tris.contiguous().data_ptr<float>(),
|
||||
tris_first_idx.contiguous().data_ptr<int64_t>(),
|
||||
dists.data_ptr<float>(),
|
||||
idxs.data_ptr<int64_t>(),
|
||||
B,
|
||||
@@ -501,10 +501,10 @@ std::tuple<at::Tensor, at::Tensor> FacePointDistanceBackwardCuda(
|
||||
const int threads = 512;
|
||||
|
||||
FacePointBackwardKernel<<<blocks, threads, 0, stream>>>(
|
||||
points.data_ptr<float>(),
|
||||
tris.data_ptr<float>(),
|
||||
idx_tris.data_ptr<int64_t>(),
|
||||
grad_dists.data_ptr<float>(),
|
||||
points.contiguous().data_ptr<float>(),
|
||||
tris.contiguous().data_ptr<float>(),
|
||||
idx_tris.contiguous().data_ptr<int64_t>(),
|
||||
grad_dists.contiguous().data_ptr<float>(),
|
||||
grad_points.data_ptr<float>(),
|
||||
grad_tris.data_ptr<float>(),
|
||||
T);
|
||||
@@ -575,8 +575,8 @@ at::Tensor PointFaceArrayDistanceForwardCuda(
|
||||
const size_t threads = 64;
|
||||
|
||||
PointFaceArrayForwardKernel<<<blocks, threads, 0, stream>>>(
|
||||
points.data_ptr<float>(),
|
||||
tris.data_ptr<float>(),
|
||||
points.contiguous().data_ptr<float>(),
|
||||
tris.contiguous().data_ptr<float>(),
|
||||
dists.data_ptr<float>(),
|
||||
P,
|
||||
T);
|
||||
@@ -672,9 +672,9 @@ std::tuple<at::Tensor, at::Tensor> PointFaceArrayDistanceBackwardCuda(
|
||||
const size_t threads = 64;
|
||||
|
||||
PointFaceArrayBackwardKernel<<<blocks, threads, 0, stream>>>(
|
||||
points.data_ptr<float>(),
|
||||
tris.data_ptr<float>(),
|
||||
grad_dists.data_ptr<float>(),
|
||||
points.contiguous().data_ptr<float>(),
|
||||
tris.contiguous().data_ptr<float>(),
|
||||
grad_dists.contiguous().data_ptr<float>(),
|
||||
grad_points.data_ptr<float>(),
|
||||
grad_tris.data_ptr<float>(),
|
||||
P,
|
||||
|
||||
@@ -56,10 +56,10 @@ std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceForward(
|
||||
const int64_t max_points) {
|
||||
if (points.is_cuda()) {
|
||||
#ifdef WITH_CUDA
|
||||
CHECK_CONTIGUOUS_CUDA(points);
|
||||
CHECK_CONTIGUOUS_CUDA(points_first_idx);
|
||||
CHECK_CONTIGUOUS_CUDA(tris);
|
||||
CHECK_CONTIGUOUS_CUDA(tris_first_idx);
|
||||
CHECK_CUDA(points);
|
||||
CHECK_CUDA(points_first_idx);
|
||||
CHECK_CUDA(tris);
|
||||
CHECK_CUDA(tris_first_idx);
|
||||
return PointFaceDistanceForwardCuda(
|
||||
points, points_first_idx, tris, tris_first_idx, max_points);
|
||||
#else
|
||||
@@ -100,10 +100,10 @@ std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceBackward(
|
||||
const torch::Tensor& grad_dists) {
|
||||
if (points.is_cuda()) {
|
||||
#ifdef WITH_CUDA
|
||||
CHECK_CONTIGUOUS_CUDA(points);
|
||||
CHECK_CONTIGUOUS_CUDA(tris);
|
||||
CHECK_CONTIGUOUS_CUDA(idx_points);
|
||||
CHECK_CONTIGUOUS_CUDA(grad_dists);
|
||||
CHECK_CUDA(points);
|
||||
CHECK_CUDA(tris);
|
||||
CHECK_CUDA(idx_points);
|
||||
CHECK_CUDA(grad_dists);
|
||||
return PointFaceDistanceBackwardCuda(points, tris, idx_points, grad_dists);
|
||||
#else
|
||||
AT_ERROR("Not compiled with GPU support.");
|
||||
@@ -160,10 +160,10 @@ std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceForward(
|
||||
const int64_t max_tris) {
|
||||
if (points.is_cuda()) {
|
||||
#ifdef WITH_CUDA
|
||||
CHECK_CONTIGUOUS_CUDA(points);
|
||||
CHECK_CONTIGUOUS_CUDA(points_first_idx);
|
||||
CHECK_CONTIGUOUS_CUDA(tris);
|
||||
CHECK_CONTIGUOUS_CUDA(tris_first_idx);
|
||||
CHECK_CUDA(points);
|
||||
CHECK_CUDA(points_first_idx);
|
||||
CHECK_CUDA(tris);
|
||||
CHECK_CUDA(tris_first_idx);
|
||||
return FacePointDistanceForwardCuda(
|
||||
points, points_first_idx, tris, tris_first_idx, max_tris);
|
||||
#else
|
||||
@@ -204,10 +204,10 @@ std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceBackward(
|
||||
const torch::Tensor& grad_dists) {
|
||||
if (points.is_cuda()) {
|
||||
#ifdef WITH_CUDA
|
||||
CHECK_CONTIGUOUS_CUDA(points);
|
||||
CHECK_CONTIGUOUS_CUDA(tris);
|
||||
CHECK_CONTIGUOUS_CUDA(idx_tris);
|
||||
CHECK_CONTIGUOUS_CUDA(grad_dists);
|
||||
CHECK_CUDA(points);
|
||||
CHECK_CUDA(tris);
|
||||
CHECK_CUDA(idx_tris);
|
||||
CHECK_CUDA(grad_dists);
|
||||
return FacePointDistanceBackwardCuda(points, tris, idx_tris, grad_dists);
|
||||
#else
|
||||
AT_ERROR("Not compiled with GPU support.");
|
||||
@@ -250,8 +250,8 @@ torch::Tensor PointFaceArrayDistanceForward(
|
||||
const torch::Tensor& tris) {
|
||||
if (points.is_cuda()) {
|
||||
#ifdef WITH_CUDA
|
||||
CHECK_CONTIGUOUS_CUDA(points);
|
||||
CHECK_CONTIGUOUS_CUDA(tris);
|
||||
CHECK_CUDA(points);
|
||||
CHECK_CUDA(tris);
|
||||
return PointFaceArrayDistanceForwardCuda(points, tris);
|
||||
#else
|
||||
AT_ERROR("Not compiled with GPU support.");
|
||||
@@ -285,9 +285,9 @@ std::tuple<torch::Tensor, torch::Tensor> PointFaceArrayDistanceBackward(
|
||||
const torch::Tensor& grad_dists) {
|
||||
if (points.is_cuda()) {
|
||||
#ifdef WITH_CUDA
|
||||
CHECK_CONTIGUOUS_CUDA(points);
|
||||
CHECK_CONTIGUOUS_CUDA(tris);
|
||||
CHECK_CONTIGUOUS_CUDA(grad_dists);
|
||||
CHECK_CUDA(points);
|
||||
CHECK_CUDA(tris);
|
||||
CHECK_CUDA(grad_dists);
|
||||
return PointFaceArrayDistanceBackwardCuda(points, tris, grad_dists);
|
||||
#else
|
||||
AT_ERROR("Not compiled with GPU support.");
|
||||
|
||||
Reference in New Issue
Block a user