Mirror of https://github.com/facebookresearch/pytorch3d.git, synced 2025-08-02 03:42:50 +08:00
Make cuda tensors contiguous in host function and remove contiguous check
Summary: Update the cuda kernels to:
- remove contiguous checks for the grad tensors and for cpu functions which use accessors
- for cuda implementations, call `.contiguous()` on all tensors in the host function before invoking the kernel

Reviewed By: gkioxari

Differential Revision: D21598008

fbshipit-source-id: 9b97bda4582fd4269c8a00999874d4552a1aea2d
This commit is contained in:
parent a8377f1f06
commit 3fef506895
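For orientation, the pattern the diff applies throughout is sketched below: the CUDA host wrapper calls `.contiguous()` on its inputs once, before the kernel launch, so the dispatch layer no longer needs a CHECK_CONTIGUOUS-style precondition, and kernels taking raw pointers can assume densely packed memory (kernels taking packed accessors never needed contiguous inputs, since accessors carry strides). This is a minimal illustrative sketch under those assumptions, not code from this commit; MyOpForwardKernel and MyOpForwardCuda are hypothetical names.

// Illustrative sketch only; MyOpForwardKernel / MyOpForwardCuda are
// hypothetical names and not part of this commit.
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h>

__global__ void MyOpForwardKernel(
    const float* __restrict__ input, // host wrapper guarantees contiguity
    float* __restrict__ output,
    const int64_t N) {
  const int64_t tid = blockIdx.x * blockDim.x + threadIdx.x;
  if (tid < N) {
    output[tid] = 2.0f * input[tid]; // placeholder computation
  }
}

// Host function: assumes a float32 CUDA tensor as input.
at::Tensor MyOpForwardCuda(const at::Tensor& input) {
  at::cuda::CUDAGuard device_guard(input.device());
  cudaStream_t stream = at::cuda::getCurrentCUDAStream();

  // Make the tensor contiguous here, in the host function, instead of
  // requiring callers to do it (no CHECK_CONTIGUOUS_CUDA at dispatch level).
  const at::Tensor input_c = input.contiguous();
  at::Tensor output = at::zeros_like(input_c);

  const int64_t N = input_c.numel();
  const int threads = 256;
  const int blocks = static_cast<int>((N + threads - 1) / threads);

  MyOpForwardKernel<<<blocks, threads, 0, stream>>>(
      input_c.data_ptr<float>(), output.data_ptr<float>(), N);
  AT_CUDA_CHECK(cudaGetLastError());
  return output;
}

Doing the `.contiguous()` call once inside the host wrapper keeps any layout precondition out of the caller-facing API, and it is a no-op when the tensor is already contiguous.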
@@ -168,6 +168,8 @@ at::Tensor alphaCompositeCudaForward(
 // doubles. Currently, support is for floats only.
 alphaCompositeCudaForwardKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(
 // clang-format off
+// As we are using packed accessors here the tensors
+// do not need to be made contiguous.
 result.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
 features.packed_accessor64<float, 2, at::RestrictPtrTraits>(),
 alphas.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
@@ -211,6 +213,8 @@ std::tuple<at::Tensor, at::Tensor> alphaCompositeCudaBackward(
 // doubles. Currently, support is for floats only.
 alphaCompositeCudaBackwardKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(
 // clang-format off
+// As we are using packed accessors here the tensors
+// do not need to be made contiguous.
 grad_features.packed_accessor64<float, 2, at::RestrictPtrTraits>(),
 grad_alphas.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
 grad_outputs.packed_accessor64<float, 4, at::RestrictPtrTraits>(),

@@ -60,18 +60,14 @@ torch::Tensor alphaCompositeForward(

 if (features.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(features);
-CHECK_CONTIGUOUS_CUDA(alphas);
-CHECK_CONTIGUOUS_CUDA(points_idx);
+CHECK_CUDA(features);
+CHECK_CUDA(alphas);
+CHECK_CUDA(points_idx);
 return alphaCompositeCudaForward(features, alphas, points_idx);
 #else
 AT_ERROR("Not compiled with GPU support");
 #endif
 } else {
-CHECK_CONTIGUOUS(features);
-CHECK_CONTIGUOUS(alphas);
-CHECK_CONTIGUOUS(points_idx);
-
 return alphaCompositeCpuForward(features, alphas, points_idx);
 }
 }
@@ -88,10 +84,10 @@ std::tuple<torch::Tensor, torch::Tensor> alphaCompositeBackward(

 if (grad_outputs.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(grad_outputs);
-CHECK_CONTIGUOUS_CUDA(features);
-CHECK_CONTIGUOUS_CUDA(alphas);
-CHECK_CONTIGUOUS_CUDA(points_idx);
+CHECK_CUDA(grad_outputs);
+CHECK_CUDA(features);
+CHECK_CUDA(alphas);
+CHECK_CUDA(points_idx);

 return alphaCompositeCudaBackward(
 grad_outputs, features, alphas, points_idx);
@@ -99,11 +95,6 @@ std::tuple<torch::Tensor, torch::Tensor> alphaCompositeBackward(
 AT_ERROR("Not compiled with GPU support");
 #endif
 } else {
-CHECK_CONTIGUOUS(grad_outputs);
-CHECK_CONTIGUOUS(features);
-CHECK_CONTIGUOUS(alphas);
-CHECK_CONTIGUOUS(points_idx);
-
 return alphaCompositeCpuBackward(
 grad_outputs, features, alphas, points_idx);
 }

@@ -183,6 +183,8 @@ at::Tensor weightedSumNormCudaForward(
 // doubles. Currently, support is for floats only.
 // clang-format off
 weightedSumNormCudaForwardKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(
+// As we are using packed accessors here the tensors
+// do not need to be made contiguous.
 result.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
 features.packed_accessor64<float, 2, at::RestrictPtrTraits>(),
 alphas.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
@@ -227,6 +229,8 @@ std::tuple<at::Tensor, at::Tensor> weightedSumNormCudaBackward(
 // doubles. Currently, support is for floats only.
 weightedSumNormCudaBackwardKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(
 // clang-format off
+// As we are using packed accessors here the tensors
+// do not need to be made contiguous.
 grad_features.packed_accessor64<float, 2, at::RestrictPtrTraits>(),
 grad_alphas.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
 grad_outputs.packed_accessor64<float, 4, at::RestrictPtrTraits>(),

@@ -58,19 +58,15 @@ torch::Tensor weightedSumNormForward(

 if (features.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(features);
-CHECK_CONTIGUOUS_CUDA(alphas);
-CHECK_CONTIGUOUS_CUDA(points_idx);
+CHECK_CUDA(features);
+CHECK_CUDA(alphas);
+CHECK_CUDA(points_idx);

 return weightedSumNormCudaForward(features, alphas, points_idx);
 #else
 AT_ERROR("Not compiled with GPU support");
 #endif
 } else {
-CHECK_CONTIGUOUS(features);
-CHECK_CONTIGUOUS(alphas);
-CHECK_CONTIGUOUS(points_idx);
-
 return weightedSumNormCpuForward(features, alphas, points_idx);
 }
 }
@@ -87,10 +83,10 @@ std::tuple<torch::Tensor, torch::Tensor> weightedSumNormBackward(

 if (grad_outputs.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(grad_outputs);
-CHECK_CONTIGUOUS_CUDA(features);
-CHECK_CONTIGUOUS_CUDA(alphas);
-CHECK_CONTIGUOUS_CUDA(points_idx);
+CHECK_CUDA(grad_outputs);
+CHECK_CUDA(features);
+CHECK_CUDA(alphas);
+CHECK_CUDA(points_idx);

 return weightedSumNormCudaBackward(
 grad_outputs, features, alphas, points_idx);
@@ -98,11 +94,6 @@ std::tuple<torch::Tensor, torch::Tensor> weightedSumNormBackward(
 AT_ERROR("Not compiled with GPU support");
 #endif
 } else {
-CHECK_CONTIGUOUS(grad_outputs);
-CHECK_CONTIGUOUS(features);
-CHECK_CONTIGUOUS(alphas);
-CHECK_CONTIGUOUS(points_idx);
-
 return weightedSumNormCpuBackward(
 grad_outputs, features, alphas, points_idx);
 }

@@ -142,6 +142,8 @@ at::Tensor weightedSumCudaForward(
 // doubles. Currently, support is for floats only.
 weightedSumCudaForwardKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(
 // clang-format off
+// As we are using packed accessors here the tensors
+// do not need to be made contiguous.
 result.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
 features.packed_accessor64<float, 2, at::RestrictPtrTraits>(),
 alphas.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
@@ -185,6 +187,8 @@ std::tuple<at::Tensor, at::Tensor> weightedSumCudaBackward(
 // doubles. Currently, support is for floats only.
 weightedSumCudaBackwardKernel<<<numBlocks, threadsPerBlock, 0, stream>>>(
 // clang-format off
+// As we are using packed accessors here the tensors
+// do not need to be made contiguous.
 grad_features.packed_accessor64<float, 2, at::RestrictPtrTraits>(),
 grad_alphas.packed_accessor64<float, 4, at::RestrictPtrTraits>(),
 grad_outputs.packed_accessor64<float, 4, at::RestrictPtrTraits>(),

@@ -58,18 +58,14 @@ torch::Tensor weightedSumForward(

 if (features.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(features);
-CHECK_CONTIGUOUS_CUDA(alphas);
-CHECK_CONTIGUOUS_CUDA(points_idx);
+CHECK_CUDA(features);
+CHECK_CUDA(alphas);
+CHECK_CUDA(points_idx);
 return weightedSumCudaForward(features, alphas, points_idx);
 #else
 AT_ERROR("Not compiled with GPU support");
 #endif
 } else {
-CHECK_CONTIGUOUS(features);
-CHECK_CONTIGUOUS(alphas);
-CHECK_CONTIGUOUS(points_idx);
-
 return weightedSumCpuForward(features, alphas, points_idx);
 }
 }
@@ -86,21 +82,16 @@ std::tuple<torch::Tensor, torch::Tensor> weightedSumBackward(

 if (grad_outputs.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(grad_outputs);
-CHECK_CONTIGUOUS_CUDA(features);
-CHECK_CONTIGUOUS_CUDA(alphas);
-CHECK_CONTIGUOUS_CUDA(points_idx);
+CHECK_CUDA(grad_outputs);
+CHECK_CUDA(features);
+CHECK_CUDA(alphas);
+CHECK_CUDA(points_idx);

 return weightedSumCudaBackward(grad_outputs, features, alphas, points_idx);
 #else
 AT_ERROR("Not compiled with GPU support");
 #endif
 } else {
-CHECK_CONTIGUOUS(grad_outputs);
-CHECK_CONTIGUOUS(features);
-CHECK_CONTIGUOUS(alphas);
-CHECK_CONTIGUOUS(points_idx);
-
 return weightedSumCpuBackward(grad_outputs, features, alphas, points_idx);
 }
 }

@@ -239,8 +239,8 @@ std::tuple<at::Tensor, at::Tensor> FaceAreasNormalsForwardCuda(
 AT_DISPATCH_FLOATING_TYPES(
 verts.scalar_type(), "face_areas_normals_forward_cuda", ([&] {
 FaceAreasNormalsForwardKernel<scalar_t><<<blocks, threads, 0, stream>>>(
-verts.data_ptr<scalar_t>(),
-faces.data_ptr<int64_t>(),
+verts.contiguous().data_ptr<scalar_t>(),
+faces.contiguous().data_ptr<int64_t>(),
 areas.data_ptr<scalar_t>(),
 normals.data_ptr<scalar_t>(),
 V,
@@ -282,10 +282,10 @@ at::Tensor FaceAreasNormalsBackwardCuda(
 // TODO(gkioxari) add AT_DISPATCH_FLOATING_TYPES once atomicAdd supports
 // doubles. Currently, support is for floats only.
 FaceAreasNormalsBackwardKernel<<<blocks, threads, 0, stream>>>(
-grad_areas.data_ptr<float>(),
-grad_normals.data_ptr<float>(),
-verts.data_ptr<float>(),
-faces.data_ptr<int64_t>(),
+grad_areas.contiguous().data_ptr<float>(),
+grad_normals.contiguous().data_ptr<float>(),
+verts.contiguous().data_ptr<float>(),
+faces.contiguous().data_ptr<int64_t>(),
 grad_verts.data_ptr<float>(),
 V,
 F);

@@ -47,8 +47,8 @@ std::tuple<at::Tensor, at::Tensor> FaceAreasNormalsForward(
 const at::Tensor faces) {
 if (verts.is_cuda() && faces.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(verts);
-CHECK_CONTIGUOUS_CUDA(faces);
+CHECK_CUDA(verts);
+CHECK_CUDA(faces);
 return FaceAreasNormalsForwardCuda(verts, faces);
 #else
 AT_ERROR("Not compiled with GPU support.");
@@ -65,10 +65,10 @@ at::Tensor FaceAreasNormalsBackward(
 const at::Tensor faces) {
 if (verts.is_cuda() && faces.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(verts);
-CHECK_CONTIGUOUS_CUDA(faces);
-CHECK_CONTIGUOUS_CUDA(grad_areas);
-CHECK_CONTIGUOUS_CUDA(grad_normals);
+CHECK_CUDA(verts);
+CHECK_CUDA(faces);
+CHECK_CUDA(grad_areas);
+CHECK_CUDA(grad_normals);
 return FaceAreasNormalsBackwardCuda(grad_areas, grad_normals, verts, faces);
 #else
 AT_ERROR("Not compiled with GPU support.");

@@ -72,8 +72,8 @@ at::Tensor GatherScatterCuda(
 }

 GatherScatterCudaKernel<<<blocks, threads, 0, stream>>>(
-input.data_ptr<float>(),
-edges.data_ptr<int64_t>(),
+input.contiguous().data_ptr<float>(),
+edges.contiguous().data_ptr<int64_t>(),
 output.data_ptr<float>(),
 directed,
 backward,

@@ -35,8 +35,8 @@ at::Tensor GatherScatter(
 bool backward) {
 if (input.is_cuda() && edges.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(input);
-CHECK_CONTIGUOUS_CUDA(edges);
+CHECK_CUDA(input);
+CHECK_CUDA(edges);
 return GatherScatterCuda(input, edges, directed, backward);
 #else
 AT_ERROR("Not compiled with GPU support.");

@@ -347,21 +347,21 @@ std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdxCuda(
 const size_t threads = 256;
 const size_t blocks = 256;
 if (version == 0) {
-AT_DISPATCH_FLOATING_TYPES(p1.scalar_type(), "knn_kernel_cuda", ([&] {
-KNearestNeighborKernelV0<scalar_t>
-<<<blocks, threads, 0, stream>>>(
-p1.data_ptr<scalar_t>(),
-p2.data_ptr<scalar_t>(),
-lengths1.data_ptr<int64_t>(),
-lengths2.data_ptr<int64_t>(),
-dists.data_ptr<scalar_t>(),
-idxs.data_ptr<int64_t>(),
-N,
-P1,
-P2,
-D,
-K);
-}));
+AT_DISPATCH_FLOATING_TYPES(
+p1.scalar_type(), "knn_kernel_cuda", ([&] {
+KNearestNeighborKernelV0<scalar_t><<<blocks, threads, 0, stream>>>(
+p1.contiguous().data_ptr<scalar_t>(),
+p2.contiguous().data_ptr<scalar_t>(),
+lengths1.contiguous().data_ptr<int64_t>(),
+lengths2.contiguous().data_ptr<int64_t>(),
+dists.data_ptr<scalar_t>(),
+idxs.data_ptr<int64_t>(),
+N,
+P1,
+P2,
+D,
+K);
+}));
 } else if (version == 1) {
 AT_DISPATCH_FLOATING_TYPES(p1.scalar_type(), "knn_kernel_cuda", ([&] {
 DispatchKernel1D<
@@ -372,10 +372,10 @@ std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdxCuda(
 D,
 blocks,
 threads,
-p1.data_ptr<scalar_t>(),
-p2.data_ptr<scalar_t>(),
-lengths1.data_ptr<int64_t>(),
-lengths2.data_ptr<int64_t>(),
+p1.contiguous().data_ptr<scalar_t>(),
+p2.contiguous().data_ptr<scalar_t>(),
+lengths1.contiguous().data_ptr<int64_t>(),
+lengths2.contiguous().data_ptr<int64_t>(),
 dists.data_ptr<scalar_t>(),
 idxs.data_ptr<int64_t>(),
 N,
@@ -396,10 +396,10 @@ std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdxCuda(
 K_64,
 blocks,
 threads,
-p1.data_ptr<scalar_t>(),
-p2.data_ptr<scalar_t>(),
-lengths1.data_ptr<int64_t>(),
-lengths2.data_ptr<int64_t>(),
+p1.contiguous().data_ptr<scalar_t>(),
+p2.contiguous().data_ptr<scalar_t>(),
+lengths1.contiguous().data_ptr<int64_t>(),
+lengths2.contiguous().data_ptr<int64_t>(),
 dists.data_ptr<scalar_t>(),
 idxs.data_ptr<int64_t>(),
 N,
@@ -419,10 +419,10 @@ std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdxCuda(
 K_64,
 blocks,
 threads,
-p1.data_ptr<scalar_t>(),
-p2.data_ptr<scalar_t>(),
-lengths1.data_ptr<int64_t>(),
-lengths2.data_ptr<int64_t>(),
+p1.contiguous().data_ptr<scalar_t>(),
+p2.contiguous().data_ptr<scalar_t>(),
+lengths1.contiguous().data_ptr<int64_t>(),
+lengths2.contiguous().data_ptr<int64_t>(),
 dists.data_ptr<scalar_t>(),
 idxs.data_ptr<int64_t>(),
 N,
@@ -525,12 +525,12 @@ std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackwardCuda(
 const int threads = 512;

 KNearestNeighborBackwardKernel<<<blocks, threads, 0, stream>>>(
-p1.data_ptr<float>(),
-p2.data_ptr<float>(),
-lengths1.data_ptr<int64_t>(),
-lengths2.data_ptr<int64_t>(),
-idxs.data_ptr<int64_t>(),
-grad_dists.data_ptr<float>(),
+p1.contiguous().data_ptr<float>(),
+p2.contiguous().data_ptr<float>(),
+lengths1.contiguous().data_ptr<int64_t>(),
+lengths2.contiguous().data_ptr<int64_t>(),
+idxs.contiguous().data_ptr<int64_t>(),
+grad_dists.contiguous().data_ptr<float>(),
 grad_p1.data_ptr<float>(),
 grad_p2.data_ptr<float>(),
 N,

@@ -56,8 +56,8 @@ std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdx(
 int version) {
 if (p1.is_cuda() || p2.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(p1);
-CHECK_CONTIGUOUS_CUDA(p2);
+CHECK_CUDA(p1);
+CHECK_CUDA(p2);
 return KNearestNeighborIdxCuda(p1, p2, lengths1, lengths2, K, version);
 #else
 AT_ERROR("Not compiled with GPU support.");
@@ -117,8 +117,8 @@ std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackward(
 const at::Tensor& grad_dists) {
 if (p1.is_cuda() || p2.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(p1);
-CHECK_CONTIGUOUS_CUDA(p2);
+CHECK_CUDA(p1);
+CHECK_CUDA(p2);
 return KNearestNeighborBackwardCuda(
 p1, p2, lengths1, lengths2, idxs, grad_dists);
 #else

@@ -146,8 +146,8 @@ at::Tensor PackedToPaddedCuda(
 AT_DISPATCH_FLOATING_TYPES(
 inputs_packed.scalar_type(), "packed_to_padded_d1_kernel", ([&] {
 PackedToPaddedKernelD1<scalar_t><<<blocks, threads, 0, stream>>>(
-inputs_packed.data_ptr<scalar_t>(),
-first_idxs.data_ptr<int64_t>(),
+inputs_packed.contiguous().data_ptr<scalar_t>(),
+first_idxs.contiguous().data_ptr<int64_t>(),
 inputs_padded.data_ptr<scalar_t>(),
 batch_size,
 max_size,
@@ -157,8 +157,8 @@ at::Tensor PackedToPaddedCuda(
 AT_DISPATCH_FLOATING_TYPES(
 inputs_packed.scalar_type(), "packed_to_padded_kernel", ([&] {
 PackedToPaddedKernel<scalar_t><<<blocks, threads, 0, stream>>>(
-inputs_packed.data_ptr<scalar_t>(),
-first_idxs.data_ptr<int64_t>(),
+inputs_packed.contiguous().data_ptr<scalar_t>(),
+first_idxs.contiguous().data_ptr<int64_t>(),
 inputs_padded.data_ptr<scalar_t>(),
 batch_size,
 max_size,
@@ -209,8 +209,8 @@ at::Tensor PaddedToPackedCuda(
 AT_DISPATCH_FLOATING_TYPES(
 inputs_padded.scalar_type(), "padded_to_packed_d1_kernel", ([&] {
 PaddedToPackedKernelD1<scalar_t><<<blocks, threads, 0, stream>>>(
-inputs_padded.data_ptr<scalar_t>(),
-first_idxs.data_ptr<int64_t>(),
+inputs_padded.contiguous().data_ptr<scalar_t>(),
+first_idxs.contiguous().data_ptr<int64_t>(),
 inputs_packed.data_ptr<scalar_t>(),
 batch_size,
 max_size,
@@ -220,8 +220,8 @@ at::Tensor PaddedToPackedCuda(
 AT_DISPATCH_FLOATING_TYPES(
 inputs_padded.scalar_type(), "padded_to_packed_kernel", ([&] {
 PaddedToPackedKernel<scalar_t><<<blocks, threads, 0, stream>>>(
-inputs_padded.data_ptr<scalar_t>(),
-first_idxs.data_ptr<int64_t>(),
+inputs_padded.contiguous().data_ptr<scalar_t>(),
+first_idxs.contiguous().data_ptr<int64_t>(),
 inputs_packed.data_ptr<scalar_t>(),
 batch_size,
 max_size,

@@ -75,8 +75,8 @@ at::Tensor PackedToPadded(
 const int64_t max_size) {
 if (inputs_packed.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(inputs_packed);
-CHECK_CONTIGUOUS_CUDA(first_idxs);
+CHECK_CUDA(inputs_packed);
+CHECK_CUDA(first_idxs);
 return PackedToPaddedCuda(inputs_packed, first_idxs, max_size);
 #else
 AT_ERROR("Not compiled with GPU support.");
@@ -92,8 +92,8 @@ at::Tensor PaddedToPacked(
 const int64_t num_inputs) {
 if (inputs_padded.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(inputs_padded);
-CHECK_CONTIGUOUS_CUDA(first_idxs);
+CHECK_CUDA(inputs_padded);
+CHECK_CUDA(first_idxs);
 return PaddedToPackedCuda(inputs_padded, first_idxs, num_inputs);
 #else
 AT_ERROR("Not compiled with GPU support.");

@@ -144,15 +144,16 @@ std::tuple<at::Tensor, at::Tensor> PointEdgeDistanceForwardCuda(
 size_t shared_size = threads * sizeof(size_t) + threads * sizeof(int64_t);

 PointEdgeForwardKernel<<<blocks, threads, shared_size, stream>>>(
-points.data_ptr<float>(),
-points_first_idx.data_ptr<int64_t>(),
-segms.data_ptr<float>(),
-segms_first_idx.data_ptr<int64_t>(),
+points.contiguous().data_ptr<float>(),
+points_first_idx.contiguous().data_ptr<int64_t>(),
+segms.contiguous().data_ptr<float>(),
+segms_first_idx.contiguous().data_ptr<int64_t>(),
 dists.data_ptr<float>(),
 idxs.data_ptr<int64_t>(),
 B,
 P,
 S);
+
 AT_CUDA_CHECK(cudaGetLastError());
 return std::make_tuple(dists, idxs);
 }
@@ -240,10 +241,10 @@ std::tuple<at::Tensor, at::Tensor> PointEdgeDistanceBackwardCuda(
 const int threads = 512;

 PointEdgeBackwardKernel<<<blocks, threads, 0, stream>>>(
-points.data_ptr<float>(),
-segms.data_ptr<float>(),
-idx_points.data_ptr<int64_t>(),
-grad_dists.data_ptr<float>(),
+points.contiguous().data_ptr<float>(),
+segms.contiguous().data_ptr<float>(),
+idx_points.contiguous().data_ptr<int64_t>(),
+grad_dists.contiguous().data_ptr<float>(),
 grad_points.data_ptr<float>(),
 grad_segms.data_ptr<float>(),
 P);
@@ -386,10 +387,10 @@ std::tuple<at::Tensor, at::Tensor> EdgePointDistanceForwardCuda(
 size_t shared_size = threads * sizeof(size_t) + threads * sizeof(int64_t);

 EdgePointForwardKernel<<<blocks, threads, shared_size, stream>>>(
-points.data_ptr<float>(),
-points_first_idx.data_ptr<int64_t>(),
-segms.data_ptr<float>(),
-segms_first_idx.data_ptr<int64_t>(),
+points.contiguous().data_ptr<float>(),
+points_first_idx.contiguous().data_ptr<int64_t>(),
+segms.contiguous().data_ptr<float>(),
+segms_first_idx.contiguous().data_ptr<int64_t>(),
 dists.data_ptr<float>(),
 idxs.data_ptr<int64_t>(),
 B,
@@ -478,10 +479,10 @@ std::tuple<at::Tensor, at::Tensor> EdgePointDistanceBackwardCuda(
 const int threads = 512;

 EdgePointBackwardKernel<<<blocks, threads, 0, stream>>>(
-points.data_ptr<float>(),
-segms.data_ptr<float>(),
-idx_segms.data_ptr<int64_t>(),
-grad_dists.data_ptr<float>(),
+points.contiguous().data_ptr<float>(),
+segms.contiguous().data_ptr<float>(),
+idx_segms.contiguous().data_ptr<int64_t>(),
+grad_dists.contiguous().data_ptr<float>(),
 grad_points.data_ptr<float>(),
 grad_segms.data_ptr<float>(),
 S);
@@ -550,8 +551,8 @@ at::Tensor PointEdgeArrayDistanceForwardCuda(
 const size_t threads = 64;

 PointEdgeArrayForwardKernel<<<blocks, threads, 0, stream>>>(
-points.data_ptr<float>(),
-segms.data_ptr<float>(),
+points.contiguous().data_ptr<float>(),
+segms.contiguous().data_ptr<float>(),
 dists.data_ptr<float>(),
 P,
 S);
@@ -638,9 +639,9 @@ std::tuple<at::Tensor, at::Tensor> PointEdgeArrayDistanceBackwardCuda(
 const size_t threads = 64;

 PointEdgeArrayBackwardKernel<<<blocks, threads, 0, stream>>>(
-points.data_ptr<float>(),
-segms.data_ptr<float>(),
-grad_dists.data_ptr<float>(),
+points.contiguous().data_ptr<float>(),
+segms.contiguous().data_ptr<float>(),
+grad_dists.contiguous().data_ptr<float>(),
 grad_points.data_ptr<float>(),
 grad_segms.data_ptr<float>(),
 P,

@@ -54,10 +54,10 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceForward(
 const int64_t max_points) {
 if (points.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(points);
-CHECK_CONTIGUOUS_CUDA(points_first_idx);
-CHECK_CONTIGUOUS_CUDA(segms);
-CHECK_CONTIGUOUS_CUDA(segms_first_idx);
+CHECK_CUDA(points);
+CHECK_CUDA(points_first_idx);
+CHECK_CUDA(segms);
+CHECK_CUDA(segms_first_idx);
 return PointEdgeDistanceForwardCuda(
 points, points_first_idx, segms, segms_first_idx, max_points);
 #else
@@ -98,10 +98,10 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceBackward(
 const torch::Tensor& grad_dists) {
 if (points.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(points);
-CHECK_CONTIGUOUS_CUDA(segms);
-CHECK_CONTIGUOUS_CUDA(idx_points);
-CHECK_CONTIGUOUS_CUDA(grad_dists);
+CHECK_CUDA(points);
+CHECK_CUDA(segms);
+CHECK_CUDA(idx_points);
+CHECK_CUDA(grad_dists);
 return PointEdgeDistanceBackwardCuda(points, segms, idx_points, grad_dists);
 #else
 AT_ERROR("Not compiled with GPU support.");
@@ -158,10 +158,10 @@ std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceForward(
 const int64_t max_segms) {
 if (points.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(points);
-CHECK_CONTIGUOUS_CUDA(points_first_idx);
-CHECK_CONTIGUOUS_CUDA(segms);
-CHECK_CONTIGUOUS_CUDA(segms_first_idx);
+CHECK_CUDA(points);
+CHECK_CUDA(points_first_idx);
+CHECK_CUDA(segms);
+CHECK_CUDA(segms_first_idx);
 return EdgePointDistanceForwardCuda(
 points, points_first_idx, segms, segms_first_idx, max_segms);
 #else
@@ -202,10 +202,10 @@ std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceBackward(
 const torch::Tensor& grad_dists) {
 if (points.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(points);
-CHECK_CONTIGUOUS_CUDA(segms);
-CHECK_CONTIGUOUS_CUDA(idx_segms);
-CHECK_CONTIGUOUS_CUDA(grad_dists);
+CHECK_CUDA(points);
+CHECK_CUDA(segms);
+CHECK_CUDA(idx_segms);
+CHECK_CUDA(grad_dists);
 return EdgePointDistanceBackwardCuda(points, segms, idx_segms, grad_dists);
 #else
 AT_ERROR("Not compiled with GPU support.");
@@ -247,8 +247,8 @@ torch::Tensor PointEdgeArrayDistanceForward(
 const torch::Tensor& segms) {
 if (points.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(points);
-CHECK_CONTIGUOUS_CUDA(segms);
+CHECK_CUDA(points);
+CHECK_CUDA(segms);
 return PointEdgeArrayDistanceForwardCuda(points, segms);
 #else
 AT_ERROR("Not compiled with GPU support.");
@@ -283,9 +283,9 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeArrayDistanceBackward(
 const torch::Tensor& grad_dists) {
 if (points.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(points);
-CHECK_CONTIGUOUS_CUDA(segms);
-CHECK_CONTIGUOUS_CUDA(grad_dists);
+CHECK_CUDA(points);
+CHECK_CUDA(segms);
+CHECK_CUDA(grad_dists);
 return PointEdgeArrayDistanceBackwardCuda(points, segms, grad_dists);
 #else
 AT_ERROR("Not compiled with GPU support.");

@@ -145,10 +145,10 @@ std::tuple<at::Tensor, at::Tensor> PointFaceDistanceForwardCuda(
 size_t shared_size = threads * sizeof(size_t) + threads * sizeof(int64_t);

 PointFaceForwardKernel<<<blocks, threads, shared_size, stream>>>(
-points.data_ptr<float>(),
-points_first_idx.data_ptr<int64_t>(),
-tris.data_ptr<float>(),
-tris_first_idx.data_ptr<int64_t>(),
+points.contiguous().data_ptr<float>(),
+points_first_idx.contiguous().data_ptr<int64_t>(),
+tris.contiguous().data_ptr<float>(),
+tris_first_idx.contiguous().data_ptr<int64_t>(),
 dists.data_ptr<float>(),
 idxs.data_ptr<int64_t>(),
 B,
@@ -249,10 +249,10 @@ std::tuple<at::Tensor, at::Tensor> PointFaceDistanceBackwardCuda(
 const int threads = 512;

 PointFaceBackwardKernel<<<blocks, threads, 0, stream>>>(
-points.data_ptr<float>(),
-tris.data_ptr<float>(),
-idx_points.data_ptr<int64_t>(),
-grad_dists.data_ptr<float>(),
+points.contiguous().data_ptr<float>(),
+tris.contiguous().data_ptr<float>(),
+idx_points.contiguous().data_ptr<int64_t>(),
+grad_dists.contiguous().data_ptr<float>(),
 grad_points.data_ptr<float>(),
 grad_tris.data_ptr<float>(),
 P);
@@ -396,10 +396,10 @@ std::tuple<at::Tensor, at::Tensor> FacePointDistanceForwardCuda(
 size_t shared_size = threads * sizeof(size_t) + threads * sizeof(int64_t);

 FacePointForwardKernel<<<blocks, threads, shared_size, stream>>>(
-points.data_ptr<float>(),
-points_first_idx.data_ptr<int64_t>(),
-tris.data_ptr<float>(),
-tris_first_idx.data_ptr<int64_t>(),
+points.contiguous().data_ptr<float>(),
+points_first_idx.contiguous().data_ptr<int64_t>(),
+tris.contiguous().data_ptr<float>(),
+tris_first_idx.contiguous().data_ptr<int64_t>(),
 dists.data_ptr<float>(),
 idxs.data_ptr<int64_t>(),
 B,
@@ -501,10 +501,10 @@ std::tuple<at::Tensor, at::Tensor> FacePointDistanceBackwardCuda(
 const int threads = 512;

 FacePointBackwardKernel<<<blocks, threads, 0, stream>>>(
-points.data_ptr<float>(),
-tris.data_ptr<float>(),
-idx_tris.data_ptr<int64_t>(),
-grad_dists.data_ptr<float>(),
+points.contiguous().data_ptr<float>(),
+tris.contiguous().data_ptr<float>(),
+idx_tris.contiguous().data_ptr<int64_t>(),
+grad_dists.contiguous().data_ptr<float>(),
 grad_points.data_ptr<float>(),
 grad_tris.data_ptr<float>(),
 T);
@@ -575,8 +575,8 @@ at::Tensor PointFaceArrayDistanceForwardCuda(
 const size_t threads = 64;

 PointFaceArrayForwardKernel<<<blocks, threads, 0, stream>>>(
-points.data_ptr<float>(),
-tris.data_ptr<float>(),
+points.contiguous().data_ptr<float>(),
+tris.contiguous().data_ptr<float>(),
 dists.data_ptr<float>(),
 P,
 T);
@@ -672,9 +672,9 @@ std::tuple<at::Tensor, at::Tensor> PointFaceArrayDistanceBackwardCuda(
 const size_t threads = 64;

 PointFaceArrayBackwardKernel<<<blocks, threads, 0, stream>>>(
-points.data_ptr<float>(),
-tris.data_ptr<float>(),
-grad_dists.data_ptr<float>(),
+points.contiguous().data_ptr<float>(),
+tris.contiguous().data_ptr<float>(),
+grad_dists.contiguous().data_ptr<float>(),
 grad_points.data_ptr<float>(),
 grad_tris.data_ptr<float>(),
 P,

@@ -56,10 +56,10 @@ std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceForward(
 const int64_t max_points) {
 if (points.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(points);
-CHECK_CONTIGUOUS_CUDA(points_first_idx);
-CHECK_CONTIGUOUS_CUDA(tris);
-CHECK_CONTIGUOUS_CUDA(tris_first_idx);
+CHECK_CUDA(points);
+CHECK_CUDA(points_first_idx);
+CHECK_CUDA(tris);
+CHECK_CUDA(tris_first_idx);
 return PointFaceDistanceForwardCuda(
 points, points_first_idx, tris, tris_first_idx, max_points);
 #else
@@ -100,10 +100,10 @@ std::tuple<torch::Tensor, torch::Tensor> PointFaceDistanceBackward(
 const torch::Tensor& grad_dists) {
 if (points.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(points);
-CHECK_CONTIGUOUS_CUDA(tris);
-CHECK_CONTIGUOUS_CUDA(idx_points);
-CHECK_CONTIGUOUS_CUDA(grad_dists);
+CHECK_CUDA(points);
+CHECK_CUDA(tris);
+CHECK_CUDA(idx_points);
+CHECK_CUDA(grad_dists);
 return PointFaceDistanceBackwardCuda(points, tris, idx_points, grad_dists);
 #else
 AT_ERROR("Not compiled with GPU support.");
@@ -160,10 +160,10 @@ std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceForward(
 const int64_t max_tris) {
 if (points.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(points);
-CHECK_CONTIGUOUS_CUDA(points_first_idx);
-CHECK_CONTIGUOUS_CUDA(tris);
-CHECK_CONTIGUOUS_CUDA(tris_first_idx);
+CHECK_CUDA(points);
+CHECK_CUDA(points_first_idx);
+CHECK_CUDA(tris);
+CHECK_CUDA(tris_first_idx);
 return FacePointDistanceForwardCuda(
 points, points_first_idx, tris, tris_first_idx, max_tris);
 #else
@@ -204,10 +204,10 @@ std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceBackward(
 const torch::Tensor& grad_dists) {
 if (points.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(points);
-CHECK_CONTIGUOUS_CUDA(tris);
-CHECK_CONTIGUOUS_CUDA(idx_tris);
-CHECK_CONTIGUOUS_CUDA(grad_dists);
+CHECK_CUDA(points);
+CHECK_CUDA(tris);
+CHECK_CUDA(idx_tris);
+CHECK_CUDA(grad_dists);
 return FacePointDistanceBackwardCuda(points, tris, idx_tris, grad_dists);
 #else
 AT_ERROR("Not compiled with GPU support.");
@@ -250,8 +250,8 @@ torch::Tensor PointFaceArrayDistanceForward(
 const torch::Tensor& tris) {
 if (points.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(points);
-CHECK_CONTIGUOUS_CUDA(tris);
+CHECK_CUDA(points);
+CHECK_CUDA(tris);
 return PointFaceArrayDistanceForwardCuda(points, tris);
 #else
 AT_ERROR("Not compiled with GPU support.");
@@ -285,9 +285,9 @@ std::tuple<torch::Tensor, torch::Tensor> PointFaceArrayDistanceBackward(
 const torch::Tensor& grad_dists) {
 if (points.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(points);
-CHECK_CONTIGUOUS_CUDA(tris);
-CHECK_CONTIGUOUS_CUDA(grad_dists);
+CHECK_CUDA(points);
+CHECK_CUDA(tris);
+CHECK_CUDA(grad_dists);
 return PointFaceArrayDistanceBackwardCuda(points, tris, grad_dists);
 #else
 AT_ERROR("Not compiled with GPU support.");

@@ -348,10 +348,10 @@ RasterizeMeshesNaiveCuda(
 H,
 W,
 K,
-face_idxs.contiguous().data_ptr<int64_t>(),
-zbuf.contiguous().data_ptr<float>(),
-pix_dists.contiguous().data_ptr<float>(),
-bary.contiguous().data_ptr<float>());
+face_idxs.data_ptr<int64_t>(),
+zbuf.data_ptr<float>(),
+pix_dists.data_ptr<float>(),
+bary.data_ptr<float>());

 AT_CUDA_CHECK(cudaGetLastError());
 return std::make_tuple(face_idxs, zbuf, bary, pix_dists);
@@ -530,7 +530,7 @@ at::Tensor RasterizeMeshesBackwardCuda(
 grad_zbuf.contiguous().data_ptr<float>(),
 grad_bary.contiguous().data_ptr<float>(),
 grad_dists.contiguous().data_ptr<float>(),
-grad_face_verts.contiguous().data_ptr<float>());
+grad_face_verts.data_ptr<float>());

 AT_CUDA_CHECK(cudaGetLastError());
 return grad_face_verts;
@@ -727,8 +727,8 @@ at::Tensor RasterizeMeshesCoarseCuda(
 bin_size,
 chunk_size,
 M,
-faces_per_bin.contiguous().data_ptr<int32_t>(),
-bin_faces.contiguous().data_ptr<int32_t>());
+faces_per_bin.data_ptr<int32_t>(),
+bin_faces.data_ptr<int32_t>());

 AT_CUDA_CHECK(cudaGetLastError());
 return bin_faces;
@@ -897,10 +897,10 @@ RasterizeMeshesFineCuda(
 H,
 W,
 K,
-face_idxs.contiguous().data_ptr<int64_t>(),
-zbuf.contiguous().data_ptr<float>(),
-pix_dists.contiguous().data_ptr<float>(),
-bary.contiguous().data_ptr<float>());
+face_idxs.data_ptr<int64_t>(),
+zbuf.data_ptr<float>(),
+pix_dists.data_ptr<float>(),
+bary.data_ptr<float>());

 return std::make_tuple(face_idxs, zbuf, bary, pix_dists);
 }

@@ -96,9 +96,9 @@ RasterizeMeshesNaive(
 // TODO: Better type checking.
 if (face_verts.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(face_verts);
-CHECK_CONTIGUOUS_CUDA(mesh_to_face_first_idx);
-CHECK_CONTIGUOUS_CUDA(num_faces_per_mesh);
+CHECK_CUDA(face_verts);
+CHECK_CUDA(mesh_to_face_first_idx);
+CHECK_CUDA(num_faces_per_mesh);
 return RasterizeMeshesNaiveCuda(
 face_verts,
 mesh_to_face_first_idx,
@@ -179,11 +179,11 @@ torch::Tensor RasterizeMeshesBackward(
 const bool perspective_correct) {
 if (face_verts.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(face_verts);
-CHECK_CONTIGUOUS_CUDA(pix_to_face);
-CHECK_CONTIGUOUS_CUDA(grad_zbuf);
-CHECK_CONTIGUOUS_CUDA(grad_bary);
-CHECK_CONTIGUOUS_CUDA(grad_dists);
+CHECK_CUDA(face_verts);
+CHECK_CUDA(pix_to_face);
+CHECK_CUDA(grad_zbuf);
+CHECK_CUDA(grad_bary);
+CHECK_CUDA(grad_dists);
 return RasterizeMeshesBackwardCuda(
 face_verts,
 pix_to_face,
@@ -260,9 +260,9 @@ torch::Tensor RasterizeMeshesCoarse(
 const int max_faces_per_bin) {
 if (face_verts.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(face_verts);
-CHECK_CONTIGUOUS_CUDA(mesh_to_face_first_idx);
-CHECK_CONTIGUOUS_CUDA(num_faces_per_mesh);
+CHECK_CUDA(face_verts);
+CHECK_CUDA(mesh_to_face_first_idx);
+CHECK_CUDA(num_faces_per_mesh);
 return RasterizeMeshesCoarseCuda(
 face_verts,
 mesh_to_face_first_idx,
@@ -359,8 +359,8 @@ RasterizeMeshesFine(
 const bool cull_backfaces) {
 if (face_verts.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(face_verts);
-CHECK_CONTIGUOUS_CUDA(bin_faces);
+CHECK_CUDA(face_verts);
+CHECK_CUDA(bin_faces);
 return RasterizeMeshesFineCuda(
 face_verts,
 bin_faces,

@@ -67,9 +67,9 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> RasterizePointsNaive(
 if (points.is_cuda() && cloud_to_packed_first_idx.is_cuda() &&
 num_points_per_cloud.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(points);
-CHECK_CONTIGUOUS_CUDA(cloud_to_packed_first_idx);
-CHECK_CONTIGUOUS_CUDA(num_points_per_cloud);
+CHECK_CUDA(points);
+CHECK_CUDA(cloud_to_packed_first_idx);
+CHECK_CUDA(num_points_per_cloud);
 return RasterizePointsNaiveCuda(
 points,
 cloud_to_packed_first_idx,
@@ -144,9 +144,9 @@ torch::Tensor RasterizePointsCoarse(
 if (points.is_cuda() && cloud_to_packed_first_idx.is_cuda() &&
 num_points_per_cloud.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(points);
-CHECK_CONTIGUOUS_CUDA(cloud_to_packed_first_idx);
-CHECK_CONTIGUOUS_CUDA(num_points_per_cloud);
+CHECK_CUDA(points);
+CHECK_CUDA(cloud_to_packed_first_idx);
+CHECK_CUDA(num_points_per_cloud);
 return RasterizePointsCoarseCuda(
 points,
 cloud_to_packed_first_idx,
@@ -215,8 +215,8 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> RasterizePointsFine(
 const int points_per_pixel) {
 if (points.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(points);
-CHECK_CONTIGUOUS_CUDA(bin_points);
+CHECK_CUDA(points);
+CHECK_CUDA(bin_points);
 return RasterizePointsFineCuda(
 points, bin_points, image_size, radius, bin_size, points_per_pixel);
 #else
@@ -266,10 +266,10 @@ torch::Tensor RasterizePointsBackward(
 const torch::Tensor& grad_dists) {
 if (points.is_cuda()) {
 #ifdef WITH_CUDA
-CHECK_CONTIGUOUS_CUDA(points);
-CHECK_CONTIGUOUS_CUDA(idxs);
-CHECK_CONTIGUOUS_CUDA(grad_zbuf);
-CHECK_CONTIGUOUS_CUDA(grad_dists);
+CHECK_CUDA(points);
+CHECK_CUDA(idxs);
+CHECK_CUDA(grad_zbuf);
+CHECK_CUDA(grad_dists);
 return RasterizePointsBackwardCuda(points, idxs, grad_zbuf, grad_dists);
 #else
 AT_ERROR("Not compiled with GPU support");