diff --git a/pytorch3d/csrc/compositing/alpha_composite.cu b/pytorch3d/csrc/compositing/alpha_composite.cu
index 27c5d7e2..d9c33bee 100644
--- a/pytorch3d/csrc/compositing/alpha_composite.cu
+++ b/pytorch3d/csrc/compositing/alpha_composite.cu
@@ -168,6 +168,8 @@ at::Tensor alphaCompositeCudaForward(
   // doubles. Currently, support is for floats only.
   alphaCompositeCudaForwardKernel<<<numBlocks, threadsPerBlock>>>(
       // clang-format off
+      // As we are using packed accessors here the tensors
+      // do not need to be made contiguous.
       result.packed_accessor64<float, 4, torch::RestrictPtrTraits>(),
       features.packed_accessor64<float, 2, torch::RestrictPtrTraits>(),
       alphas.packed_accessor64<float, 4, torch::RestrictPtrTraits>(),
@@ -211,6 +213,8 @@ std::tuple<torch::Tensor, torch::Tensor> alphaCompositeCudaBackward(
   // doubles. Currently, support is for floats only.
   alphaCompositeCudaBackwardKernel<<<numBlocks, threadsPerBlock>>>(
       // clang-format off
+      // As we are using packed accessors here the tensors
+      // do not need to be made contiguous.
       grad_features.packed_accessor64<float, 2, torch::RestrictPtrTraits>(),
       grad_alphas.packed_accessor64<float, 4, torch::RestrictPtrTraits>(),
       grad_outputs.packed_accessor64<float, 4, torch::RestrictPtrTraits>(),
diff --git a/pytorch3d/csrc/compositing/alpha_composite.h b/pytorch3d/csrc/compositing/alpha_composite.h
index 61b1fbbc..735d87e1 100644
--- a/pytorch3d/csrc/compositing/alpha_composite.h
+++ b/pytorch3d/csrc/compositing/alpha_composite.h
@@ -60,18 +60,14 @@ torch::Tensor alphaCompositeForward(
   if (features.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(features);
-    CHECK_CONTIGUOUS_CUDA(alphas);
-    CHECK_CONTIGUOUS_CUDA(points_idx);
+    CHECK_CUDA(features);
+    CHECK_CUDA(alphas);
+    CHECK_CUDA(points_idx);
     return alphaCompositeCudaForward(features, alphas, points_idx);
 #else
     AT_ERROR("Not compiled with GPU support");
 #endif
   } else {
-    CHECK_CONTIGUOUS(features);
-    CHECK_CONTIGUOUS(alphas);
-    CHECK_CONTIGUOUS(points_idx);
-
     return alphaCompositeCpuForward(features, alphas, points_idx);
   }
 }
@@ -88,10 +84,10 @@ std::tuple<torch::Tensor, torch::Tensor> alphaCompositeBackward(
   if (grad_outputs.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(grad_outputs);
-    CHECK_CONTIGUOUS_CUDA(features);
-    CHECK_CONTIGUOUS_CUDA(alphas);
-    CHECK_CONTIGUOUS_CUDA(points_idx);
+    CHECK_CUDA(grad_outputs);
+    CHECK_CUDA(features);
+    CHECK_CUDA(alphas);
+    CHECK_CUDA(points_idx);
     return alphaCompositeCudaBackward(
         grad_outputs, features, alphas, points_idx);
@@ -99,11 +95,6 @@ std::tuple<torch::Tensor, torch::Tensor> alphaCompositeBackward(
     AT_ERROR("Not compiled with GPU support");
 #endif
   } else {
-    CHECK_CONTIGUOUS(grad_outputs);
-    CHECK_CONTIGUOUS(features);
-    CHECK_CONTIGUOUS(alphas);
-    CHECK_CONTIGUOUS(points_idx);
-
     return alphaCompositeCpuBackward(
         grad_outputs, features, alphas, points_idx);
   }
diff --git a/pytorch3d/csrc/compositing/norm_weighted_sum.cu b/pytorch3d/csrc/compositing/norm_weighted_sum.cu
index d3d094ff..a787e1fa 100644
--- a/pytorch3d/csrc/compositing/norm_weighted_sum.cu
+++ b/pytorch3d/csrc/compositing/norm_weighted_sum.cu
@@ -183,6 +183,8 @@ at::Tensor weightedSumNormCudaForward(
   // doubles. Currently, support is for floats only.
   // clang-format off
   weightedSumNormCudaForwardKernel<<<numBlocks, threadsPerBlock>>>(
+      // As we are using packed accessors here the tensors
+      // do not need to be made contiguous.
       result.packed_accessor64<float, 4, torch::RestrictPtrTraits>(),
       features.packed_accessor64<float, 2, torch::RestrictPtrTraits>(),
       alphas.packed_accessor64<float, 4, torch::RestrictPtrTraits>(),
@@ -227,6 +229,8 @@ std::tuple<torch::Tensor, torch::Tensor> weightedSumNormCudaBackward(
   // doubles. Currently, support is for floats only.
   weightedSumNormCudaBackwardKernel<<<numBlocks, threadsPerBlock>>>(
       // clang-format off
+      // As we are using packed accessors here the tensors
+      // do not need to be made contiguous.
       grad_features.packed_accessor64<float, 2, torch::RestrictPtrTraits>(),
       grad_alphas.packed_accessor64<float, 4, torch::RestrictPtrTraits>(),
       grad_outputs.packed_accessor64<float, 4, torch::RestrictPtrTraits>(),
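The comment added in these kernels is the heart of the change: a `PackedTensorAccessor64` carries sizes and strides along with the data pointer, so indexing works for non-contiguous tensors. A minimal sketch of the mechanism (a hypothetical kernel, not part of this PR):

```cpp
#include <torch/extension.h>

// Hypothetical kernel illustrating why packed accessors remove the
// contiguity requirement: operator[] applies the per-dimension strides
// that the accessor carries, so no particular memory layout is assumed.
__global__ void ScaleKernel(
    torch::PackedTensorAccessor64<float, 2, torch::RestrictPtrTraits> x,
    const float scale) {
  const int64_t i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < x.size(0)) {
    for (int64_t j = 0; j < x.size(1); ++j) {
      x[i][j] *= scale; // correct even if x.stride(1) != 1
    }
  }
}

void Scale(at::Tensor x, float scale) {
  const int threads = 256;
  const int blocks = (x.size(0) + threads - 1) / threads;
  // No x.contiguous() needed: the accessor encodes the actual strides.
  ScaleKernel<<<blocks, threads>>>(
      x.packed_accessor64<float, 2, torch::RestrictPtrTraits>(), scale);
}
```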
diff --git a/pytorch3d/csrc/compositing/norm_weighted_sum.h b/pytorch3d/csrc/compositing/norm_weighted_sum.h
index 2d17eafc..34c271bc 100644
--- a/pytorch3d/csrc/compositing/norm_weighted_sum.h
+++ b/pytorch3d/csrc/compositing/norm_weighted_sum.h
@@ -58,19 +58,15 @@ torch::Tensor weightedSumNormForward(
   if (features.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(features);
-    CHECK_CONTIGUOUS_CUDA(alphas);
-    CHECK_CONTIGUOUS_CUDA(points_idx);
+    CHECK_CUDA(features);
+    CHECK_CUDA(alphas);
+    CHECK_CUDA(points_idx);
     return weightedSumNormCudaForward(features, alphas, points_idx);
 #else
     AT_ERROR("Not compiled with GPU support");
 #endif
   } else {
-    CHECK_CONTIGUOUS(features);
-    CHECK_CONTIGUOUS(alphas);
-    CHECK_CONTIGUOUS(points_idx);
-
     return weightedSumNormCpuForward(features, alphas, points_idx);
   }
 }
@@ -87,10 +83,10 @@ std::tuple<torch::Tensor, torch::Tensor> weightedSumNormBackward(
   if (grad_outputs.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(grad_outputs);
-    CHECK_CONTIGUOUS_CUDA(features);
-    CHECK_CONTIGUOUS_CUDA(alphas);
-    CHECK_CONTIGUOUS_CUDA(points_idx);
+    CHECK_CUDA(grad_outputs);
+    CHECK_CUDA(features);
+    CHECK_CUDA(alphas);
+    CHECK_CUDA(points_idx);
     return weightedSumNormCudaBackward(
         grad_outputs, features, alphas, points_idx);
@@ -98,11 +94,6 @@ std::tuple<torch::Tensor, torch::Tensor> weightedSumNormBackward(
     AT_ERROR("Not compiled with GPU support");
 #endif
   } else {
-    CHECK_CONTIGUOUS(grad_outputs);
-    CHECK_CONTIGUOUS(features);
-    CHECK_CONTIGUOUS(alphas);
-    CHECK_CONTIGUOUS(points_idx);
-
     return weightedSumNormCpuBackward(
         grad_outputs, features, alphas, points_idx);
   }
diff --git a/pytorch3d/csrc/compositing/weighted_sum.cu b/pytorch3d/csrc/compositing/weighted_sum.cu
index 862aea0a..68ec351e 100644
--- a/pytorch3d/csrc/compositing/weighted_sum.cu
+++ b/pytorch3d/csrc/compositing/weighted_sum.cu
@@ -142,6 +142,8 @@ at::Tensor weightedSumCudaForward(
   // doubles. Currently, support is for floats only.
   weightedSumCudaForwardKernel<<<numBlocks, threadsPerBlock>>>(
       // clang-format off
+      // As we are using packed accessors here the tensors
+      // do not need to be made contiguous.
       result.packed_accessor64<float, 4, torch::RestrictPtrTraits>(),
       features.packed_accessor64<float, 2, torch::RestrictPtrTraits>(),
       alphas.packed_accessor64<float, 4, torch::RestrictPtrTraits>(),
@@ -185,6 +187,8 @@ std::tuple<torch::Tensor, torch::Tensor> weightedSumCudaBackward(
   // doubles. Currently, support is for floats only.
   weightedSumCudaBackwardKernel<<<numBlocks, threadsPerBlock>>>(
       // clang-format off
+      // As we are using packed accessors here the tensors
+      // do not need to be made contiguous.
       grad_features.packed_accessor64<float, 2, torch::RestrictPtrTraits>(),
       grad_alphas.packed_accessor64<float, 4, torch::RestrictPtrTraits>(),
       grad_outputs.packed_accessor64<float, 4, torch::RestrictPtrTraits>(),
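For reference, the two guard macros swapped throughout these headers differ only in the contiguity assert. Assuming the usual definitions (pytorch3d keeps these in a shared utility header, which this diff does not touch), along these lines:

```cpp
// Assumed definitions, along the lines of pytorch3d's utility header:
#define CHECK_CUDA(x) TORCH_CHECK(x.is_cuda(), #x " must be a CUDA tensor.")
#define CHECK_CONTIGUOUS(x) \
  TORCH_CHECK(x.is_contiguous(), #x " must be contiguous.")
#define CHECK_CONTIGUOUS_CUDA(x) \
  CHECK_CUDA(x);                 \
  CHECK_CONTIGUOUS(x)
```

So replacing CHECK_CONTIGUOUS_CUDA with CHECK_CUDA keeps the device guard while letting non-contiguous tensors through to kernels that now either use stride-aware accessors or copy via `.contiguous()`.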
diff --git a/pytorch3d/csrc/compositing/weighted_sum.h b/pytorch3d/csrc/compositing/weighted_sum.h
index 89e15809..4928a252 100644
--- a/pytorch3d/csrc/compositing/weighted_sum.h
+++ b/pytorch3d/csrc/compositing/weighted_sum.h
@@ -58,18 +58,14 @@ torch::Tensor weightedSumForward(
   if (features.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(features);
-    CHECK_CONTIGUOUS_CUDA(alphas);
-    CHECK_CONTIGUOUS_CUDA(points_idx);
+    CHECK_CUDA(features);
+    CHECK_CUDA(alphas);
+    CHECK_CUDA(points_idx);
     return weightedSumCudaForward(features, alphas, points_idx);
 #else
     AT_ERROR("Not compiled with GPU support");
 #endif
   } else {
-    CHECK_CONTIGUOUS(features);
-    CHECK_CONTIGUOUS(alphas);
-    CHECK_CONTIGUOUS(points_idx);
-
     return weightedSumCpuForward(features, alphas, points_idx);
   }
 }
@@ -86,21 +82,16 @@ std::tuple<torch::Tensor, torch::Tensor> weightedSumBackward(
   if (grad_outputs.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(grad_outputs);
-    CHECK_CONTIGUOUS_CUDA(features);
-    CHECK_CONTIGUOUS_CUDA(alphas);
-    CHECK_CONTIGUOUS_CUDA(points_idx);
+    CHECK_CUDA(grad_outputs);
+    CHECK_CUDA(features);
+    CHECK_CUDA(alphas);
+    CHECK_CUDA(points_idx);
     return weightedSumCudaBackward(grad_outputs, features, alphas, points_idx);
 #else
     AT_ERROR("Not compiled with GPU support");
 #endif
   } else {
-    CHECK_CONTIGUOUS(grad_outputs);
-    CHECK_CONTIGUOUS(features);
-    CHECK_CONTIGUOUS(alphas);
-    CHECK_CONTIGUOUS(points_idx);
-
     return weightedSumCpuBackward(grad_outputs, features, alphas, points_idx);
   }
 }
diff --git a/pytorch3d/csrc/face_areas_normals/face_areas_normals.cu b/pytorch3d/csrc/face_areas_normals/face_areas_normals.cu
index e1ee2261..6e286add 100644
--- a/pytorch3d/csrc/face_areas_normals/face_areas_normals.cu
+++ b/pytorch3d/csrc/face_areas_normals/face_areas_normals.cu
@@ -239,8 +239,8 @@ std::tuple<at::Tensor, at::Tensor> FaceAreasNormalsForwardCuda(
   AT_DISPATCH_FLOATING_TYPES(
       verts.scalar_type(), "face_areas_normals_forward_cuda", ([&] {
         FaceAreasNormalsForwardKernel<scalar_t><<<blocks, threads>>>(
-            verts.data_ptr<scalar_t>(),
-            faces.data_ptr<int64_t>(),
+            verts.contiguous().data_ptr<scalar_t>(),
+            faces.contiguous().data_ptr<int64_t>(),
             areas.data_ptr<scalar_t>(),
             normals.data_ptr<scalar_t>(),
             V,
@@ -282,10 +282,10 @@ at::Tensor FaceAreasNormalsBackwardCuda(
   // TODO(gkioxari) add AT_DISPATCH_FLOATING_TYPES once atomicAdd supports
   // doubles. Currently, support is for floats only.
   FaceAreasNormalsBackwardKernel<<<blocks, threads>>>(
-      grad_areas.data_ptr<float>(),
-      grad_normals.data_ptr<float>(),
-      verts.data_ptr<float>(),
-      faces.data_ptr<int64_t>(),
+      grad_areas.contiguous().data_ptr<float>(),
+      grad_normals.contiguous().data_ptr<float>(),
+      verts.contiguous().data_ptr<float>(),
+      faces.contiguous().data_ptr<int64_t>(),
       grad_verts.data_ptr<float>(),
       V,
       F);
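The raw-pointer kernels take the other route: `.contiguous()` returns the same tensor unchanged when the input is already contiguous (so the common case costs nothing), and otherwise materializes a contiguous copy that lives until the end of the launch statement. An equivalent arrangement with the copies hoisted into locals, which makes the no-copy fast path and the copies' lifetimes easier to see (a sketch, not the PR's code):

```cpp
// Equivalent arrangement with the copies hoisted (illustrative only).
// contiguous() is a no-op returning the same tensor when the input is
// already contiguous, so nothing extra is paid in the common case.
const at::Tensor verts_c = verts.contiguous();
const at::Tensor faces_c = faces.contiguous();
AT_DISPATCH_FLOATING_TYPES(
    verts.scalar_type(), "face_areas_normals_forward_cuda", ([&] {
      FaceAreasNormalsForwardKernel<scalar_t><<<blocks, threads>>>(
          verts_c.data_ptr<scalar_t>(),
          faces_c.data_ptr<int64_t>(),
          areas.data_ptr<scalar_t>(),
          normals.data_ptr<scalar_t>(),
          V,
          F);
    }));
```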
diff --git a/pytorch3d/csrc/face_areas_normals/face_areas_normals.h b/pytorch3d/csrc/face_areas_normals/face_areas_normals.h
index 3188ad3b..ecc8827a 100644
--- a/pytorch3d/csrc/face_areas_normals/face_areas_normals.h
+++ b/pytorch3d/csrc/face_areas_normals/face_areas_normals.h
@@ -47,8 +47,8 @@ std::tuple<at::Tensor, at::Tensor> FaceAreasNormalsForward(
     const at::Tensor faces) {
   if (verts.is_cuda() && faces.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(verts);
-    CHECK_CONTIGUOUS_CUDA(faces);
+    CHECK_CUDA(verts);
+    CHECK_CUDA(faces);
     return FaceAreasNormalsForwardCuda(verts, faces);
 #else
     AT_ERROR("Not compiled with GPU support.");
@@ -65,10 +65,10 @@ at::Tensor FaceAreasNormalsBackward(
     const at::Tensor faces) {
   if (verts.is_cuda() && faces.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(verts);
-    CHECK_CONTIGUOUS_CUDA(faces);
-    CHECK_CONTIGUOUS_CUDA(grad_areas);
-    CHECK_CONTIGUOUS_CUDA(grad_normals);
+    CHECK_CUDA(verts);
+    CHECK_CUDA(faces);
+    CHECK_CUDA(grad_areas);
+    CHECK_CUDA(grad_normals);
     return FaceAreasNormalsBackwardCuda(grad_areas, grad_normals, verts, faces);
 #else
     AT_ERROR("Not compiled with GPU support.");
diff --git a/pytorch3d/csrc/gather_scatter/gather_scatter.cu b/pytorch3d/csrc/gather_scatter/gather_scatter.cu
index 4740a00e..0e744a11 100644
--- a/pytorch3d/csrc/gather_scatter/gather_scatter.cu
+++ b/pytorch3d/csrc/gather_scatter/gather_scatter.cu
@@ -72,8 +72,8 @@ at::Tensor GatherScatterCuda(
   }

   GatherScatterCudaKernel<<<blocks, threads>>>(
-      input.data_ptr<float>(),
-      edges.data_ptr<int64_t>(),
+      input.contiguous().data_ptr<float>(),
+      edges.contiguous().data_ptr<int64_t>(),
       output.data_ptr<float>(),
       directed,
       backward,
diff --git a/pytorch3d/csrc/gather_scatter/gather_scatter.h b/pytorch3d/csrc/gather_scatter/gather_scatter.h
index 53f3d1ac..864e84ff 100644
--- a/pytorch3d/csrc/gather_scatter/gather_scatter.h
+++ b/pytorch3d/csrc/gather_scatter/gather_scatter.h
@@ -35,8 +35,8 @@ at::Tensor GatherScatter(
     bool backward) {
   if (input.is_cuda() && edges.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(input);
-    CHECK_CONTIGUOUS_CUDA(edges);
+    CHECK_CUDA(input);
+    CHECK_CUDA(edges);
     return GatherScatterCuda(input, edges, directed, backward);
 #else
     AT_ERROR("Not compiled with GPU support.");
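The user-visible effect of these header changes is that the ops now accept non-contiguous inputs instead of erroring. A hedged caller sketch (shapes and sizes are illustrative only):

```cpp
#include <torch/torch.h>

// `input` below is a non-contiguous view (a transpose). Before this PR the
// CHECK_CONTIGUOUS_CUDA guard would have rejected it; now GatherScatter
// handles it by copying internally via .contiguous().
at::Tensor input =
    torch::rand({16, 1000}, torch::kCUDA).t(); // (1000, 16), non-contiguous
at::Tensor edges = torch::randint(
    1000, {500, 2}, torch::dtype(torch::kInt64).device(torch::kCUDA));
at::Tensor out =
    GatherScatter(input, edges, /*directed=*/false, /*backward=*/false);
```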
diff --git a/pytorch3d/csrc/knn/knn.cu b/pytorch3d/csrc/knn/knn.cu
index 4a842f21..c679362e 100644
--- a/pytorch3d/csrc/knn/knn.cu
+++ b/pytorch3d/csrc/knn/knn.cu
@@ -347,21 +347,21 @@ std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdxCuda(
   const size_t threads = 256;
   const size_t blocks = 256;
   if (version == 0) {
-    AT_DISPATCH_FLOATING_TYPES(p1.scalar_type(), "knn_kernel_cuda", ([&] {
-      KNearestNeighborKernelV0<scalar_t>
-          <<<blocks, threads>>>(
-              p1.data_ptr<scalar_t>(),
-              p2.data_ptr<scalar_t>(),
-              lengths1.data_ptr<int64_t>(),
-              lengths2.data_ptr<int64_t>(),
-              dists.data_ptr<scalar_t>(),
-              idxs.data_ptr<int64_t>(),
-              N,
-              P1,
-              P2,
-              D,
-              K);
-    }));
+    AT_DISPATCH_FLOATING_TYPES(
+        p1.scalar_type(), "knn_kernel_cuda", ([&] {
+          KNearestNeighborKernelV0<scalar_t><<<blocks, threads>>>(
+              p1.contiguous().data_ptr<scalar_t>(),
+              p2.contiguous().data_ptr<scalar_t>(),
+              lengths1.contiguous().data_ptr<int64_t>(),
+              lengths2.contiguous().data_ptr<int64_t>(),
+              dists.data_ptr<scalar_t>(),
+              idxs.data_ptr<int64_t>(),
+              N,
+              P1,
+              P2,
+              D,
+              K);
+        }));
   } else if (version == 1) {
     AT_DISPATCH_FLOATING_TYPES(p1.scalar_type(), "knn_kernel_cuda", ([&] {
       DispatchKernel1D<
@@ -372,10 +372,10 @@ std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdxCuda(
           D,
           blocks,
           threads,
-          p1.data_ptr<scalar_t>(),
-          p2.data_ptr<scalar_t>(),
-          lengths1.data_ptr<int64_t>(),
-          lengths2.data_ptr<int64_t>(),
+          p1.contiguous().data_ptr<scalar_t>(),
+          p2.contiguous().data_ptr<scalar_t>(),
+          lengths1.contiguous().data_ptr<int64_t>(),
+          lengths2.contiguous().data_ptr<int64_t>(),
           dists.data_ptr<scalar_t>(),
           idxs.data_ptr<int64_t>(),
           N,
@@ -396,10 +396,10 @@ std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdxCuda(
           K_64,
           blocks,
           threads,
-          p1.data_ptr<scalar_t>(),
-          p2.data_ptr<scalar_t>(),
-          lengths1.data_ptr<int64_t>(),
-          lengths2.data_ptr<int64_t>(),
+          p1.contiguous().data_ptr<scalar_t>(),
+          p2.contiguous().data_ptr<scalar_t>(),
+          lengths1.contiguous().data_ptr<int64_t>(),
+          lengths2.contiguous().data_ptr<int64_t>(),
           dists.data_ptr<scalar_t>(),
           idxs.data_ptr<int64_t>(),
           N,
@@ -419,10 +419,10 @@ std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdxCuda(
           K_64,
           blocks,
           threads,
-          p1.data_ptr<scalar_t>(),
-          p2.data_ptr<scalar_t>(),
-          lengths1.data_ptr<int64_t>(),
-          lengths2.data_ptr<int64_t>(),
+          p1.contiguous().data_ptr<scalar_t>(),
+          p2.contiguous().data_ptr<scalar_t>(),
+          lengths1.contiguous().data_ptr<int64_t>(),
+          lengths2.contiguous().data_ptr<int64_t>(),
           dists.data_ptr<scalar_t>(),
           idxs.data_ptr<int64_t>(),
           N,
@@ -525,12 +525,12 @@ std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackwardCuda(
   const int threads = 512;

   KNearestNeighborBackwardKernel<<<blocks, threads>>>(
-      p1.data_ptr<float>(),
-      p2.data_ptr<float>(),
-      lengths1.data_ptr<int64_t>(),
-      lengths2.data_ptr<int64_t>(),
-      idxs.data_ptr<int64_t>(),
-      grad_dists.data_ptr<float>(),
+      p1.contiguous().data_ptr<float>(),
+      p2.contiguous().data_ptr<float>(),
+      lengths1.contiguous().data_ptr<int64_t>(),
+      lengths2.contiguous().data_ptr<int64_t>(),
+      idxs.contiguous().data_ptr<int64_t>(),
+      grad_dists.contiguous().data_ptr<float>(),
       grad_p1.data_ptr<float>(),
       grad_p2.data_ptr<float>(),
       N,
diff --git a/pytorch3d/csrc/knn/knn.h b/pytorch3d/csrc/knn/knn.h
index 9a4b42f6..77c83221 100644
--- a/pytorch3d/csrc/knn/knn.h
+++ b/pytorch3d/csrc/knn/knn.h
@@ -56,8 +56,8 @@ std::tuple<at::Tensor, at::Tensor> KNearestNeighborIdx(
     int version) {
   if (p1.is_cuda() || p2.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(p1);
-    CHECK_CONTIGUOUS_CUDA(p2);
+    CHECK_CUDA(p1);
+    CHECK_CUDA(p2);
     return KNearestNeighborIdxCuda(p1, p2, lengths1, lengths2, K, version);
 #else
     AT_ERROR("Not compiled with GPU support.");
@@ -117,8 +117,8 @@ std::tuple<at::Tensor, at::Tensor> KNearestNeighborBackward(
     const at::Tensor& grad_dists) {
   if (p1.is_cuda() || p2.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(p1);
-    CHECK_CONTIGUOUS_CUDA(p2);
+    CHECK_CUDA(p1);
+    CHECK_CUDA(p2);
     return KNearestNeighborBackwardCuda(
         p1, p2, lengths1, lengths2, idxs, grad_dists);
 #else
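One consequence of the inline pattern in knn.cu is that `p1.contiguous()` appears once per version branch; since exactly one branch runs, each tensor is still copied at most once per call. An alternative arrangement (a sketch under that observation, not the PR's code) would hoist the calls ahead of the dispatch and make the copies' lifetimes explicit across the kernel launches:

```cpp
// Sketch: make the inputs contiguous once, before the version dispatch.
// Each branch then reads from tensors known to be contiguous.
const at::Tensor p1_c = p1.contiguous();
const at::Tensor p2_c = p2.contiguous();
const at::Tensor lengths1_c = lengths1.contiguous();
const at::Tensor lengths2_c = lengths2.contiguous();
// ... then inside each AT_DISPATCH branch: p1_c.data_ptr<scalar_t>(), etc.
```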
diff --git a/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.cu b/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.cu
index 09e408a7..f185c674 100644
--- a/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.cu
+++ b/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.cu
@@ -146,8 +146,8 @@ at::Tensor PackedToPaddedCuda(
   AT_DISPATCH_FLOATING_TYPES(
       inputs_packed.scalar_type(), "packed_to_padded_d1_kernel", ([&] {
         PackedToPaddedKernelD1<scalar_t><<<blocks, threads>>>(
-            inputs_packed.data_ptr<scalar_t>(),
-            first_idxs.data_ptr<int64_t>(),
+            inputs_packed.contiguous().data_ptr<scalar_t>(),
+            first_idxs.contiguous().data_ptr<int64_t>(),
             inputs_padded.data_ptr<scalar_t>(),
             batch_size,
             max_size,
@@ -157,8 +157,8 @@ at::Tensor PackedToPaddedCuda(
     AT_DISPATCH_FLOATING_TYPES(
         inputs_packed.scalar_type(), "packed_to_padded_kernel", ([&] {
           PackedToPaddedKernel<scalar_t><<<blocks, threads>>>(
-              inputs_packed.data_ptr<scalar_t>(),
-              first_idxs.data_ptr<int64_t>(),
+              inputs_packed.contiguous().data_ptr<scalar_t>(),
+              first_idxs.contiguous().data_ptr<int64_t>(),
               inputs_padded.data_ptr<scalar_t>(),
               batch_size,
               max_size,
@@ -209,8 +209,8 @@ at::Tensor PaddedToPackedCuda(
   AT_DISPATCH_FLOATING_TYPES(
       inputs_padded.scalar_type(), "padded_to_packed_d1_kernel", ([&] {
         PaddedToPackedKernelD1<scalar_t><<<blocks, threads>>>(
-            inputs_padded.data_ptr<scalar_t>(),
-            first_idxs.data_ptr<int64_t>(),
+            inputs_padded.contiguous().data_ptr<scalar_t>(),
+            first_idxs.contiguous().data_ptr<int64_t>(),
             inputs_packed.data_ptr<scalar_t>(),
             batch_size,
             max_size,
@@ -220,8 +220,8 @@ at::Tensor PaddedToPackedCuda(
     AT_DISPATCH_FLOATING_TYPES(
         inputs_padded.scalar_type(), "padded_to_packed_kernel", ([&] {
           PaddedToPackedKernel<scalar_t><<<blocks, threads>>>(
-              inputs_padded.data_ptr<scalar_t>(),
-              first_idxs.data_ptr<int64_t>(),
+              inputs_padded.contiguous().data_ptr<scalar_t>(),
+              first_idxs.contiguous().data_ptr<int64_t>(),
               inputs_packed.data_ptr<scalar_t>(),
               batch_size,
               max_size,
diff --git a/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.h b/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.h
index 234cf084..326f4dcb 100644
--- a/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.h
+++ b/pytorch3d/csrc/packed_to_padded_tensor/packed_to_padded_tensor.h
@@ -75,8 +75,8 @@ at::Tensor PackedToPadded(
     const int64_t max_size) {
   if (inputs_packed.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(inputs_packed);
-    CHECK_CONTIGUOUS_CUDA(first_idxs);
+    CHECK_CUDA(inputs_packed);
+    CHECK_CUDA(first_idxs);
     return PackedToPaddedCuda(inputs_packed, first_idxs, max_size);
 #else
     AT_ERROR("Not compiled with GPU support.");
@@ -92,8 +92,8 @@ at::Tensor PaddedToPacked(
     const int64_t num_inputs) {
   if (inputs_padded.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(inputs_padded);
-    CHECK_CONTIGUOUS_CUDA(first_idxs);
+    CHECK_CUDA(inputs_padded);
+    CHECK_CUDA(first_idxs);
     return PaddedToPackedCuda(inputs_padded, first_idxs, num_inputs);
 #else
     AT_ERROR("Not compiled with GPU support.");
diff --git a/pytorch3d/csrc/point_mesh/point_mesh_edge.cu b/pytorch3d/csrc/point_mesh/point_mesh_edge.cu
index 5b438a10..98db3bd2 100644
--- a/pytorch3d/csrc/point_mesh/point_mesh_edge.cu
+++ b/pytorch3d/csrc/point_mesh/point_mesh_edge.cu
@@ -144,15 +144,16 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceForwardCuda(
   size_t shared_size = threads * sizeof(size_t) + threads * sizeof(int64_t);

   PointEdgeForwardKernel<<<blocks, threads, shared_size>>>(
-      points.data_ptr<float>(),
-      points_first_idx.data_ptr<int64_t>(),
-      segms.data_ptr<float>(),
-      segms_first_idx.data_ptr<int64_t>(),
+      points.contiguous().data_ptr<float>(),
+      points_first_idx.contiguous().data_ptr<int64_t>(),
+      segms.contiguous().data_ptr<float>(),
+      segms_first_idx.contiguous().data_ptr<int64_t>(),
       dists.data_ptr<float>(),
       idxs.data_ptr<int64_t>(),
       B,
       P,
       S);
+  AT_CUDA_CHECK(cudaGetLastError());

   return std::make_tuple(dists, idxs);
 }
@@ -240,10 +241,10 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceBackwardCuda(
   const int threads = 512;

   PointEdgeBackwardKernel<<<blocks, threads>>>(
-      points.data_ptr<float>(),
-      segms.data_ptr<float>(),
-      idx_points.data_ptr<int64_t>(),
-      grad_dists.data_ptr<float>(),
+      points.contiguous().data_ptr<float>(),
+      segms.contiguous().data_ptr<float>(),
+      idx_points.contiguous().data_ptr<int64_t>(),
+      grad_dists.contiguous().data_ptr<float>(),
       grad_points.data_ptr<float>(),
       grad_segms.data_ptr<float>(),
       P);
@@ -386,10 +387,10 @@ std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceForwardCuda(
   size_t shared_size = threads * sizeof(size_t) + threads * sizeof(int64_t);

   EdgePointForwardKernel<<<blocks, threads, shared_size>>>(
-      points.data_ptr<float>(),
-      points_first_idx.data_ptr<int64_t>(),
-      segms.data_ptr<float>(),
-      segms_first_idx.data_ptr<int64_t>(),
+      points.contiguous().data_ptr<float>(),
+      points_first_idx.contiguous().data_ptr<int64_t>(),
+      segms.contiguous().data_ptr<float>(),
+      segms_first_idx.contiguous().data_ptr<int64_t>(),
       dists.data_ptr<float>(),
       idxs.data_ptr<int64_t>(),
       B,
@@ -478,10 +479,10 @@ std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceBackwardCuda(
   const int threads = 512;

   EdgePointBackwardKernel<<<blocks, threads>>>(
-      points.data_ptr<float>(),
-      segms.data_ptr<float>(),
-      idx_segms.data_ptr<int64_t>(),
-      grad_dists.data_ptr<float>(),
+      points.contiguous().data_ptr<float>(),
+      segms.contiguous().data_ptr<float>(),
+      idx_segms.contiguous().data_ptr<int64_t>(),
+      grad_dists.contiguous().data_ptr<float>(),
       grad_points.data_ptr<float>(),
       grad_segms.data_ptr<float>(),
       S);
@@ -550,8 +551,8 @@ at::Tensor PointEdgeArrayDistanceForwardCuda(
   const size_t threads = 64;

   PointEdgeArrayForwardKernel<<<blocks, threads>>>(
-      points.data_ptr<float>(),
-      segms.data_ptr<float>(),
+      points.contiguous().data_ptr<float>(),
+      segms.contiguous().data_ptr<float>(),
       dists.data_ptr<float>(),
       P,
       S);
@@ -638,9 +639,9 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeArrayDistanceBackwardCuda(
   const size_t threads = 64;

   PointEdgeArrayBackwardKernel<<<blocks, threads>>>(
-      points.data_ptr<float>(),
-      segms.data_ptr<float>(),
-      grad_dists.data_ptr<float>(),
+      points.contiguous().data_ptr<float>(),
+      segms.contiguous().data_ptr<float>(),
+      grad_dists.contiguous().data_ptr<float>(),
       grad_points.data_ptr<float>(),
       grad_segms.data_ptr<float>(),
       P,
diff --git a/pytorch3d/csrc/point_mesh/point_mesh_edge.h b/pytorch3d/csrc/point_mesh/point_mesh_edge.h
index 2f72a746..96382017 100644
--- a/pytorch3d/csrc/point_mesh/point_mesh_edge.h
+++ b/pytorch3d/csrc/point_mesh/point_mesh_edge.h
@@ -54,10 +54,10 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceForward(
     const int64_t max_points) {
   if (points.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(points);
-    CHECK_CONTIGUOUS_CUDA(points_first_idx);
-    CHECK_CONTIGUOUS_CUDA(segms);
-    CHECK_CONTIGUOUS_CUDA(segms_first_idx);
+    CHECK_CUDA(points);
+    CHECK_CUDA(points_first_idx);
+    CHECK_CUDA(segms);
+    CHECK_CUDA(segms_first_idx);
     return PointEdgeDistanceForwardCuda(
         points, points_first_idx, segms, segms_first_idx, max_points);
 #else
@@ -98,10 +98,10 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeDistanceBackward(
     const torch::Tensor& grad_dists) {
   if (points.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(points);
-    CHECK_CONTIGUOUS_CUDA(segms);
-    CHECK_CONTIGUOUS_CUDA(idx_points);
-    CHECK_CONTIGUOUS_CUDA(grad_dists);
+    CHECK_CUDA(points);
+    CHECK_CUDA(segms);
+    CHECK_CUDA(idx_points);
+    CHECK_CUDA(grad_dists);
     return PointEdgeDistanceBackwardCuda(points, segms, idx_points, grad_dists);
 #else
     AT_ERROR("Not compiled with GPU support.");
@@ -158,10 +158,10 @@ std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceForward(
     const int64_t max_segms) {
   if (points.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(points);
-    CHECK_CONTIGUOUS_CUDA(points_first_idx);
-    CHECK_CONTIGUOUS_CUDA(segms);
-    CHECK_CONTIGUOUS_CUDA(segms_first_idx);
+    CHECK_CUDA(points);
+    CHECK_CUDA(points_first_idx);
+    CHECK_CUDA(segms);
+    CHECK_CUDA(segms_first_idx);
     return EdgePointDistanceForwardCuda(
         points, points_first_idx, segms, segms_first_idx, max_segms);
 #else
@@ -202,10 +202,10 @@ std::tuple<torch::Tensor, torch::Tensor> EdgePointDistanceBackward(
     const torch::Tensor& grad_dists) {
   if (points.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(points);
-    CHECK_CONTIGUOUS_CUDA(segms);
-    CHECK_CONTIGUOUS_CUDA(idx_segms);
-    CHECK_CONTIGUOUS_CUDA(grad_dists);
+    CHECK_CUDA(points);
+    CHECK_CUDA(segms);
+    CHECK_CUDA(idx_segms);
+    CHECK_CUDA(grad_dists);
     return EdgePointDistanceBackwardCuda(points, segms, idx_segms, grad_dists);
 #else
     AT_ERROR("Not compiled with GPU support.");
@@ -247,8 +247,8 @@ torch::Tensor PointEdgeArrayDistanceForward(
     const torch::Tensor& segms) {
   if (points.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(points);
-    CHECK_CONTIGUOUS_CUDA(segms);
+    CHECK_CUDA(points);
+    CHECK_CUDA(segms);
     return PointEdgeArrayDistanceForwardCuda(points, segms);
 #else
     AT_ERROR("Not compiled with GPU support.");
@@ -283,9 +283,9 @@ std::tuple<torch::Tensor, torch::Tensor> PointEdgeArrayDistanceBackward(
     const torch::Tensor& grad_dists) {
   if (points.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(points);
-    CHECK_CONTIGUOUS_CUDA(segms);
-    CHECK_CONTIGUOUS_CUDA(grad_dists);
+    CHECK_CUDA(points);
+    CHECK_CUDA(segms);
+    CHECK_CUDA(grad_dists);
     return PointEdgeArrayDistanceBackwardCuda(points, segms, grad_dists);
 #else
     AT_ERROR("Not compiled with GPU support.");
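point_mesh_edge.cu also picks up an `AT_CUDA_CHECK(cudaGetLastError())` after the forward launch. Kernel launches are asynchronous and report nothing on their own; polling `cudaGetLastError()` immediately after the launch is the standard way to surface launch failures as C++ exceptions. A minimal illustration (hypothetical kernel, not from this PR):

```cpp
#include <ATen/cuda/Exceptions.h>

__global__ void NoopKernel() {}

void LaunchChecked() {
  NoopKernel<<<1, 1>>>();
  // A bad launch configuration or missing kernel image is only observable
  // via cudaGetLastError(); AT_CUDA_CHECK throws if the status is non-zero.
  AT_CUDA_CHECK(cudaGetLastError());
}
```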
support."); diff --git a/pytorch3d/csrc/point_mesh/point_mesh_face.cu b/pytorch3d/csrc/point_mesh/point_mesh_face.cu index 9b1b22e4..d43cfe7b 100644 --- a/pytorch3d/csrc/point_mesh/point_mesh_face.cu +++ b/pytorch3d/csrc/point_mesh/point_mesh_face.cu @@ -145,10 +145,10 @@ std::tuple PointFaceDistanceForwardCuda( size_t shared_size = threads * sizeof(size_t) + threads * sizeof(int64_t); PointFaceForwardKernel<<>>( - points.data_ptr(), - points_first_idx.data_ptr(), - tris.data_ptr(), - tris_first_idx.data_ptr(), + points.contiguous().data_ptr(), + points_first_idx.contiguous().data_ptr(), + tris.contiguous().data_ptr(), + tris_first_idx.contiguous().data_ptr(), dists.data_ptr(), idxs.data_ptr(), B, @@ -249,10 +249,10 @@ std::tuple PointFaceDistanceBackwardCuda( const int threads = 512; PointFaceBackwardKernel<<>>( - points.data_ptr(), - tris.data_ptr(), - idx_points.data_ptr(), - grad_dists.data_ptr(), + points.contiguous().data_ptr(), + tris.contiguous().data_ptr(), + idx_points.contiguous().data_ptr(), + grad_dists.contiguous().data_ptr(), grad_points.data_ptr(), grad_tris.data_ptr(), P); @@ -396,10 +396,10 @@ std::tuple FacePointDistanceForwardCuda( size_t shared_size = threads * sizeof(size_t) + threads * sizeof(int64_t); FacePointForwardKernel<<>>( - points.data_ptr(), - points_first_idx.data_ptr(), - tris.data_ptr(), - tris_first_idx.data_ptr(), + points.contiguous().data_ptr(), + points_first_idx.contiguous().data_ptr(), + tris.contiguous().data_ptr(), + tris_first_idx.contiguous().data_ptr(), dists.data_ptr(), idxs.data_ptr(), B, @@ -501,10 +501,10 @@ std::tuple FacePointDistanceBackwardCuda( const int threads = 512; FacePointBackwardKernel<<>>( - points.data_ptr(), - tris.data_ptr(), - idx_tris.data_ptr(), - grad_dists.data_ptr(), + points.contiguous().data_ptr(), + tris.contiguous().data_ptr(), + idx_tris.contiguous().data_ptr(), + grad_dists.contiguous().data_ptr(), grad_points.data_ptr(), grad_tris.data_ptr(), T); @@ -575,8 +575,8 @@ at::Tensor PointFaceArrayDistanceForwardCuda( const size_t threads = 64; PointFaceArrayForwardKernel<<>>( - points.data_ptr(), - tris.data_ptr(), + points.contiguous().data_ptr(), + tris.contiguous().data_ptr(), dists.data_ptr(), P, T); @@ -672,9 +672,9 @@ std::tuple PointFaceArrayDistanceBackwardCuda( const size_t threads = 64; PointFaceArrayBackwardKernel<<>>( - points.data_ptr(), - tris.data_ptr(), - grad_dists.data_ptr(), + points.contiguous().data_ptr(), + tris.contiguous().data_ptr(), + grad_dists.contiguous().data_ptr(), grad_points.data_ptr(), grad_tris.data_ptr(), P, diff --git a/pytorch3d/csrc/point_mesh/point_mesh_face.h b/pytorch3d/csrc/point_mesh/point_mesh_face.h index 39b9b359..00f5eb0a 100644 --- a/pytorch3d/csrc/point_mesh/point_mesh_face.h +++ b/pytorch3d/csrc/point_mesh/point_mesh_face.h @@ -56,10 +56,10 @@ std::tuple PointFaceDistanceForward( const int64_t max_points) { if (points.is_cuda()) { #ifdef WITH_CUDA - CHECK_CONTIGUOUS_CUDA(points); - CHECK_CONTIGUOUS_CUDA(points_first_idx); - CHECK_CONTIGUOUS_CUDA(tris); - CHECK_CONTIGUOUS_CUDA(tris_first_idx); + CHECK_CUDA(points); + CHECK_CUDA(points_first_idx); + CHECK_CUDA(tris); + CHECK_CUDA(tris_first_idx); return PointFaceDistanceForwardCuda( points, points_first_idx, tris, tris_first_idx, max_points); #else @@ -100,10 +100,10 @@ std::tuple PointFaceDistanceBackward( const torch::Tensor& grad_dists) { if (points.is_cuda()) { #ifdef WITH_CUDA - CHECK_CONTIGUOUS_CUDA(points); - CHECK_CONTIGUOUS_CUDA(tris); - CHECK_CONTIGUOUS_CUDA(idx_points); - CHECK_CONTIGUOUS_CUDA(grad_dists); + 
+    CHECK_CUDA(points);
+    CHECK_CUDA(tris);
+    CHECK_CUDA(idx_points);
+    CHECK_CUDA(grad_dists);
     return PointFaceDistanceBackwardCuda(points, tris, idx_points, grad_dists);
 #else
     AT_ERROR("Not compiled with GPU support.");
@@ -160,10 +160,10 @@ std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceForward(
     const int64_t max_tris) {
   if (points.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(points);
-    CHECK_CONTIGUOUS_CUDA(points_first_idx);
-    CHECK_CONTIGUOUS_CUDA(tris);
-    CHECK_CONTIGUOUS_CUDA(tris_first_idx);
+    CHECK_CUDA(points);
+    CHECK_CUDA(points_first_idx);
+    CHECK_CUDA(tris);
+    CHECK_CUDA(tris_first_idx);
     return FacePointDistanceForwardCuda(
         points, points_first_idx, tris, tris_first_idx, max_tris);
 #else
@@ -204,10 +204,10 @@ std::tuple<torch::Tensor, torch::Tensor> FacePointDistanceBackward(
     const torch::Tensor& grad_dists) {
   if (points.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(points);
-    CHECK_CONTIGUOUS_CUDA(tris);
-    CHECK_CONTIGUOUS_CUDA(idx_tris);
-    CHECK_CONTIGUOUS_CUDA(grad_dists);
+    CHECK_CUDA(points);
+    CHECK_CUDA(tris);
+    CHECK_CUDA(idx_tris);
+    CHECK_CUDA(grad_dists);
     return FacePointDistanceBackwardCuda(points, tris, idx_tris, grad_dists);
 #else
     AT_ERROR("Not compiled with GPU support.");
@@ -250,8 +250,8 @@ torch::Tensor PointFaceArrayDistanceForward(
     const torch::Tensor& tris) {
   if (points.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(points);
-    CHECK_CONTIGUOUS_CUDA(tris);
+    CHECK_CUDA(points);
+    CHECK_CUDA(tris);
     return PointFaceArrayDistanceForwardCuda(points, tris);
 #else
     AT_ERROR("Not compiled with GPU support.");
@@ -285,9 +285,9 @@ std::tuple<torch::Tensor, torch::Tensor> PointFaceArrayDistanceBackward(
     const torch::Tensor& grad_dists) {
   if (points.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(points);
-    CHECK_CONTIGUOUS_CUDA(tris);
-    CHECK_CONTIGUOUS_CUDA(grad_dists);
+    CHECK_CUDA(points);
+    CHECK_CUDA(tris);
+    CHECK_CUDA(grad_dists);
     return PointFaceArrayDistanceBackwardCuda(points, tris, grad_dists);
 #else
     AT_ERROR("Not compiled with GPU support.");
diff --git a/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.cu b/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.cu
index 21f72c8f..e280b3f7 100644
--- a/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.cu
+++ b/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.cu
@@ -348,10 +348,10 @@ RasterizeMeshesNaiveCuda(
       H,
       W,
       K,
-      face_idxs.contiguous().data_ptr<int64_t>(),
-      zbuf.contiguous().data_ptr<float>(),
-      pix_dists.contiguous().data_ptr<float>(),
-      bary.contiguous().data_ptr<float>());
+      face_idxs.data_ptr<int64_t>(),
+      zbuf.data_ptr<float>(),
+      pix_dists.data_ptr<float>(),
+      bary.data_ptr<float>());

   AT_CUDA_CHECK(cudaGetLastError());
   return std::make_tuple(face_idxs, zbuf, bary, pix_dists);
@@ -530,7 +530,7 @@ at::Tensor RasterizeMeshesBackwardCuda(
       grad_zbuf.contiguous().data_ptr<float>(),
       grad_bary.contiguous().data_ptr<float>(),
       grad_dists.contiguous().data_ptr<float>(),
-      grad_face_verts.contiguous().data_ptr<float>());
+      grad_face_verts.data_ptr<float>());

   AT_CUDA_CHECK(cudaGetLastError());
   return grad_face_verts;
@@ -727,8 +727,8 @@ at::Tensor RasterizeMeshesCoarseCuda(
       bin_size,
       chunk_size,
       M,
-      faces_per_bin.contiguous().data_ptr<int32_t>(),
-      bin_faces.contiguous().data_ptr<int32_t>());
+      faces_per_bin.data_ptr<int32_t>(),
+      bin_faces.data_ptr<int32_t>());

   AT_CUDA_CHECK(cudaGetLastError());
   return bin_faces;
@@ -897,10 +897,10 @@ RasterizeMeshesFineCuda(
       H,
       W,
       K,
-      face_idxs.contiguous().data_ptr<int64_t>(),
-      zbuf.contiguous().data_ptr<float>(),
-      pix_dists.contiguous().data_ptr<float>(),
-      bary.contiguous().data_ptr<float>());
+      face_idxs.data_ptr<int64_t>(),
+      zbuf.data_ptr<float>(),
+      pix_dists.data_ptr<float>(),
+      bary.data_ptr<float>());

   return std::make_tuple(face_idxs, zbuf, bary, pix_dists);
 }
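The rasterize_meshes.cu hunks run in the opposite direction: they drop `.contiguous()` calls, but only on tensors the function just allocated itself. A fresh allocation is contiguous by construction, so the call returned the same tensor and its removal is behavior-neutral. A sketch of why (shapes are illustrative, not from this PR):

```cpp
#include <torch/torch.h>

// A freshly allocated tensor is already contiguous, so calling
// .contiguous() on it was a no-op that returned the same tensor.
const int64_t N = 2, H = 64, W = 64, K = 8;
at::Tensor zbuf =
    at::zeros({N, H, W, K}, at::dtype(at::kFloat).device(at::kCUDA));
TORCH_CHECK(zbuf.is_contiguous()); // always true for a fresh allocation
float* ptr = zbuf.data_ptr<float>(); // safe without .contiguous()
```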
diff --git a/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.h b/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.h
index 4f8f4044..54031b17 100644
--- a/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.h
+++ b/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.h
@@ -96,9 +96,9 @@ RasterizeMeshesNaive(
   // TODO: Better type checking.
   if (face_verts.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(face_verts);
-    CHECK_CONTIGUOUS_CUDA(mesh_to_face_first_idx);
-    CHECK_CONTIGUOUS_CUDA(num_faces_per_mesh);
+    CHECK_CUDA(face_verts);
+    CHECK_CUDA(mesh_to_face_first_idx);
+    CHECK_CUDA(num_faces_per_mesh);
     return RasterizeMeshesNaiveCuda(
         face_verts,
         mesh_to_face_first_idx,
@@ -179,11 +179,11 @@ torch::Tensor RasterizeMeshesBackward(
     const bool perspective_correct) {
   if (face_verts.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(face_verts);
-    CHECK_CONTIGUOUS_CUDA(pix_to_face);
-    CHECK_CONTIGUOUS_CUDA(grad_zbuf);
-    CHECK_CONTIGUOUS_CUDA(grad_bary);
-    CHECK_CONTIGUOUS_CUDA(grad_dists);
+    CHECK_CUDA(face_verts);
+    CHECK_CUDA(pix_to_face);
+    CHECK_CUDA(grad_zbuf);
+    CHECK_CUDA(grad_bary);
+    CHECK_CUDA(grad_dists);
     return RasterizeMeshesBackwardCuda(
         face_verts,
         pix_to_face,
@@ -260,9 +260,9 @@ torch::Tensor RasterizeMeshesCoarse(
     const int max_faces_per_bin) {
   if (face_verts.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(face_verts);
-    CHECK_CONTIGUOUS_CUDA(mesh_to_face_first_idx);
-    CHECK_CONTIGUOUS_CUDA(num_faces_per_mesh);
+    CHECK_CUDA(face_verts);
+    CHECK_CUDA(mesh_to_face_first_idx);
+    CHECK_CUDA(num_faces_per_mesh);
     return RasterizeMeshesCoarseCuda(
         face_verts,
         mesh_to_face_first_idx,
@@ -359,8 +359,8 @@ RasterizeMeshesFine(
     const bool cull_backfaces) {
   if (face_verts.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(face_verts);
-    CHECK_CONTIGUOUS_CUDA(bin_faces);
+    CHECK_CUDA(face_verts);
+    CHECK_CUDA(bin_faces);
     return RasterizeMeshesFineCuda(
         face_verts,
         bin_faces,
diff --git a/pytorch3d/csrc/rasterize_points/rasterize_points.h b/pytorch3d/csrc/rasterize_points/rasterize_points.h
index 9360c020..6f557e05 100644
--- a/pytorch3d/csrc/rasterize_points/rasterize_points.h
+++ b/pytorch3d/csrc/rasterize_points/rasterize_points.h
@@ -67,9 +67,9 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> RasterizePointsNaive(
   if (points.is_cuda() && cloud_to_packed_first_idx.is_cuda() &&
       num_points_per_cloud.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(points);
-    CHECK_CONTIGUOUS_CUDA(cloud_to_packed_first_idx);
-    CHECK_CONTIGUOUS_CUDA(num_points_per_cloud);
+    CHECK_CUDA(points);
+    CHECK_CUDA(cloud_to_packed_first_idx);
+    CHECK_CUDA(num_points_per_cloud);
     return RasterizePointsNaiveCuda(
         points,
         cloud_to_packed_first_idx,
@@ -144,9 +144,9 @@ torch::Tensor RasterizePointsCoarse(
   if (points.is_cuda() && cloud_to_packed_first_idx.is_cuda() &&
       num_points_per_cloud.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(points);
-    CHECK_CONTIGUOUS_CUDA(cloud_to_packed_first_idx);
-    CHECK_CONTIGUOUS_CUDA(num_points_per_cloud);
+    CHECK_CUDA(points);
+    CHECK_CUDA(cloud_to_packed_first_idx);
+    CHECK_CUDA(num_points_per_cloud);
     return RasterizePointsCoarseCuda(
         points,
         cloud_to_packed_first_idx,
@@ -215,8 +215,8 @@ std::tuple<torch::Tensor, torch::Tensor> RasterizePointsFine(
     const int points_per_pixel) {
   if (points.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(points);
-    CHECK_CONTIGUOUS_CUDA(bin_points);
+    CHECK_CUDA(points);
+    CHECK_CUDA(bin_points);
     return RasterizePointsFineCuda(
         points, bin_points, image_size, radius, bin_size, points_per_pixel);
 #else
@@ -266,10 +266,10 @@ torch::Tensor RasterizePointsBackward(
     const torch::Tensor& grad_dists) {
   if (points.is_cuda()) {
 #ifdef WITH_CUDA
-    CHECK_CONTIGUOUS_CUDA(points);
-    CHECK_CONTIGUOUS_CUDA(idxs);
-    CHECK_CONTIGUOUS_CUDA(grad_zbuf);
-    CHECK_CONTIGUOUS_CUDA(grad_dists);
+    CHECK_CUDA(points);
+    CHECK_CUDA(idxs);
+    CHECK_CUDA(grad_zbuf);
+    CHECK_CUDA(grad_dists);
     return RasterizePointsBackwardCuda(points, idxs, grad_zbuf, grad_dists);
 #else
     AT_ERROR("Not compiled with GPU support");