mirror of
https://github.com/facebookresearch/pytorch3d.git
synced 2025-12-20 14:20:38 +08:00
examples and docs.
Summary: This diff updates the documentation and tutorials with information about the new pulsar backend. For more information about the pulsar backend, see the release notes and the paper (https://arxiv.org/abs/2004.07484). For information on how to use the backend, see the point cloud rendering notebook and the examples in the folder docs/examples. Reviewed By: nikhilaravi Differential Revision: D24498129 fbshipit-source-id: e312b0169a72b13590df6e4db36bfe6190d742f9
This commit is contained in:
committed by
Facebook GitHub Bot
parent
960fd6d8b6
commit
039e02601d
@@ -38,9 +38,27 @@
|
||||
// Don't care about pytorch warnings; they shouldn't clutter our warnings.
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Weverything"
|
||||
#include <ATen/cuda/CUDAContext.h>
|
||||
#include <torch/extension.h>
|
||||
#pragma clang diagnostic pop
|
||||
#ifdef WITH_CUDA
|
||||
#include <ATen/cuda/CUDAContext.h>
|
||||
#else
|
||||
#ifndef cudaStream_t
|
||||
typedef void* cudaStream_t;
|
||||
#endif
|
||||
struct int2 {
|
||||
int x, y;
|
||||
};
|
||||
struct ushort2 {
|
||||
unsigned short x, y;
|
||||
};
|
||||
struct float2 {
|
||||
float x, y;
|
||||
};
|
||||
struct float3 {
|
||||
float x, y, z;
|
||||
};
|
||||
#endif
|
||||
namespace py = pybind11;
|
||||
inline float3 make_float3(const float& x, const float& y, const float& z) {
|
||||
float3 res;
|
||||
|
||||
@@ -5,8 +5,10 @@
|
||||
#include "./util.h"
|
||||
|
||||
#include <ATen/ATen.h>
|
||||
#ifdef WITH_CUDA
|
||||
#include <ATen/cuda/CUDAContext.h>
|
||||
#include <c10/cuda/CUDAGuard.h>
|
||||
#endif
|
||||
|
||||
namespace PRE = ::pulsar::Renderer;
|
||||
|
||||
@@ -58,10 +60,13 @@ Renderer::Renderer(
|
||||
|
||||
Renderer::~Renderer() {
|
||||
if (this->device_type == c10::DeviceType::CUDA) {
|
||||
// Can't happen in the case that not compiled with CUDA.
|
||||
#ifdef WITH_CUDA
|
||||
at::cuda::CUDAGuard device_guard(this->device_tracker.device());
|
||||
for (auto nrend : this->renderer_vec) {
|
||||
PRE::destruct<true>(&nrend);
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
for (auto nrend : this->renderer_vec) {
|
||||
PRE::destruct<false>(&nrend);
|
||||
@@ -87,6 +92,7 @@ void Renderer::ensure_on_device(torch::Device device, bool /*non_blocking*/) {
|
||||
"Only CPU and CUDA device types are supported.");
|
||||
if (device.type() != this->device_type ||
|
||||
device.index() != this->device_index) {
|
||||
#ifdef WITH_CUDA
|
||||
LOG_IF(INFO, PULSAR_LOG_INIT)
|
||||
<< "Transferring render buffers between devices.";
|
||||
int prev_active;
|
||||
@@ -136,6 +142,11 @@ void Renderer::ensure_on_device(torch::Device device, bool /*non_blocking*/) {
|
||||
cudaSetDevice(prev_active);
|
||||
this->device_type = device.type();
|
||||
this->device_index = device.index();
|
||||
#else
|
||||
throw std::runtime_error(
|
||||
"pulsar was built without CUDA "
|
||||
"but a device move to a CUDA device was initiated.");
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
@@ -148,6 +159,7 @@ void Renderer::ensure_n_renderers_gte(const size_t& batch_size) {
|
||||
for (ptrdiff_t i = 0; i < diff; ++i) {
|
||||
this->renderer_vec.emplace_back();
|
||||
if (this->device_type == c10::DeviceType::CUDA) {
|
||||
#ifdef WITH_CUDA
|
||||
PRE::construct<true>(
|
||||
&this->renderer_vec[this->renderer_vec.size() - 1],
|
||||
this->max_num_balls(),
|
||||
@@ -158,6 +170,7 @@ void Renderer::ensure_n_renderers_gte(const size_t& batch_size) {
|
||||
this->renderer_vec[0].cam.background_normalization_depth,
|
||||
this->renderer_vec[0].cam.n_channels,
|
||||
this->n_track());
|
||||
#endif
|
||||
} else {
|
||||
PRE::construct<false>(
|
||||
&this->renderer_vec[this->renderer_vec.size() - 1],
|
||||
@@ -708,6 +721,10 @@ std::tuple<torch::Tensor, torch::Tensor> Renderer::forward(
|
||||
opacity_ptr = opacity_contiguous.data_ptr<float>();
|
||||
}
|
||||
if (this->device_type == c10::DeviceType::CUDA) {
|
||||
// No else check necessary - if not compiled with CUDA
|
||||
// we can't even reach this code (the renderer can't be
|
||||
// moved to a CUDA device).
|
||||
#ifdef WITH_CUDA
|
||||
int prev_active;
|
||||
cudaGetDevice(&prev_active);
|
||||
cudaSetDevice(this->device_index);
|
||||
@@ -756,6 +773,7 @@ std::tuple<torch::Tensor, torch::Tensor> Renderer::forward(
|
||||
<< time_ms / static_cast<float>(batch_size) << "ms" << std::endl;
|
||||
#endif
|
||||
cudaSetDevice(prev_active);
|
||||
#endif
|
||||
} else {
|
||||
#ifdef PULSAR_TIMINGS_BATCHED_ENABLED
|
||||
START_TIME(batch_forward);
|
||||
@@ -816,7 +834,11 @@ std::tuple<torch::Tensor, torch::Tensor> Renderer::forward(
|
||||
this->device_index,
|
||||
torch::kFloat,
|
||||
this->device_type == c10::DeviceType::CUDA
|
||||
#ifdef WITH_CUDA
|
||||
? at::cuda::getCurrentCUDAStream()
|
||||
#else
|
||||
? (cudaStream_t) nullptr
|
||||
#endif
|
||||
: (cudaStream_t) nullptr);
|
||||
if (mode == 1)
|
||||
results[batch_i] = results[batch_i].slice(2, 0, 1, 1);
|
||||
@@ -829,7 +851,11 @@ std::tuple<torch::Tensor, torch::Tensor> Renderer::forward(
|
||||
this->device_index,
|
||||
torch::kFloat,
|
||||
this->device_type == c10::DeviceType::CUDA
|
||||
#ifdef WITH_CUDA
|
||||
? at::cuda::getCurrentCUDAStream()
|
||||
#else
|
||||
? (cudaStream_t) nullptr
|
||||
#endif
|
||||
: (cudaStream_t) nullptr);
|
||||
}
|
||||
LOG_IF(INFO, PULSAR_LOG_FORWARD) << "Forward render complete.";
|
||||
@@ -1048,6 +1074,9 @@ Renderer::backward(
|
||||
opacity_ptr = opacity_contiguous.data_ptr<float>();
|
||||
}
|
||||
if (this->device_type == c10::DeviceType::CUDA) {
|
||||
// No else check necessary - it's not possible to move
|
||||
// the renderer to a CUDA device if not built with CUDA.
|
||||
#ifdef WITH_CUDA
|
||||
int prev_active;
|
||||
cudaGetDevice(&prev_active);
|
||||
cudaSetDevice(this->device_index);
|
||||
@@ -1162,6 +1191,7 @@ Renderer::backward(
|
||||
std::cout << "Backward render batched time per example: "
|
||||
<< time_ms / static_cast<float>(batch_size) << "ms" << std::endl;
|
||||
#endif
|
||||
#endif // WITH_CUDA
|
||||
} else {
|
||||
#ifdef PULSAR_TIMINGS_BATCHED_ENABLED
|
||||
START_TIME(batch_backward);
|
||||
@@ -1285,7 +1315,11 @@ Renderer::backward(
|
||||
this->device_index,
|
||||
torch::kFloat,
|
||||
this->device_type == c10::DeviceType::CUDA
|
||||
#ifdef WITH_CUDA
|
||||
? at::cuda::getCurrentCUDAStream()
|
||||
#else
|
||||
? (cudaStream_t) nullptr
|
||||
#endif
|
||||
: (cudaStream_t) nullptr);
|
||||
}
|
||||
std::get<0>(ret) = torch::stack(results);
|
||||
@@ -1297,7 +1331,11 @@ Renderer::backward(
|
||||
this->device_index,
|
||||
torch::kFloat,
|
||||
this->device_type == c10::DeviceType::CUDA
|
||||
#ifdef WITH_CUDA
|
||||
? at::cuda::getCurrentCUDAStream()
|
||||
#else
|
||||
? (cudaStream_t) nullptr
|
||||
#endif
|
||||
: (cudaStream_t) nullptr);
|
||||
}
|
||||
}
|
||||
@@ -1313,7 +1351,11 @@ Renderer::backward(
|
||||
this->device_index,
|
||||
torch::kFloat,
|
||||
this->device_type == c10::DeviceType::CUDA
|
||||
#ifdef WITH_CUDA
|
||||
? at::cuda::getCurrentCUDAStream()
|
||||
#else
|
||||
? (cudaStream_t) nullptr
|
||||
#endif
|
||||
: (cudaStream_t) nullptr);
|
||||
}
|
||||
std::get<1>(ret) = torch::stack(results);
|
||||
@@ -1326,7 +1368,11 @@ Renderer::backward(
|
||||
this->device_index,
|
||||
torch::kFloat,
|
||||
this->device_type == c10::DeviceType::CUDA
|
||||
#ifdef WITH_CUDA
|
||||
? at::cuda::getCurrentCUDAStream()
|
||||
#else
|
||||
? (cudaStream_t) nullptr
|
||||
#endif
|
||||
: (cudaStream_t) nullptr);
|
||||
}
|
||||
}
|
||||
@@ -1341,7 +1387,11 @@ Renderer::backward(
|
||||
this->device_index,
|
||||
torch::kFloat,
|
||||
this->device_type == c10::DeviceType::CUDA
|
||||
#ifdef WITH_CUDA
|
||||
? at::cuda::getCurrentCUDAStream()
|
||||
#else
|
||||
? (cudaStream_t) nullptr
|
||||
#endif
|
||||
: (cudaStream_t) nullptr);
|
||||
}
|
||||
std::get<2>(ret) = torch::stack(results);
|
||||
@@ -1353,7 +1403,11 @@ Renderer::backward(
|
||||
this->device_index,
|
||||
torch::kFloat,
|
||||
this->device_type == c10::DeviceType::CUDA
|
||||
#ifdef WITH_CUDA
|
||||
? at::cuda::getCurrentCUDAStream()
|
||||
#else
|
||||
? (cudaStream_t) nullptr
|
||||
#endif
|
||||
: (cudaStream_t) nullptr);
|
||||
}
|
||||
}
|
||||
@@ -1371,7 +1425,11 @@ Renderer::backward(
|
||||
this->device_index,
|
||||
torch::kFloat,
|
||||
this->device_type == c10::DeviceType::CUDA
|
||||
#ifdef WITH_CUDA
|
||||
? at::cuda::getCurrentCUDAStream()
|
||||
#else
|
||||
? (cudaStream_t) nullptr
|
||||
#endif
|
||||
: (cudaStream_t) nullptr);
|
||||
res_p2[batch_i] = from_blob(
|
||||
reinterpret_cast<float*>(
|
||||
@@ -1381,7 +1439,11 @@ Renderer::backward(
|
||||
this->device_index,
|
||||
torch::kFloat,
|
||||
this->device_type == c10::DeviceType::CUDA
|
||||
#ifdef WITH_CUDA
|
||||
? at::cuda::getCurrentCUDAStream()
|
||||
#else
|
||||
? (cudaStream_t) nullptr
|
||||
#endif
|
||||
: (cudaStream_t) nullptr);
|
||||
res_p3[batch_i] = from_blob(
|
||||
reinterpret_cast<float*>(
|
||||
@@ -1391,7 +1453,11 @@ Renderer::backward(
|
||||
this->device_index,
|
||||
torch::kFloat,
|
||||
this->device_type == c10::DeviceType::CUDA
|
||||
#ifdef WITH_CUDA
|
||||
? at::cuda::getCurrentCUDAStream()
|
||||
#else
|
||||
? (cudaStream_t) nullptr
|
||||
#endif
|
||||
: (cudaStream_t) nullptr);
|
||||
res_p4[batch_i] = from_blob(
|
||||
reinterpret_cast<float*>(
|
||||
@@ -1401,7 +1467,11 @@ Renderer::backward(
|
||||
this->device_index,
|
||||
torch::kFloat,
|
||||
this->device_type == c10::DeviceType::CUDA
|
||||
#ifdef WITH_CUDA
|
||||
? at::cuda::getCurrentCUDAStream()
|
||||
#else
|
||||
? (cudaStream_t) nullptr
|
||||
#endif
|
||||
: (cudaStream_t) nullptr);
|
||||
}
|
||||
std::get<3>(ret) = torch::stack(res_p1);
|
||||
@@ -1416,7 +1486,11 @@ Renderer::backward(
|
||||
this->device_index,
|
||||
torch::kFloat,
|
||||
this->device_type == c10::DeviceType::CUDA
|
||||
#ifdef WITH_CUDA
|
||||
? at::cuda::getCurrentCUDAStream()
|
||||
#else
|
||||
? (cudaStream_t) nullptr
|
||||
#endif
|
||||
: (cudaStream_t) nullptr);
|
||||
std::get<4>(ret) = from_blob(
|
||||
reinterpret_cast<float*>(this->renderer_vec[0].grad_cam_d + 3),
|
||||
@@ -1425,7 +1499,11 @@ Renderer::backward(
|
||||
this->device_index,
|
||||
torch::kFloat,
|
||||
this->device_type == c10::DeviceType::CUDA
|
||||
#ifdef WITH_CUDA
|
||||
? at::cuda::getCurrentCUDAStream()
|
||||
#else
|
||||
? (cudaStream_t) nullptr
|
||||
#endif
|
||||
: (cudaStream_t) nullptr);
|
||||
std::get<5>(ret) = from_blob(
|
||||
reinterpret_cast<float*>(this->renderer_vec[0].grad_cam_d + 6),
|
||||
@@ -1434,7 +1512,11 @@ Renderer::backward(
|
||||
this->device_index,
|
||||
torch::kFloat,
|
||||
this->device_type == c10::DeviceType::CUDA
|
||||
#ifdef WITH_CUDA
|
||||
? at::cuda::getCurrentCUDAStream()
|
||||
#else
|
||||
? (cudaStream_t) nullptr
|
||||
#endif
|
||||
: (cudaStream_t) nullptr);
|
||||
std::get<6>(ret) = from_blob(
|
||||
reinterpret_cast<float*>(this->renderer_vec[0].grad_cam_d + 9),
|
||||
@@ -1443,7 +1525,11 @@ Renderer::backward(
|
||||
this->device_index,
|
||||
torch::kFloat,
|
||||
this->device_type == c10::DeviceType::CUDA
|
||||
#ifdef WITH_CUDA
|
||||
? at::cuda::getCurrentCUDAStream()
|
||||
#else
|
||||
? (cudaStream_t) nullptr
|
||||
#endif
|
||||
: (cudaStream_t) nullptr);
|
||||
}
|
||||
}
|
||||
@@ -1458,7 +1544,11 @@ Renderer::backward(
|
||||
this->device_index,
|
||||
torch::kFloat,
|
||||
this->device_type == c10::DeviceType::CUDA
|
||||
#ifdef WITH_CUDA
|
||||
? at::cuda::getCurrentCUDAStream()
|
||||
#else
|
||||
? (cudaStream_t) nullptr
|
||||
#endif
|
||||
: (cudaStream_t) nullptr);
|
||||
}
|
||||
std::get<7>(ret) = torch::stack(results);
|
||||
@@ -1470,7 +1560,11 @@ Renderer::backward(
|
||||
this->device_index,
|
||||
torch::kFloat,
|
||||
this->device_type == c10::DeviceType::CUDA
|
||||
#ifdef WITH_CUDA
|
||||
? at::cuda::getCurrentCUDAStream()
|
||||
#else
|
||||
? (cudaStream_t) nullptr
|
||||
#endif
|
||||
: (cudaStream_t) nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
// Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
|
||||
#ifdef WITH_CUDA
|
||||
#include <ATen/cuda/CUDAContext.h>
|
||||
#include <cuda_runtime_api.h>
|
||||
#endif
|
||||
#include <torch/extension.h>
|
||||
|
||||
#include "./tensor_util.h"
|
||||
@@ -23,6 +25,7 @@ torch::Tensor sphere_ids_from_result_info_nograd(
|
||||
/*dim=*/3, /*start=*/3, /*end=*/forw_info.size(3), /*step=*/2)
|
||||
.contiguous();
|
||||
if (forw_info.device().type() == c10::DeviceType::CUDA) {
|
||||
#ifdef WITH_CUDA
|
||||
cudaMemcpyAsync(
|
||||
result.data_ptr(),
|
||||
tmp.data_ptr(),
|
||||
@@ -30,6 +33,11 @@ torch::Tensor sphere_ids_from_result_info_nograd(
|
||||
tmp.size(3),
|
||||
cudaMemcpyDeviceToDevice,
|
||||
at::cuda::getCurrentCUDAStream());
|
||||
#else
|
||||
throw std::runtime_error(
|
||||
"Copy on CUDA device initiated but built "
|
||||
"without CUDA support.");
|
||||
#endif
|
||||
} else {
|
||||
memcpy(
|
||||
result.data_ptr(),
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
// Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
|
||||
#ifdef WITH_CUDA
|
||||
#include <cuda_runtime_api.h>
|
||||
|
||||
namespace pulsar {
|
||||
@@ -22,3 +23,4 @@ void cudaDevToHost(
|
||||
|
||||
} // namespace pytorch
|
||||
} // namespace pulsar
|
||||
#endif
|
||||
|
||||
@@ -41,11 +41,16 @@ torch::Tensor from_blob(
|
||||
const int num_elements =
|
||||
std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>{});
|
||||
if (device_type == c10::DeviceType::CUDA) {
|
||||
#ifdef WITH_CUDA
|
||||
cudaDevToDev(
|
||||
ret.data_ptr(),
|
||||
static_cast<const void*>(ptr),
|
||||
sizeof(T) * num_elements,
|
||||
stream);
|
||||
#else
|
||||
throw std::runtime_error(
|
||||
"Initiating devToDev copy on a build without CUDA.");
|
||||
#endif
|
||||
// TODO: check for synchronization.
|
||||
} else {
|
||||
memcpy(ret.data_ptr(), ptr, sizeof(T) * num_elements);
|
||||
|
||||
Reference in New Issue
Block a user