diff --git a/pytorch3d/csrc/pulsar/include/renderer.destruct.device.h b/pytorch3d/csrc/pulsar/include/renderer.destruct.device.h index 8520233c..d63bbd62 100644 --- a/pytorch3d/csrc/pulsar/include/renderer.destruct.device.h +++ b/pytorch3d/csrc/pulsar/include/renderer.destruct.device.h @@ -18,68 +18,89 @@ namespace Renderer { template HOST void destruct(Renderer* self) { - if (self->result_d != NULL) + if (self->result_d != NULL) { FREE(self->result_d); + } self->result_d = NULL; - if (self->min_depth_d != NULL) + if (self->min_depth_d != NULL) { FREE(self->min_depth_d); + } self->min_depth_d = NULL; - if (self->min_depth_sorted_d != NULL) + if (self->min_depth_sorted_d != NULL) { FREE(self->min_depth_sorted_d); + } self->min_depth_sorted_d = NULL; - if (self->ii_d != NULL) + if (self->ii_d != NULL) { FREE(self->ii_d); + } self->ii_d = NULL; - if (self->ii_sorted_d != NULL) + if (self->ii_sorted_d != NULL) { FREE(self->ii_sorted_d); + } self->ii_sorted_d = NULL; - if (self->ids_d != NULL) + if (self->ids_d != NULL) { FREE(self->ids_d); + } self->ids_d = NULL; - if (self->ids_sorted_d != NULL) + if (self->ids_sorted_d != NULL) { FREE(self->ids_sorted_d); + } self->ids_sorted_d = NULL; - if (self->workspace_d != NULL) + if (self->workspace_d != NULL) { FREE(self->workspace_d); + } self->workspace_d = NULL; - if (self->di_d != NULL) + if (self->di_d != NULL) { FREE(self->di_d); + } self->di_d = NULL; - if (self->di_sorted_d != NULL) + if (self->di_sorted_d != NULL) { FREE(self->di_sorted_d); + } self->di_sorted_d = NULL; - if (self->region_flags_d != NULL) + if (self->region_flags_d != NULL) { FREE(self->region_flags_d); + } self->region_flags_d = NULL; - if (self->num_selected_d != NULL) + if (self->num_selected_d != NULL) { FREE(self->num_selected_d); + } self->num_selected_d = NULL; - if (self->forw_info_d != NULL) + if (self->forw_info_d != NULL) { FREE(self->forw_info_d); + } self->forw_info_d = NULL; - if (self->min_max_pixels_d != NULL) + if (self->min_max_pixels_d != NULL) { FREE(self->min_max_pixels_d); + } self->min_max_pixels_d = NULL; - if (self->grad_pos_d != NULL) + if (self->grad_pos_d != NULL) { FREE(self->grad_pos_d); + } self->grad_pos_d = NULL; - if (self->grad_col_d != NULL) + if (self->grad_col_d != NULL) { FREE(self->grad_col_d); + } self->grad_col_d = NULL; - if (self->grad_rad_d != NULL) + if (self->grad_rad_d != NULL) { FREE(self->grad_rad_d); + } self->grad_rad_d = NULL; - if (self->grad_cam_d != NULL) + if (self->grad_cam_d != NULL) { FREE(self->grad_cam_d); + } self->grad_cam_d = NULL; - if (self->grad_cam_buf_d != NULL) + if (self->grad_cam_buf_d != NULL) { FREE(self->grad_cam_buf_d); + } self->grad_cam_buf_d = NULL; - if (self->grad_opy_d != NULL) + if (self->grad_opy_d != NULL) { FREE(self->grad_opy_d); + } self->grad_opy_d = NULL; - if (self->n_grad_contributions_d != NULL) + if (self->n_grad_contributions_d != NULL) { FREE(self->n_grad_contributions_d); + } self->n_grad_contributions_d = NULL; } diff --git a/pytorch3d/csrc/pulsar/include/renderer.norm_sphere_gradients.device.h b/pytorch3d/csrc/pulsar/include/renderer.norm_sphere_gradients.device.h index 37e0eb00..f69d3be2 100644 --- a/pytorch3d/csrc/pulsar/include/renderer.norm_sphere_gradients.device.h +++ b/pytorch3d/csrc/pulsar/include/renderer.norm_sphere_gradients.device.h @@ -64,8 +64,9 @@ GLOBAL void norm_sphere_gradients(Renderer renderer, const int num_balls) { // The sphere only contributes to the camera gradients if it is // large enough in screen space. if (renderer.ids_sorted_d[idx] > 0 && ii.max.x >= ii.min.x + 3 && - ii.max.y >= ii.min.y + 3) + ii.max.y >= ii.min.y + 3) { renderer.ids_sorted_d[idx] = 1; + } END_PARALLEL_NORET(); }; diff --git a/pytorch3d/csrc/pulsar/include/renderer.render.device.h b/pytorch3d/csrc/pulsar/include/renderer.render.device.h index ab13c66d..592d0068 100644 --- a/pytorch3d/csrc/pulsar/include/renderer.render.device.h +++ b/pytorch3d/csrc/pulsar/include/renderer.render.device.h @@ -139,8 +139,9 @@ GLOBAL void render( coord_y < cam_norm.film_border_top + cam_norm.film_height) { // Initialize the result. if (mode == 0u) { - for (uint c_id = 0; c_id < cam_norm.n_channels; ++c_id) + for (uint c_id = 0; c_id < cam_norm.n_channels; ++c_id) { result[c_id] = bg_col[c_id]; + } } else { result[0] = 0.f; } @@ -190,20 +191,22 @@ GLOBAL void render( "render|found intersection with sphere %u.\n", sphere_id_l[write_idx]); } - if (ii.min.x == MAX_USHORT) + if (ii.min.x == MAX_USHORT) { // This is an invalid sphere (out of image). These spheres have // maximum depth. Since we ordered the spheres by earliest possible // intersection depth we re certain that there will no other sphere // that is relevant after this one. loading_done = true; + } } // Reset n_pixels_done. n_pixels_done = 0; thread_block.sync(); // Make sure n_loaded is updated. if (n_loaded > RENDER_BUFFER_LOAD_THRESH) { // The load buffer is full enough. Draw. - if (thread_block.thread_rank() == 0) + if (thread_block.thread_rank() == 0) { n_balls_loaded += n_loaded; + } max_closest_possible_intersection = 0.f; // This excludes threads outside of the image boundary. Also, it reduces // block artifacts. @@ -290,8 +293,9 @@ GLOBAL void render( uint warp_done = thread_warp.ballot(done); int warp_done_bit_cnt = POPC(warp_done); #endif //__CUDACC__ && __HIP_PLATFORM_AMD__ - if (thread_warp.thread_rank() == 0) + if (thread_warp.thread_rank() == 0) { ATOMICADD_B(&n_pixels_done, warp_done_bit_cnt); + } // This sync is necessary to keep n_loaded until all threads are done with // painting. thread_block.sync(); @@ -299,8 +303,9 @@ GLOBAL void render( } thread_block.sync(); } - if (thread_block.thread_rank() == 0) + if (thread_block.thread_rank() == 0) { n_balls_loaded += n_loaded; + } PULSAR_LOG_DEV_PIX( PULSAR_LOG_RENDER_PIX, "render|loaded %d balls in total.\n", @@ -386,8 +391,9 @@ GLOBAL void render( static_cast(tracker.get_n_hits()); } else { float sm_d_normfac = FRCP(FMAX(sm_d, FEPS)); - for (uint c_id = 0; c_id < cam_norm.n_channels; ++c_id) + for (uint c_id = 0; c_id < cam_norm.n_channels; ++c_id) { result[c_id] *= sm_d_normfac; + } int write_loc = (coord_y - cam_norm.film_border_top) * cam_norm.film_width * (3 + 2 * n_track) + (coord_x - cam_norm.film_border_left) * (3 + 2 * n_track); diff --git a/pytorch3d/csrc/pulsar/pytorch/renderer.cpp b/pytorch3d/csrc/pulsar/pytorch/renderer.cpp index 018ca1ad..928e8c6f 100644 --- a/pytorch3d/csrc/pulsar/pytorch/renderer.cpp +++ b/pytorch3d/csrc/pulsar/pytorch/renderer.cpp @@ -860,8 +860,9 @@ std::tuple Renderer::forward( ? (cudaStream_t) nullptr #endif : (cudaStream_t) nullptr); - if (mode == 1) + if (mode == 1) { results[batch_i] = results[batch_i].slice(2, 0, 1, 1); + } forw_infos[batch_i] = from_blob( this->renderer_vec[batch_i].forw_info_d, {this->renderer_vec[0].cam.film_height, diff --git a/pytorch3d/csrc/pulsar/pytorch/renderer.h b/pytorch3d/csrc/pulsar/pytorch/renderer.h index 90bc3c82..97a674c4 100644 --- a/pytorch3d/csrc/pulsar/pytorch/renderer.h +++ b/pytorch3d/csrc/pulsar/pytorch/renderer.h @@ -128,8 +128,9 @@ struct Renderer { stream << "pulsar::Renderer["; // Device info. stream << self.device_type; - if (self.device_index != -1) + if (self.device_index != -1) { stream << ", ID " << self.device_index; + } stream << "]"; return stream; }