/* * Copyright (c) Meta Platforms, Inc. and affiliates. * All rights reserved. * * This source code is licensed under the BSD-style license found in the * LICENSE file in the root directory of this source tree. */ #ifndef PULSAR_NATIVE_INCLUDE_RENDERER_H_ #define PULSAR_NATIVE_INCLUDE_RENDERER_H_ #include #include "../global.h" #include "./camera.h" namespace pulsar { namespace Renderer { //! Remember to order struct members from larger size to smaller size //! to avoid padding (for more info, see for example here: //! http://www.catb.org/esr/structure-packing/). /** * This is the information that's needed to do a fast screen point * intersection with one of the balls. * * Aim to keep this below 8 bytes (256 bytes per cache-line / 32 threads in a * warp = 8 bytes per thread). */ struct IntersectInfo { ushort2 min; /** minimum x, y in pixel coordinates. */ ushort2 max; /** maximum x, y in pixel coordinates. */ }; static_assert( sizeof(IntersectInfo) == 8, "The compiled size of `IntersectInfo` is wrong."); /** * Reduction operation to find the limits of multiple IntersectInfo objects. */ struct IntersectInfoMinMax { IHD IntersectInfo operator()(const IntersectInfo& a, const IntersectInfo& b) const { // Treat the special case of an invalid intersect info object or one for // a ball out of bounds. if (b.max.x == MAX_USHORT && b.min.x == MAX_USHORT && b.max.y == MAX_USHORT && b.min.y == MAX_USHORT) { return a; } if (a.max.x == MAX_USHORT && a.min.x == MAX_USHORT && a.max.y == MAX_USHORT && a.min.y == MAX_USHORT) { return b; } IntersectInfo result; result.min.x = std::min(a.min.x, b.min.x); result.min.y = std::min(a.min.y, b.min.y); result.max.x = std::max(a.max.x, b.max.x); result.max.y = std::max(a.max.y, b.max.y); return result; } }; /** * All information that's needed to draw a ball. * * It's necessary to keep this information in float (not half) format, * because the loss in accuracy would be too high and lead to artifacts. */ struct DrawInfo { float3 ray_center_norm; /** Ray to the ball center, normalized. */ /** Ball color. * * This might be the full color in the case of n_channels <= 3. Otherwise, * a pointer to the original 'color' data is stored in the following union. */ float first_color; union { float color[2]; float* ptr; } color_union; float t_center; /** Distance from the camera to the ball center. */ float radius; /** Ball radius. */ }; static_assert( sizeof(DrawInfo) == 8 * 4, "The compiled size of `DrawInfo` is wrong."); /** * An object to collect all associated data with the renderer. * * The `_d` suffixed pointers point to memory 'on-device', potentially on the * GPU. All other variables are expected to point to CPU memory. */ struct Renderer { /** Dummy initializer to make sure all pointers are set to NULL to * be safe for the device-specific 'construct' and 'destruct' methods. */ inline Renderer() { max_num_balls = 0; result_d = NULL; min_depth_d = NULL; min_depth_sorted_d = NULL; ii_d = NULL; ii_sorted_d = NULL; ids_d = NULL; ids_sorted_d = NULL; workspace_d = NULL; di_d = NULL; di_sorted_d = NULL; region_flags_d = NULL; num_selected_d = NULL; forw_info_d = NULL; grad_pos_d = NULL; grad_col_d = NULL; grad_rad_d = NULL; grad_cam_d = NULL; grad_opy_d = NULL; grad_cam_buf_d = NULL; n_grad_contributions_d = NULL; }; /** The camera for this renderer. In world-coordinates. */ CamInfo cam; /** * The maximum amount of balls the renderer can handle. Resources are * pre-allocated to account for this size. Less than this amount of balls * can be rendered, but not more. */ int max_num_balls; /** The result buffer. */ float* result_d; /** Closest possible intersection depth per sphere w.r.t. the camera. */ float* min_depth_d; /** Closest possible intersection depth per sphere, ordered ascending. */ float* min_depth_sorted_d; /** The intersect infos per sphere. */ IntersectInfo* ii_d; /** The intersect infos per sphere, ordered by their closest possible * intersection depth (asc.). */ IntersectInfo* ii_sorted_d; /** Original sphere IDs. */ int* ids_d; /** Original sphere IDs, ordered by their closest possible intersection depth * (asc.). */ int* ids_sorted_d; /** Workspace for CUB routines. */ char* workspace_d; /** Workspace size for CUB routines. */ size_t workspace_size; /** The draw information structures for each sphere. */ DrawInfo* di_d; /** The draw information structures sorted by closest possible intersection * depth (asc.). */ DrawInfo* di_sorted_d; /** Region association buffer. */ char* region_flags_d; /** Num spheres in the current region. */ size_t* num_selected_d; /** Pointer to information from the forward pass. */ float* forw_info_d; /** Struct containing information about the min max pixels that contain * rendered information in the image. */ IntersectInfo* min_max_pixels_d; /** Gradients w.r.t. position. */ float3* grad_pos_d; /** Gradients w.r.t. color. */ float* grad_col_d; /** Gradients w.r.t. radius. */ float* grad_rad_d; /** Gradients w.r.t. camera parameters. */ float* grad_cam_d; /** Gradients w.r.t. opacity. */ float* grad_opy_d; /** Camera gradient information by sphere. * * Here, every sphere's contribution to the camera gradients is stored. It is * aggregated and written to grad_cam_d in a separate step. This avoids write * conflicts when processing the spheres. */ CamGradInfo* grad_cam_buf_d; /** Total of all gradient contributions for this image. */ int* n_grad_contributions_d; /** The number of spheres to track for backpropagation. */ int n_track; }; inline bool operator==(const Renderer& a, const Renderer& b) { return a.cam == b.cam && a.max_num_balls == b.max_num_balls; } /** * Construct a renderer. */ template void construct( Renderer* self, const size_t& max_num_balls, const int& width, const int& height, const bool& orthogonal_projection, const bool& right_handed_system, const float& background_normalization_depth, const uint& n_channels, const uint& n_track); /** * Destruct the renderer and free the associated memory. */ template void destruct(Renderer* self); /** * Create a selection of points inside a rectangle. * * This write boolen values into `region_flags_d', which can * for example be used by a CUB function to extract the selection. */ template GLOBAL void create_selector( IntersectInfo const* const RESTRICT ii_sorted_d, const uint num_balls, const int min_x, const int max_x, const int min_y, const int max_y, /* Out variables. */ char* RESTRICT region_flags_d); /** * Calculate a signature for a ball. * * Populate the `ids_d`, `ii_d`, `di_d` and `min_depth_d` fields of the * renderer. For spheres not visible in the image, sets the id field to -1, * min_depth_d to MAX_FLOAT and the ii_d.min.x fields to MAX_USHORT. */ template GLOBAL void calc_signature( Renderer renderer, float3 const* const RESTRICT vert_poss, float const* const RESTRICT vert_cols, float const* const RESTRICT vert_rads, const uint num_balls); /** * The block size for rendering. * * This should be as large as possible, but is limited due to the amount * of variables we use and the memory required per thread. */ #define RENDER_BLOCK_SIZE 16 /** * The buffer size of spheres to be loaded and analyzed for relevance. * * This must be at least RENDER_BLOCK_SIZE * RENDER_BLOCK_SIZE so that * for every iteration through the loading loop every thread could add a * 'hit' to the buffer. */ #define RENDER_BUFFER_SIZE RENDER_BLOCK_SIZE * RENDER_BLOCK_SIZE * 2 /** * The threshold after which the spheres that are in the render buffer * are rendered and the buffer is flushed. * * Must be less than RENDER_BUFFER_SIZE. */ #define RENDER_BUFFER_LOAD_THRESH 16 * 4 /** * The render function. * * Assumptions: * * the focal length is appropriately chosen, * * ray_dir_norm.z is > EPS. * * to be completed... */ template GLOBAL void render( size_t const* const RESTRICT num_balls, /** Number of balls relevant for this pass. */ IntersectInfo const* const RESTRICT ii_d, /** Intersect information. */ DrawInfo const* const RESTRICT di_d, /** Draw information. */ float const* const RESTRICT min_depth_d, /** Minimum depth per sphere. */ int const* const RESTRICT id_d, /** IDs. */ float const* const RESTRICT op_d, /** Opacity. */ const CamInfo cam_norm, /** Camera normalized with all vectors to be in the * camera coordinate system. */ const float gamma, /** Transparency parameter. **/ const float percent_allowed_difference, /** Maximum allowed error in color. */ const uint max_n_hits, const float* bg_col_d, const uint mode, const int x_min, const int y_min, const int x_step, const int y_step, // Out variables. float* const RESTRICT result_d, /** The result image. */ float* const RESTRICT forw_info_d, /** Additional information needed for the grad computation. */ // Infrastructure. const int n_track /** The number of spheres to track. */ ); /** * Makes sure to paint background information. * * This is required as a separate post-processing step because certain * pixels may not be processed during the forward pass if there is no * possibility for a sphere to be present at their location. */ template GLOBAL void fill_bg( Renderer renderer, const CamInfo norm, float const* const bg_col_d, const float gamma, const uint mode); /** * Rendering forward pass. * * Takes a renderer and sphere data as inputs and creates a rendering. */ template void forward( Renderer* self, const float* vert_pos, const float* vert_col, const float* vert_rad, const CamInfo& cam, const float& gamma, float percent_allowed_difference, const uint& max_n_hits, const float* bg_col_d, const float* opacity_d, const size_t& num_balls, const uint& mode, cudaStream_t stream); /** * Normalize the camera gradients by the number of spheres that contributed. */ template GLOBAL void norm_cam_gradients(Renderer renderer); /** * Normalize the sphere gradients. * * We're assuming that the samples originate from a Monte Carlo * sampling process and normalize by number and sphere area. */ template GLOBAL void norm_sphere_gradients(Renderer renderer, const int num_balls); #define GRAD_BLOCK_SIZE 16 /** Calculate the gradients. */ template GLOBAL void calc_gradients( const CamInfo cam, /** Camera in world coordinates. */ float const* const RESTRICT grad_im, /** The gradient image. */ const float gamma, /** The transparency parameter used in the forward pass. */ float3 const* const RESTRICT vert_poss, /** Vertex position vector. */ float const* const RESTRICT vert_cols, /** Vertex color vector. */ float const* const RESTRICT vert_rads, /** Vertex radius vector. */ float const* const RESTRICT opacity, /** Vertex opacity. */ const uint num_balls, /** Number of balls. */ float const* const RESTRICT result_d, /** Result image. */ float const* const RESTRICT forw_info_d, /** Forward pass info. */ DrawInfo const* const RESTRICT di_d, /** Draw information. */ IntersectInfo const* const RESTRICT ii_d, /** Intersect information. */ // Mode switches. const bool calc_grad_pos, const bool calc_grad_col, const bool calc_grad_rad, const bool calc_grad_cam, const bool calc_grad_opy, // Out variables. float* const RESTRICT grad_rad_d, /** Radius gradients. */ float* const RESTRICT grad_col_d, /** Color gradients. */ float3* const RESTRICT grad_pos_d, /** Position gradients. */ CamGradInfo* const RESTRICT grad_cam_buf_d, /** Camera gradient buffer. */ float* const RESTRICT grad_opy_d, /** Opacity gradient buffer. */ int* const RESTRICT grad_contributed_d, /** Gradient contribution counter. */ // Infrastructure. const int n_track, const uint offs_x = 0, const uint offs_y = 0); /** * A full backward pass. * * Creates the gradients for the given gradient_image and the spheres. */ template void backward( Renderer* self, const float* grad_im, const float* image, const float* forw_info, const float* vert_pos, const float* vert_col, const float* vert_rad, const CamInfo& cam, const float& gamma, float percent_allowed_difference, const uint& max_n_hits, const float* vert_opy, const size_t& num_balls, const uint& mode, const bool& dif_pos, const bool& dif_col, const bool& dif_rad, const bool& dif_cam, const bool& dif_opy, cudaStream_t stream); /** * A debug backward pass. * * This is a function to debug the gradient calculation. It calculates the * gradients for exactly one pixel (set with pos_x and pos_y) without averaging. * * *Uses only the first sphere for camera gradient calculation!* */ template void backward_dbg( Renderer* self, const float* grad_im, const float* image, const float* forw_info, const float* vert_pos, const float* vert_col, const float* vert_rad, const CamInfo& cam, const float& gamma, float percent_allowed_difference, const uint& max_n_hits, const float* vert_opy, const size_t& num_balls, const uint& mode, const bool& dif_pos, const bool& dif_col, const bool& dif_rad, const bool& dif_cam, const bool& dif_opy, const uint& pos_x, const uint& pos_y, cudaStream_t stream); template void nn( const float* ref_ptr, const float* tar_ptr, const uint& k, const uint& d, const uint& n, float* dist_ptr, int32_t* inds_ptr, cudaStream_t stream); } // namespace Renderer } // namespace pulsar #endif