diff --git a/pytorch3d/renderer/points/rasterizer.py b/pytorch3d/renderer/points/rasterizer.py index c533126d..5831994c 100644 --- a/pytorch3d/renderer/points/rasterizer.py +++ b/pytorch3d/renderer/points/rasterizer.py @@ -16,8 +16,30 @@ from pytorch3d.structures import Pointclouds from .rasterize_points import rasterize_points -# Class to store the outputs of point rasterization class PointFragments(NamedTuple): + """ + Class to store the outputs of point rasterization. + + Members: + idx: int32 Tensor of shape (N, image_size, image_size, points_per_pixel) + giving the indices of the nearest points at each pixel, in ascending + z-order. Concretely `idx[n, y, x, k] = p` means that `points[p]` is the kth + closest point (along the z-direction) to pixel (y, x) - note that points + represents the packed points of shape (P, 3). + Pixels that are hit by fewer than points_per_pixel are padded with -1. + zbuf: Tensor of shape (N, image_size, image_size, points_per_pixel) + giving the z-coordinates of the nearest points at each pixel, sorted in + z-order. Concretely, if `idx[n, y, x, k] = p` then + `zbuf[n, y, x, k] = points[p, 2]`. Pixels hit by fewer than + points_per_pixel are padded with -1. + dists: Tensor of shape (N, image_size, image_size, points_per_pixel) + giving the squared Euclidean distance (in NDC units) in the x/y plane + for each point closest to the pixel. Concretely if `idx[n, y, x, k] = p` + then `dists[n, y, x, k]` is the squared distance between the pixel (y, x) + and the point `(points[p, 0], points[p, 1])`. Pixels hit by fewer + than points_per_pixel are padded with -1. + """ + idx: torch.Tensor zbuf: torch.Tensor dists: torch.Tensor diff --git a/pytorch3d/renderer/points/renderer.py b/pytorch3d/renderer/points/renderer.py index 42e6628d..0a83ec40 100644 --- a/pytorch3d/renderer/points/renderer.py +++ b/pytorch3d/renderer/points/renderer.py @@ -30,6 +30,12 @@ class PointsRenderer(nn.Module): A class for rendering a batch of points. 
The class should be initialized with a rasterizer and compositor class which each have a forward function. + + The points are rendered with varying alpha (weights) values depending on + the distance of the pixel center to the true point in the xy plane. The purpose + of this is to soften the hard decision boundary, for differentiability. + See Section 3.2 of "SynSin: End-to-end View Synthesis from a Single Image" + (https://arxiv.org/pdf/1912.08804.pdf) for more details. """ def __init__(self, rasterizer, compositor) -> None: