Mirror of https://github.com/facebookresearch/pytorch3d.git (synced 2025-12-20 14:20:38 +08:00)
Blending fixes and test updates

Summary: Changed `torch.cumprod` to `torch.prod` in blending functions and added more tests and benchmark tests. This should fix the issue raised on GitHub.

Reviewed By: gkioxari

Differential Revision: D20163073

fbshipit-source-id: 4569fd37be11aa4435a3ce8736b55622c00ec718

committed by Facebook Github Bot
parent ff19c642cb
commit ba11c0b59c
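Note: the replacement relies on the fact that the last element of a cumulative product equals the plain product, and that the old exp(sum(log)) workaround computes the same quantity when every probability is strictly below 1.0. A quick standalone sanity check (the tensor shape and values below are made up for illustration, not taken from the tests added in this commit):

import torch

# Made-up probabilities in (0, 0.5) with shape (N, H, W, K); any shape works
# as long as the product is taken over the last (K) dimension.
prob = torch.rand(2, 4, 4, 10) * 0.5

via_cumprod = torch.cumprod(1.0 - prob, dim=-1)[..., -1]    # old commented-out form
via_exp_log = torch.exp(torch.log(1.0 - prob).sum(dim=-1))  # old workaround
via_prod = torch.prod(1.0 - prob, dim=-1)                   # new form in this commit

assert torch.allclose(via_cumprod, via_prod)
assert torch.allclose(via_exp_log, via_prod)

When a probability reaches exactly 1.0 (a face fully covering the pixel), the log-based form passes through log(0) = -inf, which is numerically fragile under autograd, whereas torch.prod returns 0.0 directly; this matches the behaviour the new comments in the diff below describe.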
@@ -45,7 +45,7 @@ def sigmoid_alpha_blend(colors, fragments, blend_params) -> torch.Tensor:
     """
     Silhouette blending to return an RGBA image
       - **RGB** - choose color of the closest point.
-      - **A** - blend based on the 2D distance based probability map [0].
+      - **A** - blend based on the 2D distance based probability map [1].
 
     Args:
         colors: (N, H, W, K, 3) RGB color for each of the top K faces per pixel.

@@ -60,7 +60,7 @@ def sigmoid_alpha_blend(colors, fragments, blend_params) -> torch.Tensor:
     Returns:
         RGBA pixel_colors: (N, H, W, 4)
 
-    [0] Liu et al, 'Soft Rasterizer: A Differentiable Renderer for Image-based
+    [1] Liu et al, 'Soft Rasterizer: A Differentiable Renderer for Image-based
         3D Reasoning', ICCV 2019
     """
     N, H, W, K = fragments.pix_to_face.shape

@@ -73,20 +73,13 @@ def sigmoid_alpha_blend(colors, fragments, blend_params) -> torch.Tensor:
     # the face. Therefore use -1.0 * fragments.dists to get the correct sign.
     prob = torch.sigmoid(-fragments.dists / blend_params.sigma) * mask
 
-    # The cumulative product ensures that alpha will be 1 if at least 1 face
-    # fully covers the pixel as for that face prob will be 1.0
-    # TODO: investigate why torch.cumprod backwards is very slow for large
-    # values of K.
-    # Temporarily replace this with exp(sum(log))) using the fact that
-    # a*b = exp(log(a*b)) = exp(log(a) + log(b))
-    # alpha = 1.0 - torch.cumprod((1.0 - prob), dim=-1)[..., -1]
-
-    alpha = 1.0 - torch.exp(torch.log((1.0 - prob)).sum(dim=-1))
-
+    # The cumulative product ensures that alpha will be 0.0 if at least 1
+    # face fully covers the pixel as for that face, prob will be 1.0.
+    # This results in a multiplication by 0.0 because of the (1.0 - prob)
+    # term. Therefore 1.0 - alpha will be 1.0.
+    alpha = torch.prod((1.0 - prob), dim=-1)
     pixel_colors[..., :3] = colors[..., 0, :]  # Hard assign for RGB
-    pixel_colors[..., 3] = alpha
-
-    pixel_colors = torch.clamp(pixel_colors, min=0, max=1.0)
+    pixel_colors[..., 3] = 1.0 - alpha
     return torch.flip(pixel_colors, [1])
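For intuition on the new sigmoid_alpha_blend path above, a toy example with invented values (the committed code stores the product in `alpha` and writes `1.0 - alpha` into the alpha channel; here that channel value is computed directly):

import torch

# Two pixels, K = 3 faces each; per-face coverage probabilities are invented.
prob = torch.tensor([[0.2, 0.7, 1.0],    # one face fully covers this pixel
                     [0.1, 0.3, 0.0]])   # no face fully covers this pixel
alpha_channel = 1.0 - torch.prod(1.0 - prob, dim=-1)
print(alpha_channel)  # tensor([1.0000, 0.3700]) -> the fully covered pixel is opaque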
@@ -95,7 +88,7 @@ def softmax_rgb_blend(
 ) -> torch.Tensor:
     """
     RGB and alpha channel blending to return an RGBA image based on the method
-    proposed in [0]
+    proposed in [1]
       - **RGB** - blend the colors based on the 2D distance based probability map and
         relative z distances.
      - **A** - blend based on the 2D distance based probability map.

@@ -151,15 +144,11 @@ def softmax_rgb_blend(
     # Sigmoid probability map based on the distance of the pixel to the face.
     prob_map = torch.sigmoid(-fragments.dists / blend_params.sigma) * mask
 
-    # The cumulative product ensures that alpha will be 1 if at least 1 face
-    # fully covers the pixel as for that face prob will be 1.0
-    # TODO: investigate why torch.cumprod backwards is very slow for large
-    # values of K.
-    # Temporarily replace this with exp(sum(log))) using the fact that
-    # a*b = exp(log(a*b)) = exp(log(a) + log(b))
-    # alpha = 1.0 - torch.cumprod((1.0 - prob), dim=-1)[..., -1]
-
-    alpha = 1.0 - torch.exp(torch.log((1.0 - prob_map)).sum(dim=-1))
+    # The cumulative product ensures that alpha will be 0.0 if at least 1
+    # face fully covers the pixel as for that face, prob will be 1.0.
+    # This results in a multiplication by 0.0 because of the (1.0 - prob)
+    # term. Therefore 1.0 - alpha will be 1.0.
+    alpha = torch.prod((1.0 - prob_map), dim=-1)
 
     # Weights for each face. Adjust the exponential by the max z to prevent
     # overflow. zbuf shape (N, H, W, K), find max over K.

@@ -178,8 +167,6 @@ def softmax_rgb_blend(
     weighted_colors = (weights[..., None] * colors).sum(dim=-2)
     weighted_background = (delta / denom) * background
     pix_colors[..., :3] = weighted_colors + weighted_background
-    pix_colors[..., 3] = alpha
+    pix_colors[..., 3] = 1.0 - alpha
 
-    # Clamp colors to the range 0-1 and flip y axis.
-    pix_colors = torch.clamp(pix_colors, min=0, max=1.0)
     return torch.flip(pix_colors, [1])
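The final blend in softmax_rgb_blend combines the per-face colors with the background and now writes the same product-based alpha. The sketch below only illustrates the shapes and broadcasting of the lines touched by these hunks; `weights`, `delta` and `denom` are placeholders, not the values the real function derives from the z-buffer earlier in the function:

import torch

# Stand-in shapes and values; only the last few lines mirror the structure of
# the code in the hunks above.
N, H, W, K = 1, 2, 2, 4
colors = torch.rand(N, H, W, K, 3)                        # top-K face colors per pixel
background = torch.tensor([1.0, 1.0, 1.0])
weights = torch.softmax(torch.rand(N, H, W, K), dim=-1)   # placeholder per-face weights
delta, denom = 0.1, 1.1                                   # placeholder scalars
prob_map = torch.rand(N, H, W, K)

alpha = torch.prod(1.0 - prob_map, dim=-1)
pix_colors = torch.ones(N, H, W, 4)
weighted_colors = (weights[..., None] * colors).sum(dim=-2)   # (N, H, W, 3)
weighted_background = (delta / denom) * background
pix_colors[..., :3] = weighted_colors + weighted_background
pix_colors[..., 3] = 1.0 - alpha                              # alpha channel after this change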