coarse rasterization bug fix

Summary: Fix a bug which resulted in rendering artifacts if the image size was not a multiple of 16. Fix: Revert coarse rasterization to the original implementation and only update fine rasterization to reverse the ordering of the Y and X axes. This is much simpler than the previous approach! Additional changes: - updated mesh rendering end-to-end tests to check outputs from both naive and coarse-to-fine rasterization. - added pointcloud rendering end-to-end tests Reviewed By: gkioxari Differential Revision: D21102725 fbshipit-source-id: 2e7e1b013dd6dd12b3a00b79eb8167deddb2e89a
2026-02-07 14:52:21 +08:00 · 2020-04-20 14:51:19 -07:00
parent 1e4749602d
commit 9ef1ee8455
15 changed files with 381 additions and 173 deletions
--- a/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.cu
+++ b/pytorch3d/csrc/rasterize_meshes/rasterize_meshes.cu
@@ -556,18 +556,16 @@ __global__ void RasterizeMeshesCoarseCudaKernel(
        // PixToNdc gives the location of the center of each pixel, so we
        // need to add/subtract a half pixel to get the true extent of the bin.
        // Reverse ordering of Y axis so that +Y is upwards in the image.
-        const int yidx = num_bins - by;
-        const float bin_y_max = PixToNdc(yidx * bin_size - 1, H) + half_pix;
-        const float bin_y_min = PixToNdc((yidx - 1) * bin_size, H) - half_pix;
-
+        const float bin_y_min = PixToNdc(by * bin_size, H) - half_pix;
+        const float bin_y_max = PixToNdc((by + 1) * bin_size - 1, H) + half_pix;
        const bool y_overlap = (ymin <= bin_y_max) && (bin_y_min < ymax);

        for (int bx = 0; bx < num_bins; ++bx) {
          // X coordinate of the left and right of the bin.
          // Reverse ordering of x axis so that +X is left.
-          const int xidx = num_bins - bx;
-          const float bin_x_max = PixToNdc(xidx * bin_size - 1, W) + half_pix;
-          const float bin_x_min = PixToNdc((xidx - 1) * bin_size, W) - half_pix;
+          const float bin_x_max =
+              PixToNdc((bx + 1) * bin_size - 1, W) + half_pix;
+          const float bin_x_min = PixToNdc(bx * bin_size, W) - half_pix;

          const bool x_overlap = (xmin <= bin_x_max) && (bin_x_min < xmax);
          if (y_overlap && x_overlap) {
@@ -629,6 +627,7 @@ torch::Tensor RasterizeMeshesCoarseCuda(
  const int N = num_faces_per_mesh.size(0);
  const int num_bins = 1 + (image_size - 1) / bin_size; // Divide round up.
  const int M = max_faces_per_bin;
+
  if (num_bins >= 22) {
    std::stringstream ss;
    ss << "Got " << num_bins << "; that's too many!";
@@ -702,13 +701,8 @@ __global__ void RasterizeMeshesFineCudaKernel(
    if (yi >= H || xi >= W)
      continue;

-    // Reverse ordering of the X and Y axis so that
-    // in the image +Y is pointing up and +X is pointing left.
-    const int yidx = H - 1 - yi;
-    const int xidx = W - 1 - xi;
-
-    const float xf = PixToNdc(xidx, W);
-    const float yf = PixToNdc(yidx, H);
+    const float xf = PixToNdc(xi, W);
+    const float yf = PixToNdc(yi, H);
    const float2 pxy = make_float2(xf, yf);

    // This part looks like the naive rasterization kernel, except we use
@@ -743,7 +737,12 @@ __global__ void RasterizeMeshesFineCudaKernel(
    // output for the current pixel.
    // TODO: make sorting an option as only top k is needed, not sorted values.
    BubbleSort(q, q_size);
-    const int pix_idx = n * H * W * K + yi * H * K + xi * K;
+
+    // Reverse ordering of the X and Y axis so that
+    // in the image +Y is pointing up and +X is pointing left.
+    const int yidx = H - 1 - yi;
+    const int xidx = W - 1 - xi;
+    const int pix_idx = n * H * W * K + yidx * H * K + xidx * K;
    for (int k = 0; k < q_size; k++) {
      face_idxs[pix_idx + k] = q[k].idx;
      zbuf[pix_idx + k] = q[k].z;
--- a/pytorch3d/csrc/rasterize_meshes/rasterize_meshes_cpu.cpp
+++ b/pytorch3d/csrc/rasterize_meshes/rasterize_meshes_cpu.cpp
@@ -430,13 +430,13 @@ torch::Tensor RasterizeMeshesCoarseCpu(
    const int face_stop_idx =
        (face_start_idx + num_faces_per_mesh[n].item().to<int32_t>());

-    float bin_y_max = 1.0f;
-    float bin_y_min = bin_y_max - bin_width;
+    float bin_y_min = -1.0f;
+    float bin_y_max = bin_y_min + bin_width;

    // Iterate through the horizontal bins from top to bottom.
    for (int by = 0; by < BH; ++by) {
-      float bin_x_max = 1.0f;
-      float bin_x_min = bin_x_max - bin_width;
+      float bin_x_min = -1.0f;
+      float bin_x_max = bin_x_min + bin_width;

      // Iterate through bins on this horizontal line, left to right.
      for (int bx = 0; bx < BW; ++bx) {
@@ -473,13 +473,13 @@ torch::Tensor RasterizeMeshesCoarseCpu(
          }
        }

-        // Shift the bin to the left for the next loop iteration.
-        bin_x_max = bin_x_min;
-        bin_x_min = bin_x_min - bin_width;
+        // Shift the bin to the right for the next loop iteration
+        bin_x_min = bin_x_max;
+        bin_x_max = bin_x_min + bin_width;
      }
-      // Shift the bin down for the next loop iteration.
-      bin_y_max = bin_y_min;
-      bin_y_min = bin_y_min - bin_width;
+      // Shift the bin down for the next loop iteration
+      bin_y_min = bin_y_max;
+      bin_y_max = bin_y_min + bin_width;
    }
  }
  return bin_faces;
--- a/pytorch3d/csrc/rasterize_points/rasterize_points.cu
+++ b/pytorch3d/csrc/rasterize_points/rasterize_points.cu
@@ -95,7 +95,8 @@ __global__ void RasterizePointsNaiveCudaKernel(
    const int n = i / (S * S); // Batch index
    const int pix_idx = i % (S * S);

-    // Reverse ordering of X and Y axes.
+    // Reverse ordering of the X and Y axis as the camera coordinates
+    // assume that +Y is pointing up and +X is pointing left.
    const int yi = S - 1 - pix_idx / S;
    const int xi = S - 1 - pix_idx % S;

@@ -260,23 +261,20 @@ __global__ void RasterizePointsCoarseCudaKernel(
        // Get y extent for the bin. PixToNdc gives us the location of
        // the center of each pixel, so we need to add/subtract a half
        // pixel to get the true extent of the bin.
-        // Reverse ordering of Y axis so that +Y is upwards in the image.
-        const int yidx = num_bins - by;
-        const float bin_y_max = PixToNdc(yidx * bin_size - 1, S) + half_pix;
-        const float bin_y_min = PixToNdc((yidx - 1) * bin_size, S) - half_pix;
+        const float by0 = PixToNdc(by * bin_size, S) - half_pix;
+        const float by1 = PixToNdc((by + 1) * bin_size - 1, S) + half_pix;
+        const bool y_overlap = (py0 <= by1) && (by0 <= py1);

-        const bool y_overlap = (py0 <= bin_y_max) && (bin_y_min <= py1);
        if (!y_overlap) {
          continue;
        }
        for (int bx = 0; bx < num_bins; ++bx) {
          // Get x extent for the bin; again we need to adjust the
          // output of PixToNdc by half a pixel.
-          // Reverse ordering of x axis so that +X is left.
-          const int xidx = num_bins - bx;
-          const float bin_x_max = PixToNdc(xidx * bin_size - 1, S) + half_pix;
-          const float bin_x_min = PixToNdc((xidx - 1) * bin_size, S) - half_pix;
-          const bool x_overlap = (px0 <= bin_x_max) && (bin_x_min <= px1);
+          const float bx0 = PixToNdc(bx * bin_size, S) - half_pix;
+          const float bx1 = PixToNdc((bx + 1) * bin_size - 1, S) + half_pix;
+          const bool x_overlap = (px0 <= bx1) && (bx0 <= px1);
+
          if (x_overlap) {
            binmask.set(by, bx, p);
          }
@@ -330,6 +328,7 @@ torch::Tensor RasterizePointsCoarseCuda(
  const int N = num_points_per_cloud.size(0);
  const int num_bins = 1 + (image_size - 1) / bin_size; // divide round up
  const int M = max_points_per_bin;
+
  if (points.ndimension() != 2 || points.size(1) != 3) {
    AT_ERROR("points must have dimensions (num_points, 3)");
  }
@@ -346,6 +345,7 @@ torch::Tensor RasterizePointsCoarseCuda(
  const size_t shared_size = num_bins * num_bins * chunk_size / 8;
  const size_t blocks = 64;
  const size_t threads = 512;
+
  RasterizePointsCoarseCudaKernel<<<blocks, threads, shared_size>>>(
      points.contiguous().data_ptr<float>(),
      cloud_to_packed_first_idx.contiguous().data_ptr<int64_t>(),
@@ -372,7 +372,7 @@ __global__ void RasterizePointsFineCudaKernel(
    const float radius,
    const int bin_size,
    const int N,
-    const int B,
+    const int B, // num_bins
    const int M,
    const int S,
    const int K,
@@ -397,19 +397,15 @@ __global__ void RasterizePointsFineCudaKernel(
    i %= B * bin_size * bin_size;
    const int bx = i / (bin_size * bin_size);
    i %= bin_size * bin_size;
+
    const int yi = i / bin_size + by * bin_size;
    const int xi = i % bin_size + bx * bin_size;

    if (yi >= S || xi >= S)
      continue;

-    // Reverse ordering of the X and Y axis so that
-    // in the image +Y is pointing up and +X is pointing left.
-    const int yidx = S - 1 - yi;
-    const int xidx = S - 1 - xi;
-
-    const float xf = PixToNdc(xidx, S);
-    const float yf = PixToNdc(yidx, S);
+    const float xf = PixToNdc(xi, S);
+    const float yf = PixToNdc(yi, S);

    // This part looks like the naive rasterization kernel, except we use
    // bin_points to only look at a subset of points already known to fall
@@ -431,7 +427,13 @@ __global__ void RasterizePointsFineCudaKernel(
    // Now we've looked at all the points for this bin, so we can write
    // output for the current pixel.
    BubbleSort(q, q_size);
-    const int pix_idx = n * S * S * K + yi * S * K + xi * K;
+
+    // Reverse ordering of the X and Y axis as the camera coordinates
+    // assume that +Y is pointing up and +X is pointing left.
+    const int yidx = S - 1 - yi;
+    const int xidx = S - 1 - xi;
+
+    const int pix_idx = n * S * S * K + yidx * S * K + xidx * K;
    for (int k = 0; k < q_size; ++k) {
      point_idxs[pix_idx + k] = q[k].idx;
      zbuf[pix_idx + k] = q[k].z;
@@ -448,7 +450,7 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> RasterizePointsFineCuda(
    const int bin_size,
    const int points_per_pixel) {
  const int N = bin_points.size(0);
-  const int B = bin_points.size(1);
+  const int B = bin_points.size(1); // num_bins
  const int M = bin_points.size(3);
  const int S = image_size;
  const int K = points_per_pixel;
--- a/pytorch3d/csrc/rasterize_points/rasterize_points_cpu.cpp
+++ b/pytorch3d/csrc/rasterize_points/rasterize_points_cpu.cpp
@@ -125,13 +125,13 @@ torch::Tensor RasterizePointsCoarseCpu(
    const int point_stop_idx =
        (point_start_idx + num_points_per_cloud[n].item().to<int32_t>());

-    float bin_y_max = 1.0f;
-    float bin_y_min = bin_y_max - bin_width;
+    float bin_y_min = -1.0f;
+    float bin_y_max = bin_y_min + bin_width;

    // Iterate through the horizontal bins from top to bottom.
    for (int by = 0; by < B; by++) {
-      float bin_x_max = 1.0f;
-      float bin_x_min = bin_x_max - bin_width;
+      float bin_x_min = -1.0f;
+      float bin_x_max = bin_x_min + bin_width;

      // Iterate through bins on this horizontal line, left to right.
      for (int bx = 0; bx < B; bx++) {
@@ -166,13 +166,13 @@ torch::Tensor RasterizePointsCoarseCpu(
        // Record the number of points found in this bin
        points_per_bin_a[n][by][bx] = points_hit;

-        // Shift the bin to the left for the next loop iteration.
-        bin_x_max = bin_x_min;
-        bin_x_min = bin_x_min - bin_width;
+        // Shift the bin to the right for the next loop iteration
+        bin_x_min = bin_x_max;
+        bin_x_max = bin_x_min + bin_width;
      }
-      // Shift the bin down for the next loop iteration.
-      bin_y_max = bin_y_min;
-      bin_y_min = bin_y_min - bin_width;
+      // Shift the bin down for the next loop iteration
+      bin_y_min = bin_y_max;
+      bin_y_max = bin_y_min + bin_width;
    }
  }
  return bin_points;