coarse rasterization bug fix

Summary:
Fix a bug which resulted in rendering artifacts if the image size was not a multiple of 16.
Fix: Revert coarse rasterization to the original implementation and only update fine rasterization to reverse the ordering of the Y and X axes. This is much simpler than the previous approach!

Additional changes:
- updated mesh rendering end-to-end tests to check outputs from both naive and coarse-to-fine rasterization.
- added pointcloud rendering end-to-end tests

Reviewed By: gkioxari

Differential Revision: D21102725

fbshipit-source-id: 2e7e1b013dd6dd12b3a00b79eb8167deddb2e89a
This commit is contained in:
Nikhila Ravi
2020-04-20 14:51:19 -07:00
committed by Facebook GitHub Bot
parent 1e4749602d
commit 9ef1ee8455
15 changed files with 381 additions and 173 deletions

View File

@@ -556,18 +556,16 @@ __global__ void RasterizeMeshesCoarseCudaKernel(
// PixToNdc gives the location of the center of each pixel, so we
// need to add/subtract a half pixel to get the true extent of the bin.
// Reverse ordering of Y axis so that +Y is upwards in the image.
const int yidx = num_bins - by;
const float bin_y_max = PixToNdc(yidx * bin_size - 1, H) + half_pix;
const float bin_y_min = PixToNdc((yidx - 1) * bin_size, H) - half_pix;
const float bin_y_min = PixToNdc(by * bin_size, H) - half_pix;
const float bin_y_max = PixToNdc((by + 1) * bin_size - 1, H) + half_pix;
const bool y_overlap = (ymin <= bin_y_max) && (bin_y_min < ymax);
for (int bx = 0; bx < num_bins; ++bx) {
// X coordinate of the left and right of the bin.
// Reverse ordering of x axis so that +X is left.
const int xidx = num_bins - bx;
const float bin_x_max = PixToNdc(xidx * bin_size - 1, W) + half_pix;
const float bin_x_min = PixToNdc((xidx - 1) * bin_size, W) - half_pix;
const float bin_x_max =
PixToNdc((bx + 1) * bin_size - 1, W) + half_pix;
const float bin_x_min = PixToNdc(bx * bin_size, W) - half_pix;
const bool x_overlap = (xmin <= bin_x_max) && (bin_x_min < xmax);
if (y_overlap && x_overlap) {
@@ -629,6 +627,7 @@ torch::Tensor RasterizeMeshesCoarseCuda(
const int N = num_faces_per_mesh.size(0);
const int num_bins = 1 + (image_size - 1) / bin_size; // Divide round up.
const int M = max_faces_per_bin;
if (num_bins >= 22) {
std::stringstream ss;
ss << "Got " << num_bins << "; that's too many!";
@@ -702,13 +701,8 @@ __global__ void RasterizeMeshesFineCudaKernel(
if (yi >= H || xi >= W)
continue;
// Reverse ordering of the X and Y axis so that
// in the image +Y is pointing up and +X is pointing left.
const int yidx = H - 1 - yi;
const int xidx = W - 1 - xi;
const float xf = PixToNdc(xidx, W);
const float yf = PixToNdc(yidx, H);
const float xf = PixToNdc(xi, W);
const float yf = PixToNdc(yi, H);
const float2 pxy = make_float2(xf, yf);
// This part looks like the naive rasterization kernel, except we use
@@ -743,7 +737,12 @@ __global__ void RasterizeMeshesFineCudaKernel(
// output for the current pixel.
// TODO: make sorting an option as only top k is needed, not sorted values.
BubbleSort(q, q_size);
const int pix_idx = n * H * W * K + yi * H * K + xi * K;
// Reverse ordering of the X and Y axis so that
// in the image +Y is pointing up and +X is pointing left.
const int yidx = H - 1 - yi;
const int xidx = W - 1 - xi;
const int pix_idx = n * H * W * K + yidx * H * K + xidx * K;
for (int k = 0; k < q_size; k++) {
face_idxs[pix_idx + k] = q[k].idx;
zbuf[pix_idx + k] = q[k].z;

View File

@@ -430,13 +430,13 @@ torch::Tensor RasterizeMeshesCoarseCpu(
const int face_stop_idx =
(face_start_idx + num_faces_per_mesh[n].item().to<int32_t>());
float bin_y_max = 1.0f;
float bin_y_min = bin_y_max - bin_width;
float bin_y_min = -1.0f;
float bin_y_max = bin_y_min + bin_width;
// Iterate through the horizontal bins from top to bottom.
for (int by = 0; by < BH; ++by) {
float bin_x_max = 1.0f;
float bin_x_min = bin_x_max - bin_width;
float bin_x_min = -1.0f;
float bin_x_max = bin_x_min + bin_width;
// Iterate through bins on this horizontal line, left to right.
for (int bx = 0; bx < BW; ++bx) {
@@ -473,13 +473,13 @@ torch::Tensor RasterizeMeshesCoarseCpu(
}
}
// Shift the bin to the left for the next loop iteration.
bin_x_max = bin_x_min;
bin_x_min = bin_x_min - bin_width;
// Shift the bin to the right for the next loop iteration
bin_x_min = bin_x_max;
bin_x_max = bin_x_min + bin_width;
}
// Shift the bin down for the next loop iteration.
bin_y_max = bin_y_min;
bin_y_min = bin_y_min - bin_width;
// Shift the bin down for the next loop iteration
bin_y_min = bin_y_max;
bin_y_max = bin_y_min + bin_width;
}
}
return bin_faces;

View File

@@ -95,7 +95,8 @@ __global__ void RasterizePointsNaiveCudaKernel(
const int n = i / (S * S); // Batch index
const int pix_idx = i % (S * S);
// Reverse ordering of X and Y axes.
// Reverse ordering of the X and Y axis as the camera coordinates
// assume that +Y is pointing up and +X is pointing left.
const int yi = S - 1 - pix_idx / S;
const int xi = S - 1 - pix_idx % S;
@@ -260,23 +261,20 @@ __global__ void RasterizePointsCoarseCudaKernel(
// Get y extent for the bin. PixToNdc gives us the location of
// the center of each pixel, so we need to add/subtract a half
// pixel to get the true extent of the bin.
// Reverse ordering of Y axis so that +Y is upwards in the image.
const int yidx = num_bins - by;
const float bin_y_max = PixToNdc(yidx * bin_size - 1, S) + half_pix;
const float bin_y_min = PixToNdc((yidx - 1) * bin_size, S) - half_pix;
const float by0 = PixToNdc(by * bin_size, S) - half_pix;
const float by1 = PixToNdc((by + 1) * bin_size - 1, S) + half_pix;
const bool y_overlap = (py0 <= by1) && (by0 <= py1);
const bool y_overlap = (py0 <= bin_y_max) && (bin_y_min <= py1);
if (!y_overlap) {
continue;
}
for (int bx = 0; bx < num_bins; ++bx) {
// Get x extent for the bin; again we need to adjust the
// output of PixToNdc by half a pixel.
// Reverse ordering of x axis so that +X is left.
const int xidx = num_bins - bx;
const float bin_x_max = PixToNdc(xidx * bin_size - 1, S) + half_pix;
const float bin_x_min = PixToNdc((xidx - 1) * bin_size, S) - half_pix;
const bool x_overlap = (px0 <= bin_x_max) && (bin_x_min <= px1);
const float bx0 = PixToNdc(bx * bin_size, S) - half_pix;
const float bx1 = PixToNdc((bx + 1) * bin_size - 1, S) + half_pix;
const bool x_overlap = (px0 <= bx1) && (bx0 <= px1);
if (x_overlap) {
binmask.set(by, bx, p);
}
@@ -330,6 +328,7 @@ torch::Tensor RasterizePointsCoarseCuda(
const int N = num_points_per_cloud.size(0);
const int num_bins = 1 + (image_size - 1) / bin_size; // divide round up
const int M = max_points_per_bin;
if (points.ndimension() != 2 || points.size(1) != 3) {
AT_ERROR("points must have dimensions (num_points, 3)");
}
@@ -346,6 +345,7 @@ torch::Tensor RasterizePointsCoarseCuda(
const size_t shared_size = num_bins * num_bins * chunk_size / 8;
const size_t blocks = 64;
const size_t threads = 512;
RasterizePointsCoarseCudaKernel<<<blocks, threads, shared_size>>>(
points.contiguous().data_ptr<float>(),
cloud_to_packed_first_idx.contiguous().data_ptr<int64_t>(),
@@ -372,7 +372,7 @@ __global__ void RasterizePointsFineCudaKernel(
const float radius,
const int bin_size,
const int N,
const int B,
const int B, // num_bins
const int M,
const int S,
const int K,
@@ -397,19 +397,15 @@ __global__ void RasterizePointsFineCudaKernel(
i %= B * bin_size * bin_size;
const int bx = i / (bin_size * bin_size);
i %= bin_size * bin_size;
const int yi = i / bin_size + by * bin_size;
const int xi = i % bin_size + bx * bin_size;
if (yi >= S || xi >= S)
continue;
// Reverse ordering of the X and Y axis so that
// in the image +Y is pointing up and +X is pointing left.
const int yidx = S - 1 - yi;
const int xidx = S - 1 - xi;
const float xf = PixToNdc(xidx, S);
const float yf = PixToNdc(yidx, S);
const float xf = PixToNdc(xi, S);
const float yf = PixToNdc(yi, S);
// This part looks like the naive rasterization kernel, except we use
// bin_points to only look at a subset of points already known to fall
@@ -431,7 +427,13 @@ __global__ void RasterizePointsFineCudaKernel(
// Now we've looked at all the points for this bin, so we can write
// output for the current pixel.
BubbleSort(q, q_size);
const int pix_idx = n * S * S * K + yi * S * K + xi * K;
// Reverse ordering of the X and Y axis as the camera coordinates
// assume that +Y is pointing up and +X is pointing left.
const int yidx = S - 1 - yi;
const int xidx = S - 1 - xi;
const int pix_idx = n * S * S * K + yidx * S * K + xidx * K;
for (int k = 0; k < q_size; ++k) {
point_idxs[pix_idx + k] = q[k].idx;
zbuf[pix_idx + k] = q[k].z;
@@ -448,7 +450,7 @@ std::tuple<torch::Tensor, torch::Tensor, torch::Tensor> RasterizePointsFineCuda(
const int bin_size,
const int points_per_pixel) {
const int N = bin_points.size(0);
const int B = bin_points.size(1);
const int B = bin_points.size(1); // num_bins
const int M = bin_points.size(3);
const int S = image_size;
const int K = points_per_pixel;

View File

@@ -125,13 +125,13 @@ torch::Tensor RasterizePointsCoarseCpu(
const int point_stop_idx =
(point_start_idx + num_points_per_cloud[n].item().to<int32_t>());
float bin_y_max = 1.0f;
float bin_y_min = bin_y_max - bin_width;
float bin_y_min = -1.0f;
float bin_y_max = bin_y_min + bin_width;
// Iterate through the horizontal bins from top to bottom.
for (int by = 0; by < B; by++) {
float bin_x_max = 1.0f;
float bin_x_min = bin_x_max - bin_width;
float bin_x_min = -1.0f;
float bin_x_max = bin_x_min + bin_width;
// Iterate through bins on this horizontal line, left to right.
for (int bx = 0; bx < B; bx++) {
@@ -166,13 +166,13 @@ torch::Tensor RasterizePointsCoarseCpu(
// Record the number of points found in this bin
points_per_bin_a[n][by][bx] = points_hit;
// Shift the bin to the left for the next loop iteration.
bin_x_max = bin_x_min;
bin_x_min = bin_x_min - bin_width;
// Shift the bin to the right for the next loop iteration
bin_x_min = bin_x_max;
bin_x_max = bin_x_min + bin_width;
}
// Shift the bin down for the next loop iteration.
bin_y_max = bin_y_min;
bin_y_min = bin_y_min - bin_width;
// Shift the bin down for the next loop iteration
bin_y_min = bin_y_max;
bin_y_max = bin_y_min + bin_width;
}
}
return bin_points;