pytorch3d/pytorch3d/csrc/rasterize_points/rasterization_utils.cuh
Nikhila Ravi 13429640d3 Bug fix for case where aspect ratio is a float
Summary:
- Fix the calculation of the non square NDC range when the H and W are not integer multiples.
- Add test for this case

Reviewed By: gkioxari

Differential Revision: D26613213

fbshipit-source-id: df6763cac602e9f1d516b41b432c4d2cfbaa356d
2021-02-24 10:07:17 -08:00

61 lines
2.5 KiB
Plaintext

// Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
#pragma once
// The default value of the NDC range is [-1, 1], however in the case that
// H != W, the NDC range is set such that the shorter side has range [-1, 1] and
// the longer side is scaled by the ratio of H:W. S1 is the dimension for which
// the NDC range is calculated and S2 is the other image dimension.
// For example, to get the NDC x range, use S1 = W and S2 = H.
__device__ inline float NonSquareNdcRange(int S1, int S2) {
  // Width of the default NDC interval [-1, 1].
  const float kDefaultRange = 2.0f;

  // S1 is the shorter (or equal) side: it keeps the default range.
  if (S1 <= S2) {
    return kDefaultRange;
  }

  // S1 is the longer side: stretch the range by S1 / S2. The integer S1 is
  // multiplied by the float range first so that the division is carried out
  // in floating point rather than truncating as an integer division.
  return (S1 * kDefaultRange) / S2;
}
// Given a pixel coordinate 0 <= i < S1, convert it to a normalized device
// coordinates. We divide the NDC range into S1 evenly-sized
// pixels, and assume that each pixel falls in the *center* of its range.
// The default value of the NDC range is [-1, 1], however in the case that
// H != W, the NDC range is set such that the shorter side has range [-1, 1] and
// the longer side is scaled by the ratio of H:W. The dimension of i should be
// S1 and the other image dimension is S2. For example, to get the x and y NDC
// coordinates of a given pixel i:
// x = PixToNonSquareNdc(i, W, H)
// y = PixToNonSquareNdc(i, H, W)
__device__ inline float PixToNonSquareNdc(int i, int S1, int S2) {
  // Total NDC extent along the S1 dimension; the NDC interval is
  // [-ndc_span / 2, ndc_span / 2].
  const float ndc_span = NonSquareNdcRange(S1, S2);
  const float half_span = ndc_span / 2.0f;

  // Distance of the center of pixel i from the lower end of the interval:
  // i whole pixels plus half a pixel, where one pixel is ndc_span / S1 wide.
  const float pixel_center = (ndc_span * i + half_span) / S1;

  // Shift so that the interval is centered on zero.
  return -half_span + pixel_center;
}
// The maximum number of points per pixel that we can return. Since we use
// thread-local arrays to hold and sort points, the maximum size of the array
// needs to be known at compile time. There might be some fancy template magic
// we could use to make this more dynamic, but for now just fix a constant.
// TODO: is the current limit enough? Would increasing it have performance
// considerations?
// Compile-time upper bound on the number of points returned per pixel.
constexpr int32_t kMaxPointsPerPixel = 150;
// NOTE(review): presumably the compile-time cap on items stored per bin;
// its users are not visible in this header -- confirm against callers.
constexpr int32_t kMaxItemsPerBin = 22;
template <typename T>
__device__ inline void BubbleSort(T* arr, int n) {
  // In-place ascending bubble sort of the first n elements of arr, ordered by
  // T's operator<. Meant for small thread-local arrays, where warp divergence
  // matters more than the O(n^2) comparison count. Does nothing for n <= 1.
  //
  // `unsorted_end` is the index of the last element that is not yet in its
  // final position; each outer pass bubbles the largest remaining element up
  // to that slot.
  for (int unsorted_end = n - 1; unsorted_end > 0; --unsorted_end) {
    for (int k = 0; k < unsorted_end; ++k) {
      // Swap adjacent elements only when they are strictly out of order, so
      // equal elements keep their relative order.
      if (arr[k + 1] < arr[k]) {
        const T smaller = arr[k + 1];
        arr[k + 1] = arr[k];
        arr[k] = smaller;
      }
    }
  }
}