mirror of
https://github.com/facebookresearch/pytorch3d.git
synced 2025-08-02 20:02:49 +08:00
Summary: CUDA implementation of the farthest point sampling algorithm. ## Visual comparison Compared to random sampling, farthest point sampling gives better coverage of the shape. {F658631262} ## Reduction The parallelized block reduction that finds the max value at each iteration works as follows: 1. First split the points into two equal-sized parts (e.g. for a list with 8 values): `[20, 27, 6, 8 | 11, 10, 2, 33]` 2. Use half of the threads (4 threads) to compare pairs of elements from each half (e.g. elements [0, 4], [1, 5] etc.) and store the result in the first half of the list: `[20, 27, 6, 33 | 11, 10, 2, 33]` 3. Now we no longer care about the second half, so we again divide the first half into two: `[20, 27 | 6, 33 | -, -, -, -]` Now we can use 2 threads to compare the 4 elements. 4. Finally we are down to a single pair: `[20 | 33 | -, - | -, -, -, -]` Use 1 thread to compare the remaining two elements. 5. The max will now be at thread id = 0: `[33 | - | -, - | -, -, -, -]` The reduction gives the farthest point for the selected batch index at this iteration. Reviewed By: bottler, jcjohnson Differential Revision: D30401803 fbshipit-source-id: 525bd5ae27c4b13b501812cfe62306bb003827d2
47 lines
1.2 KiB
Python
47 lines
1.2 KiB
Python
# Copyright (c) Facebook, Inc. and its affiliates.
|
|
# All rights reserved.
|
|
#
|
|
# This source code is licensed under the BSD-style license found in the
|
|
# LICENSE file in the root directory of this source tree.
|
|
|
|
from itertools import product
|
|
|
|
from fvcore.common.benchmark import benchmark
|
|
from test_sample_farthest_points import TestFPS
|
|
|
|
|
|
def bm_fps() -> None:
    """Run the farthest point sampling (FPS) benchmarks.

    Two benchmark suites are launched through ``benchmark``:

    * ``"FPS_NAIVE_PYTHON"`` times ``TestFPS.sample_farthest_points_naive``
      on a small grid of configurations only.
    * ``"FPS"`` times ``TestFPS.sample_farthest_points`` on that same small
      grid plus a set of larger configurations.

    Every configuration is a dict with the keys ``N``, ``P``, ``D``, ``K``
    and ``device``, and each grid is evaluated on both ``"cpu"`` and
    ``"cuda:0"``.
    """
    devices = ["cpu", "cuda:0"]

    # Small grid: the only configurations the naive implementation is timed on.
    kwargs_list = [
        {"N": n, "P": p, "D": dim, "K": k, "device": dev}
        for n, p, dim, k, dev in product([8, 32], [64, 256], [3], [24], devices)
    ]
    benchmark(
        TestFPS.sample_farthest_points_naive,
        "FPS_NAIVE_PYTHON",
        kwargs_list,
        warmup_iters=1,
    )

    # Add some larger batch sizes and pointcloud sizes. The list is extended
    # rather than rebuilt, so the "FPS" run below also covers the small grid.
    # NOTE(review): 18384 may be a typo for 16384 (2**14) -- confirm before changing.
    kwargs_list.extend(
        {"N": n, "P": p, "D": dim, "K": k, "device": dev}
        for n, p, dim, k, dev in product(
            [32], [2048, 8192, 18384], [3, 9], [24, 48], devices
        )
    )
    benchmark(TestFPS.sample_farthest_points, "FPS", kwargs_list, warmup_iters=1)
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run the FPS benchmarks when executed directly.
    bm_fps()
|