# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.

from itertools import product

import torch
from fvcore.common.benchmark import benchmark
from pytorch3d import _C
from pytorch3d.ops.knn import _knn_points_idx_naive


def bm_knn() -> None:
    """
    Entry point for the benchmark.

    The CPU suite always runs. The two CUDA suites allocate their inputs on a
    CUDA device, so they are only run when ``torch.cuda.is_available()``
    reports one; otherwise they are skipped instead of raising part-way
    through the run.
    """
    benchmark_knn_cpu()
    if torch.cuda.is_available():
        benchmark_knn_cuda_vs_naive()
        benchmark_knn_cuda_versions()


def benchmark_knn_cuda_versions() -> None:
    """
    Benchmark each CUDA KNN implementation version against the others, and
    against the existing 1-NN CUDA op.
    """
    # Compare our different KNN implementations,
    # and also compare against our existing 1-NN
    Ns = [1, 2]
    Ps = [4096, 16384]
    Ds = [3]
    Ks = [1, 4, 16, 64]
    versions = [0, 1, 2, 3]
    knn_kwargs = []
    for N, P, D, K, version in product(Ns, Ps, Ds, Ks, versions):
        # Version 2 is only benchmarked for K <= 32 and version 3 for K <= 4
        # (presumably the largest K those versions support -- TODO confirm).
        if version == 2 and K > 32:
            continue
        if version == 3 and K > 4:
            continue
        knn_kwargs.append({"N": N, "D": D, "P": P, "K": K, "v": version})
    nn_kwargs = [{"N": N, "D": D, "P": P} for N, P, D in product(Ns, Ps, Ds)]
    benchmark(
        knn_cuda_with_init, "KNN_CUDA_VERSIONS", knn_kwargs, warmup_iters=1
    )
    benchmark(nn_cuda_with_init, "NN_CUDA", nn_kwargs, warmup_iters=1)


def benchmark_knn_cuda_vs_naive() -> None:
    """
    Benchmark the CUDA KNN op against the naive PyTorch implementation
    running on a CUDA device.
    """
    # Compare against naive pytorch version of KNN
    Ns = [1, 2, 4]
    Ps = [1024, 4096, 16384, 65536]
    Ds = [3]
    Ks = [1, 2, 4, 8, 16]
    knn_kwargs = [
        {"N": N, "D": D, "P": P, "K": K}
        for N, P, D, K in product(Ns, Ps, Ds, Ks)
    ]
    # The naive version is only benchmarked for P <= 4096. Each entry is a
    # fresh dict so the two benchmark runs never share a kwargs object.
    naive_kwargs = [dict(kw) for kw in knn_kwargs if kw["P"] <= 4096]
    benchmark(
        knn_python_cuda_with_init,
        "KNN_CUDA_PYTHON",
        naive_kwargs,
        warmup_iters=1,
    )
    benchmark(knn_cuda_with_init, "KNN_CUDA", knn_kwargs, warmup_iters=1)


def benchmark_knn_cpu() -> None:
    """
    Benchmark the naive PyTorch KNN, the compiled KNN op and the compiled
    1-NN op, all on CPU tensors.
    """
    Ns = [1, 2]
    Ps = [256, 512]
    Ds = [3]
    Ks = [1, 2, 4]
    knn_kwargs = [
        {"N": N, "D": D, "P": P, "K": K}
        for N, P, D, K in product(Ns, Ps, Ds, Ks)
    ]
    nn_kwargs = [{"N": N, "D": D, "P": P} for N, P, D in product(Ns, Ps, Ds)]
    benchmark(
        knn_python_cpu_with_init, "KNN_CPU_PYTHON", knn_kwargs, warmup_iters=1
    )
    benchmark(knn_cpu_with_init, "KNN_CPU_CPP", knn_kwargs, warmup_iters=1)
    benchmark(nn_cpu_with_init, "NN_CPU_CPP", nn_kwargs, warmup_iters=1)


def _random_inputs(N, D, P, device):
    """Return two independently sampled randn tensors of shape (N, P, D)."""
    x = torch.randn(N, P, D, device=device)
    y = torch.randn(N, P, D, device=device)
    return x, y


def knn_cuda_with_init(N, D, P, K, v=-1):
    """
    Allocate random (N, P, D) inputs on "cuda:0" and return a zero-argument
    closure that runs ``_C.knn_points_idx(x, y, K, v)`` on them.

    ``v`` is forwarded unchanged as the fourth argument; the version
    benchmark sweeps it over 0..3. The default of -1 presumably lets the
    extension pick an implementation itself -- TODO confirm.
    """
    device = torch.device("cuda:0")
    x, y = _random_inputs(N, D, P, device)
    # Finish input allocation before the closure is handed to the benchmark.
    torch.cuda.synchronize()

    def knn():
        _C.knn_points_idx(x, y, K, v)
        # Wait for the queued CUDA work so the call's full cost is measured.
        torch.cuda.synchronize()

    return knn


def knn_cpu_with_init(N, D, P, K):
    """
    Allocate random (N, P, D) inputs on the CPU and return a zero-argument
    closure that runs ``_C.knn_points_idx(x, y, K, 0)`` on them.
    """
    device = torch.device("cpu")
    x, y = _random_inputs(N, D, P, device)

    def knn():
        _C.knn_points_idx(x, y, K, 0)

    return knn


def knn_python_cuda_with_init(N, D, P, K):
    """
    Allocate random (N, P, D) inputs on the current CUDA device and return a
    zero-argument closure that runs ``_knn_points_idx_naive(x, y, K)``.
    """
    device = torch.device("cuda")
    x, y = _random_inputs(N, D, P, device)
    torch.cuda.synchronize()

    def knn():
        _knn_points_idx_naive(x, y, K)
        torch.cuda.synchronize()

    return knn


def knn_python_cpu_with_init(N, D, P, K):
    """
    Allocate random (N, P, D) inputs on the CPU and return a zero-argument
    closure that runs ``_knn_points_idx_naive(x, y, K)``.
    """
    device = torch.device("cpu")
    x, y = _random_inputs(N, D, P, device)

    def knn():
        _knn_points_idx_naive(x, y, K)

    return knn


def nn_cuda_with_init(N, D, P):
    """
    Allocate random (N, P, D) inputs on the current CUDA device and return a
    zero-argument closure that runs ``_C.nn_points_idx(x, y)``.
    """
    device = torch.device("cuda")
    x, y = _random_inputs(N, D, P, device)
    torch.cuda.synchronize()

    def knn():
        _C.nn_points_idx(x, y)
        torch.cuda.synchronize()

    return knn


def nn_cpu_with_init(N, D, P):
    """
    Allocate random (N, P, D) inputs on the CPU and return a zero-argument
    closure that runs ``_C.nn_points_idx(x, y)``.
    """
    device = torch.device("cpu")
    x, y = _random_inputs(N, D, P, device)

    def knn():
        _C.nn_points_idx(x, y)

    return knn