# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.

from itertools import product

import torch
from fvcore.common.benchmark import benchmark
from pytorch3d import _C
from pytorch3d.ops.knn import _knn_points_idx_naive


def bm_knn() -> None:
    """
    Entry point for the benchmark.

    Always runs the CPU benchmarks. The CUDA benchmarks are run only when
    a CUDA device is available, so the entry point no longer raises on
    CPU-only hosts.
    """
    benchmark_knn_cpu()
    # Every CUDA benchmark below allocates its inputs on a CUDA device.
    if torch.cuda.is_available():
        benchmark_knn_cuda_vs_naive()
        benchmark_knn_cuda_versions()


def benchmark_knn_cuda_versions() -> None:
    """
    Compare our different KNN implementations (selected through the `v`
    argument of `knn_cuda_with_init`), and also compare against our
    existing 1-NN.
    """
    Ns = [1, 2]
    Ps = [4096, 16384]
    Ds = [3]
    Ks = [1, 4, 16, 64]
    versions = [0, 1, 2, 3]
    # Largest K benchmarked for a given version; versions not listed here
    # are benchmarked for every K.
    # NOTE(review): presumably these are the limits supported by the
    # corresponding kernels -- confirm against the C++/CUDA sources.
    max_k = {2: 32, 3: 4}

    knn_kwargs = [
        {'N': N, 'D': D, 'P': P, 'K': K, 'v': version}
        for N, P, D, K, version in product(Ns, Ps, Ds, Ks, versions)
        if K <= max_k.get(version, K)
    ]
    nn_kwargs = [
        {'N': N, 'D': D, 'P': P} for N, P, D in product(Ns, Ps, Ds)
    ]

    benchmark(
        knn_cuda_with_init,
        'KNN_CUDA_VERSIONS',
        knn_kwargs,
        warmup_iters=1,
    )
    benchmark(
        nn_cuda_with_init,
        'NN_CUDA',
        nn_kwargs,
        warmup_iters=1,
    )


def benchmark_knn_cuda_vs_naive() -> None:
    """
    Compare the CUDA KNN against the naive pytorch version of KNN.

    The naive version is only benchmarked for P <= 4096.
    """
    Ns = [1, 2, 4]
    Ps = [1024, 4096, 16384, 65536]
    Ds = [3]
    Ks = [1, 2, 4, 8, 16]
    cases = list(product(Ns, Ps, Ds, Ks))

    knn_kwargs = [
        {'N': N, 'D': D, 'P': P, 'K': K} for N, P, D, K in cases
    ]
    # Built as separate dicts (not shared with knn_kwargs) so the two
    # benchmark runs never see each other's case objects.
    naive_kwargs = [
        {'N': N, 'D': D, 'P': P, 'K': K}
        for N, P, D, K in cases
        if P <= 4096
    ]

    benchmark(
        knn_python_cuda_with_init,
        'KNN_CUDA_PYTHON',
        naive_kwargs,
        warmup_iters=1,
    )
    benchmark(
        knn_cuda_with_init,
        'KNN_CUDA',
        knn_kwargs,
        warmup_iters=1,
    )


def benchmark_knn_cpu() -> None:
    """
    Benchmark the naive pytorch KNN, the C++ KNN and the C++ 1-NN on CPU.
    """
    Ns = [1, 2]
    Ps = [256, 512]
    Ds = [3]
    Ks = [1, 2, 4]

    knn_kwargs = [
        {'N': N, 'D': D, 'P': P, 'K': K}
        for N, P, D, K in product(Ns, Ps, Ds, Ks)
    ]
    nn_kwargs = [
        {'N': N, 'D': D, 'P': P} for N, P, D in product(Ns, Ps, Ds)
    ]

    benchmark(
        knn_python_cpu_with_init,
        'KNN_CPU_PYTHON',
        knn_kwargs,
        warmup_iters=1,
    )
    benchmark(
        knn_cpu_with_init,
        'KNN_CPU_CPP',
        knn_kwargs,
        warmup_iters=1,
    )
    benchmark(
        nn_cpu_with_init,
        'NN_CPU_CPP',
        nn_kwargs,
        warmup_iters=1,
    )


def _random_inputs(N: int, P: int, D: int, device: torch.device):
    """
    Create the two random (N, P, D) tensors used as benchmark inputs.

    For CUDA devices this synchronizes after the allocation so that
    pending setup work is not attributed to the timed function.

    Returns:
        Tuple (x, y) of tensors of shape (N, P, D) on `device`.
    """
    x = torch.randn(N, P, D, device=device)
    y = torch.randn(N, P, D, device=device)
    if device.type == 'cuda':
        torch.cuda.synchronize()
    return x, y


def knn_cuda_with_init(N: int, D: int, P: int, K: int, v: int = -1):
    """
    Set up random inputs on 'cuda:0' and return a zero-argument function
    that runs `_C.knn_points_idx(x, y, K, v)` and then synchronizes.

    Args:
        N, P, D: the inputs are two random tensors of shape (N, P, D).
        K: passed through to `_C.knn_points_idx`.
        v: implementation version passed through to `_C.knn_points_idx`.
            NOTE(review): the default -1 presumably lets the backend pick
            the version -- confirm.
    """
    x, y = _random_inputs(N, P, D, torch.device('cuda:0'))

    def knn():
        _C.knn_points_idx(x, y, K, v)
        torch.cuda.synchronize()

    return knn


def knn_cpu_with_init(N: int, D: int, P: int, K: int):
    """
    Set up random CPU inputs of shape (N, P, D) and return a zero-argument
    function that runs `_C.knn_points_idx(x, y, K, 0)`.
    """
    x, y = _random_inputs(N, P, D, torch.device('cpu'))

    def knn():
        _C.knn_points_idx(x, y, K, 0)

    return knn


def knn_python_cuda_with_init(N: int, D: int, P: int, K: int):
    """
    Set up random CUDA inputs of shape (N, P, D) and return a zero-argument
    function that runs `_knn_points_idx_naive(x, y, K)` and then
    synchronizes.
    """
    x, y = _random_inputs(N, P, D, torch.device('cuda'))

    def knn():
        _knn_points_idx_naive(x, y, K)
        torch.cuda.synchronize()

    return knn


def knn_python_cpu_with_init(N: int, D: int, P: int, K: int):
    """
    Set up random CPU inputs of shape (N, P, D) and return a zero-argument
    function that runs `_knn_points_idx_naive(x, y, K)`.
    """
    x, y = _random_inputs(N, P, D, torch.device('cpu'))

    def knn():
        _knn_points_idx_naive(x, y, K)

    return knn


def nn_cuda_with_init(N: int, D: int, P: int):
    """
    Set up random CUDA inputs of shape (N, P, D) and return a zero-argument
    function that runs `_C.nn_points_idx(x, y)` and then synchronizes.
    """
    x, y = _random_inputs(N, P, D, torch.device('cuda'))

    def knn():
        _C.nn_points_idx(x, y)
        torch.cuda.synchronize()

    return knn


def nn_cpu_with_init(N: int, D: int, P: int):
    """
    Set up random CPU inputs of shape (N, P, D) and return a zero-argument
    function that runs `_C.nn_points_idx(x, y)`.
    """
    x, y = _random_inputs(N, P, D, torch.device('cpu'))

    def knn():
        _C.nn_points_idx(x, y)

    return knn