From 88f579389fc91e8ccd1f6e7c377a837646ca16dc Mon Sep 17 00:00:00 2001 From: Nikhila Ravi Date: Tue, 30 Jun 2020 12:39:20 -0700 Subject: [PATCH] fix default settings for point rasterization and update benchmark Summary: Fixes the default setting of `max_points_per_bin` in `rasterize_points.py`. For large batches with large size pointclouds this was causing the rasterizer to be very slow. Expanded the pointcloud rendering benchmarks to include larger size pointclouds and fixed cuda synchronization issue in benchmark. Reviewed By: gkioxari Differential Revision: D22301185 fbshipit-source-id: 5077c1ba2c43d73efc1c659f0ec75959ceddf893 --- pytorch3d/renderer/points/rasterize_points.py | 2 +- tests/bm_rasterize_points.py | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/pytorch3d/renderer/points/rasterize_points.py b/pytorch3d/renderer/points/rasterize_points.py index 345ea8e9..1d86dbe8 100644 --- a/pytorch3d/renderer/points/rasterize_points.py +++ b/pytorch3d/renderer/points/rasterize_points.py @@ -98,7 +98,7 @@ def rasterize_points( ) if max_points_per_bin is None: - max_points_per_bin = int(max(10000, points_packed.shape[0] / 5)) + max_points_per_bin = int(max(10000, pointclouds._P / 5)) # Function.apply cannot take keyword args, so we handle defaults in this # wrapper and call apply with positional args only diff --git a/tests/bm_rasterize_points.py b/tests/bm_rasterize_points.py index eb32b22c..b281fe1e 100644 --- a/tests/bm_rasterize_points.py +++ b/tests/bm_rasterize_points.py @@ -28,10 +28,17 @@ def _bm_cpu_with_init(N, P, img_size=32, radius=0.1, pts_per_pxl=3): def _bm_cuda_with_init(N, P, img_size=32, radius=0.1, pts_per_pxl=3): torch.manual_seed(231) - points = torch.randn(N, P, 3, device=torch.device("cuda")) + device = torch.device("cuda:0") + points = torch.randn(N, P, 3, device=device) pointclouds = Pointclouds(points=points) args = (pointclouds, img_size, radius, pts_per_pxl) - return lambda: rasterize_points(*args) + 
torch.cuda.synchronize(device) + + def fn(): + rasterize_points(*args) + torch.cuda.synchronize(device) + + return fn def bm_python_vs_cpu() -> None: @@ -46,4 +53,9 @@ def bm_python_vs_cpu() -> None: {"N": 4, "P": 1024, "img_size": 128, "radius": 0.05, "pts_per_pxl": 5}, ] benchmark(_bm_cpu_with_init, "RASTERIZE_CPU", kwargs_list, warmup_iters=1) + kwargs_list += [ + {"N": 32, "P": 10000, "img_size": 128, "radius": 0.01, "pts_per_pxl": 50}, + {"N": 32, "P": 100000, "img_size": 128, "radius": 0.01, "pts_per_pxl": 50}, + {"N": 8, "P": 200000, "img_size": 512, "radius": 0.01, "pts_per_pxl": 50}, + ] benchmark(_bm_cuda_with_init, "RASTERIZE_CUDA", kwargs_list, warmup_iters=1)