move benchmarks to separate directory

Summary: Move benchmarks to a separate directory as tests/ is getting big. Reviewed By: nikhilaravi Differential Revision: D32885462 fbshipit-source-id: a832662a494ee341ab77d95493c95b0af0a83f43
2026-03-03 18:55:59 +08:00 · 2021-12-07 10:22:17 -08:00
parent a6508ac3df
commit a0e2d2e3c3
43 changed files with 0 additions and 0 deletions
--- a/tests/benchmarks/bm_pulsar.py
+++ b/tests/benchmarks/bm_pulsar.py
@@ -0,0 +1,126 @@
+# Copyright (c) Facebook, Inc. and its affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""Test render speed."""
+import logging
+import sys
+from os import path
+
+import torch
+from fvcore.common.benchmark import benchmark
+from pytorch3d.renderer.points.pulsar import Renderer
+from torch.autograd import Variable
+
+
+# Making sure you can run this, even if pulsar hasn't been installed yet.
+# NOTE(review): this path tweak executes after `pytorch3d` has already been
+# imported above, so it cannot influence that import; it prepends the parent
+# of this file's directory to `sys.path` — confirm it is still needed.
+sys.path.insert(0, path.join(path.dirname(__file__), ".."))
+# Module-level logger named after this module.
+LOGGER = logging.getLogger(__name__)
+
+
+"""Measure the execution speed of the rendering.
+
+This measures a very pessimistic lower bound on speed, because synchronization
+points have to be introduced in Python. On a pure PyTorch execution pipeline,
+results should be significantly faster. You can get pure CUDA timings through
+C++ by activating `PULSAR_TIMINGS_BATCHED_ENABLED` in the file
+`pytorch3d/csrc/pulsar/logging.h` or defining it for your compiler.
+"""
+
+
+def _bm_pulsar():
+    """Prepare a one-million-point scene and return the forward benchmark.
+
+    All setup (renderer construction, seeded random data generation and the
+    host-to-device transfer) happens here, outside the returned closure, so
+    the closure contains only the render call and the CUDA synchronization.
+
+    Returns:
+        A zero-argument callable that performs one `Renderer.forward` call on
+        the prepared inputs and then calls `torch.cuda.synchronize()`.
+    """
+    n_points = 1_000_000
+    width = 1_000
+    height = 1_000
+    renderer = Renderer(width, height, n_points)
+    # Generate sample data on the CPU with a fixed seed.
+    torch.manual_seed(1)
+    # Columns 0-1 end up in [-5, 5), column 2 in [25, 35).
+    vert_pos = torch.rand(n_points, 3, dtype=torch.float32) * 10.0
+    vert_pos[:, 2] += 25.0
+    vert_pos[:, :2] -= 5.0
+    vert_col = torch.rand(n_points, 3, dtype=torch.float32)
+    vert_rad = torch.rand(n_points, dtype=torch.float32)
+    # Eight camera parameters; their layout is defined by the pulsar renderer
+    # and is not visible from this file.
+    cam_params = torch.tensor(
+        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 2.0], dtype=torch.float32
+    )
+    device = torch.device("cuda")
+    vert_pos = vert_pos.to(device)
+    vert_col = vert_col.to(device)
+    vert_rad = vert_rad.to(device)
+    cam_params = cam_params.to(device)
+    renderer = renderer.to(device)
+    # The tensors are passed to the renderer directly: none of them requires
+    # grad, so no `torch.autograd.Variable` wrapper (deprecated) is needed.
+
+    def bm_closure():
+        renderer.forward(
+            vert_pos,
+            vert_col,
+            vert_rad,
+            cam_params,
+            # Presumably `gamma` and `max_depth` of the pulsar
+            # `Renderer.forward` signature — TODO confirm.
+            1.0e-1,
+            45.0,
+            percent_allowed_difference=0.01,
+        )
+        # Wait for the GPU so the measured time covers the whole render.
+        torch.cuda.synchronize()
+
+    return bm_closure
+
+
+def _bm_pulsar_backward():
+    """Prepare a one-million-point scene and return the backward benchmark.
+
+    The forward render and the scalar loss (`res.sum()`) are computed once
+    here, during setup; the returned closure only back-propagates that loss.
+
+    Returns:
+        A zero-argument callable that calls `loss.backward(retain_graph=True)`
+        and then `torch.cuda.synchronize()`.
+    """
+    n_points = 1_000_000
+    width = 1_000
+    height = 1_000
+    renderer = Renderer(width, height, n_points)
+    # Generate sample data on the CPU with a fixed seed.
+    torch.manual_seed(1)
+    # Columns 0-1 end up in [-5, 5), column 2 in [25, 35).
+    vert_pos = torch.rand(n_points, 3, dtype=torch.float32) * 10.0
+    vert_pos[:, 2] += 25.0
+    vert_pos[:, :2] -= 5.0
+    vert_col = torch.rand(n_points, 3, dtype=torch.float32)
+    vert_rad = torch.rand(n_points, dtype=torch.float32)
+    # Eight camera parameters; their layout is defined by the pulsar renderer
+    # and is not visible from this file.
+    cam_params = torch.tensor(
+        [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 2.0], dtype=torch.float32
+    )
+    device = torch.device("cuda")
+    # Move every input to the GPU and mark it as a leaf that requires grad.
+    # `requires_grad_` replaces the deprecated `torch.autograd.Variable`
+    # wrapper.
+    vert_pos = vert_pos.to(device).requires_grad_(True)
+    vert_col = vert_col.to(device).requires_grad_(True)
+    vert_rad = vert_rad.to(device).requires_grad_(True)
+    cam_params = cam_params.to(device).requires_grad_(True)
+    renderer = renderer.to(device)
+    res = renderer.forward(
+        vert_pos,
+        vert_col,
+        vert_rad,
+        cam_params,
+        # Presumably `gamma` and `max_depth` of the pulsar `Renderer.forward`
+        # signature — TODO confirm.
+        1.0e-1,
+        45.0,
+        percent_allowed_difference=0.01,
+    )
+    loss = res.sum()
+
+    def bm_closure():
+        # `retain_graph=True` keeps the autograd graph alive so this closure
+        # can be invoked more than once on the same forward result; gradients
+        # accumulate in the inputs' `.grad` across invocations.
+        loss.backward(retain_graph=True)
+        # Wait for the GPU so the measured time covers the whole backward pass.
+        torch.cuda.synchronize()
+
+    return bm_closure
+
+
+def bm_pulsar() -> None:
+    """Run the pulsar forward and backward benchmarks if CUDA is available.
+
+    When no CUDA device is available this function does nothing.
+    """
+    if torch.cuda.is_available():
+        # Each entry pairs a setup function with the label it is reported under.
+        for setup_fn, label in (
+            (_bm_pulsar, "PULSAR_FORWARD"),
+            (_bm_pulsar_backward, "PULSAR_BACKWARD"),
+        ):
+            benchmark(setup_fn, label, [{}], warmup_iters=3)
+
+
+# Allow running the benchmarks by executing this file directly.
+if __name__ == "__main__":
+    bm_pulsar()