sample_pdf CUDA and C++ implementations.

Summary: Implement the sample_pdf function from the NeRF project as compiled operators. The binary search (in searchsorted) is replaced with a low-tech linear search, but this is not a problem for the envisaged numbers of bins.

Reviewed By: gkioxari

Differential Revision: D26312535

fbshipit-source-id: df1c3119cd63d944380ed1b2657b6ad81d743e49
This commit is contained in:
Jeremy Reizenstein
2021-08-17 08:06:48 -07:00
committed by Facebook GitHub Bot
parent 7d7d00f288
commit 1ea2b7272a
7 changed files with 488 additions and 3 deletions

View File

@@ -12,7 +12,7 @@ from test_sample_pdf import TestSamplePDF
def bm_sample_pdf() -> None:
backends = ["python_cuda", "python_cpu"]
backends = ["python_cuda", "cuda", "python_cpu", "cpu"]
kwargs_list = []
sample_counts = [64]

View File

@@ -5,10 +5,11 @@
# LICENSE file in the root directory of this source tree.
import unittest
from itertools import product
import torch
from common_testing import TestCaseMixin
from pytorch3d.renderer.implicit.sample_pdf import sample_pdf_python
from pytorch3d.renderer.implicit.sample_pdf import sample_pdf, sample_pdf_python
class TestSamplePDF(TestCaseMixin, unittest.TestCase):
@@ -23,9 +24,59 @@ class TestSamplePDF(TestCaseMixin, unittest.TestCase):
calc = torch.linspace(17, 18, 100).expand(5, -1)
self.assertClose(output, calc)
def test_simple_det(self):
    """Deterministic sampling: the compiled CPU op (multi- and single-threaded)
    and the CUDA op must agree with the python reference implementation,
    across a range of bin counts, sample counts and batch shapes."""
    bin_counts = [7, 20]
    sample_counts = [2, 7, 31, 32, 33]
    batch_shapes = [(), (1, 4), (31,), (32,), (33,)]
    for n_bins, n_samples, batch in product(bin_counts, sample_counts, batch_shapes):
        weights = torch.rand(size=(batch + (n_bins,)))
        # cumsum makes the bin edges monotonically increasing.
        bins = torch.cumsum(torch.rand(size=(batch + (n_bins + 1,))), dim=-1)

        # Reference result from the pure-python implementation.
        expected = sample_pdf_python(bins, weights, n_samples, det=True)

        # Compiled CPU op with the default thread count.
        cpu_result = sample_pdf(bins, weights, n_samples, det=True)
        self.assertClose(cpu_result, expected, atol=2e-3)

        # Same op restricted to a single thread; restore the setting afterwards.
        saved_threads = torch.get_num_threads()
        torch.set_num_threads(1)
        single_thread_result = sample_pdf(bins, weights, n_samples, det=True)
        self.assertClose(single_thread_result, expected, atol=2e-3)
        torch.set_num_threads(saved_threads)

        # CUDA op, compared on the CPU.
        device = torch.device("cuda:0")
        cuda_result = sample_pdf(
            bins.to(device), weights.to(device), n_samples, det=True
        ).cpu()
        self.assertClose(cuda_result, expected, atol=2e-3)
def test_rand_cpu(self):
    """Random (non-deterministic) sampling: with the same RNG seed the
    compiled CPU op must match the python reference implementation."""
    n_bins, n_samples, batch_size = 11, 17, 9
    weights = torch.rand(size=(batch_size, n_bins))
    # cumsum makes the bin edges monotonically increasing.
    bins = torch.cumsum(torch.rand(size=(batch_size, n_bins + 1)), dim=-1)

    torch.manual_seed(1)
    expected = sample_pdf_python(bins, weights, n_samples)

    # Reset the seed so both implementations see the same random stream.
    torch.manual_seed(1)
    cpu_result = sample_pdf(bins, weights, n_samples)
    self.assertClose(cpu_result, expected, atol=2e-3)
def test_rand_nogap(self):
    """Degenerate case where random sampling is actually deterministic:
    all the weight is on a zero-width bin, so every sample must land on it."""
    weights = torch.FloatTensor([0, 10, 0])
    # The middle bin is [10, 10] — zero width — and carries all the weight.
    bins = torch.FloatTensor([0, 10, 10, 25])
    n_samples = 8
    expected = torch.full((n_samples,), 10.0)

    self.assertClose(sample_pdf_python(bins, weights, n_samples), expected)
    self.assertClose(sample_pdf(bins, weights, n_samples), expected)

    device = torch.device("cuda:0")
    cuda_result = sample_pdf(bins.to(device), weights.to(device), n_samples).cpu()
    self.assertClose(cuda_result, expected)
@staticmethod
def bm_fn(*, backend: str, n_samples, batch_size, n_bins):
f = sample_pdf_python
f = sample_pdf_python if "python" in backend else sample_pdf
weights = torch.rand(size=(batch_size, n_bins))
bins = torch.cumsum(torch.rand(size=(batch_size, n_bins + 1)), dim=-1)