Multithread CPU naive mesh rasterization

Summary:
Threaded the for loop:
```
for (int yi = 0; yi < H; ++yi) {...}
```
in function `RasterizeMeshesNaiveCpu()`.
The loop iterations are split across threads in chunks of approximately equal size.

Reviewed By: bottler

Differential Revision: D40063604

fbshipit-source-id: 09150269405538119b0f1b029892179501421e68
This commit is contained in:
Gavin Peng
2022-10-06 06:42:58 -07:00
committed by Facebook GitHub Bot
parent 37bd280d19
commit 6471893f59
3 changed files with 121 additions and 47 deletions

View File

@@ -4,13 +4,15 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
import os
from itertools import product
import torch
from fvcore.common.benchmark import benchmark
from tests.test_rasterize_meshes import TestRasterizeMeshes
# Thread count handed to torch.set_num_threads below. It is read from the
# BM_RASTERIZE_MESHES_N_THREADS environment variable and falls back to 1
# when that variable is not set.
BM_RASTERIZE_MESHES_N_THREADS = os.getenv("BM_RASTERIZE_MESHES_N_THREADS", 1)
# os.getenv yields a string when the variable is set, hence the int() cast.
torch.set_num_threads(int(BM_RASTERIZE_MESHES_N_THREADS))
# ico levels:
# 0: (12 verts, 20 faces)
@@ -41,7 +43,7 @@ def bm_rasterize_meshes() -> None:
kwargs_list = []
num_meshes = [1]
ico_level = [1]
image_size = [64, 128]
image_size = [64, 128, 512]
blur = [1e-6]
faces_per_pixel = [3, 50]
test_cases = product(num_meshes, ico_level, image_size, blur, faces_per_pixel)

View File

@@ -35,7 +35,7 @@ class TestRasterizeMeshes(TestCaseMixin, unittest.TestCase):
self._test_barycentric_clipping(rasterize_meshes_python, device, bin_size=-1)
self._test_back_face_culling(rasterize_meshes_python, device, bin_size=-1)
def test_simple_cpu_naive(self):
def _test_simple_cpu_naive_instance(self):
device = torch.device("cpu")
self._simple_triangle_raster(rasterize_meshes, device, bin_size=0)
self._simple_blurry_raster(rasterize_meshes, device, bin_size=0)
@@ -43,6 +43,16 @@ class TestRasterizeMeshes(TestCaseMixin, unittest.TestCase):
self._test_perspective_correct(rasterize_meshes, device, bin_size=0)
self._test_back_face_culling(rasterize_meshes, device, bin_size=0)
def test_simple_cpu_naive(self):
n_threads = torch.get_num_threads()
torch.set_num_threads(1) # single threaded
self._test_simple_cpu_naive_instance()
torch.set_num_threads(4) # even (divisible) number of threads
self._test_simple_cpu_naive_instance()
torch.set_num_threads(5) # odd (nondivisible) number of threads
self._test_simple_cpu_naive_instance()
torch.set_num_threads(n_threads)
def test_simple_cuda_naive(self):
device = get_random_cuda_device()
self._simple_triangle_raster(rasterize_meshes, device, bin_size=0)