mirror of
https://github.com/facebookresearch/pytorch3d.git
synced 2025-08-02 03:42:50 +08:00
Summary: CPU implementation for packed to padded and added gradients

```
Benchmark  Avg Time(μs)  Peak Time(μs)  Iterations
--------------------------------------------------------------------------------
PACKED_TO_PADDED_2_100_300_1_cpu  138  221  3625
PACKED_TO_PADDED_2_100_300_1_cuda:0  184  261  2716
PACKED_TO_PADDED_2_100_300_16_cpu  555  726  901
PACKED_TO_PADDED_2_100_300_16_cuda:0  179  260  2794
PACKED_TO_PADDED_2_100_3000_1_cpu  396  519  1262
PACKED_TO_PADDED_2_100_3000_1_cuda:0  181  274  2764
PACKED_TO_PADDED_2_100_3000_16_cpu  4517  5003  111
PACKED_TO_PADDED_2_100_3000_16_cuda:0  224  397  2235
PACKED_TO_PADDED_2_1000_300_1_cpu  138  212  3616
PACKED_TO_PADDED_2_1000_300_1_cuda:0  180  282  2775
PACKED_TO_PADDED_2_1000_300_16_cpu  565  711  885
PACKED_TO_PADDED_2_1000_300_16_cuda:0  179  264  2797
PACKED_TO_PADDED_2_1000_3000_1_cpu  389  494  1287
PACKED_TO_PADDED_2_1000_3000_1_cuda:0  180  271  2777
PACKED_TO_PADDED_2_1000_3000_16_cpu  4522  5170  111
PACKED_TO_PADDED_2_1000_3000_16_cuda:0  216  286  2313
PACKED_TO_PADDED_10_100_300_1_cpu  251  345  1995
PACKED_TO_PADDED_10_100_300_1_cuda:0  178  262  2806
PACKED_TO_PADDED_10_100_300_16_cpu  2354  2750  213
PACKED_TO_PADDED_10_100_300_16_cuda:0  178  291  2814
PACKED_TO_PADDED_10_100_3000_1_cpu  1519  1786  330
PACKED_TO_PADDED_10_100_3000_1_cuda:0  179  237  2791
PACKED_TO_PADDED_10_100_3000_16_cpu  24705  25879  21
PACKED_TO_PADDED_10_100_3000_16_cuda:0  228  316  2191
PACKED_TO_PADDED_10_1000_300_1_cpu  261  432  1919
PACKED_TO_PADDED_10_1000_300_1_cuda:0  181  261  2756
PACKED_TO_PADDED_10_1000_300_16_cpu  2349  2770  213
PACKED_TO_PADDED_10_1000_300_16_cuda:0  180  256  2782
PACKED_TO_PADDED_10_1000_3000_1_cpu  1613  1929  310
PACKED_TO_PADDED_10_1000_3000_1_cuda:0  183  253  2739
PACKED_TO_PADDED_10_1000_3000_16_cpu  22041  23653  23
PACKED_TO_PADDED_10_1000_3000_16_cuda:0  220  343  2270
PACKED_TO_PADDED_32_100_300_1_cpu  555  750  901
PACKED_TO_PADDED_32_100_300_1_cuda:0  188  282  2661
PACKED_TO_PADDED_32_100_300_16_cpu  7550  8131  67
PACKED_TO_PADDED_32_100_300_16_cuda:0  181  272  2770
PACKED_TO_PADDED_32_100_3000_1_cpu  4574  6327  110
PACKED_TO_PADDED_32_100_3000_1_cuda:0  173  254  2884
PACKED_TO_PADDED_32_100_3000_16_cpu  70366  72563  8
PACKED_TO_PADDED_32_100_3000_16_cuda:0  349  654  1433
PACKED_TO_PADDED_32_1000_300_1_cpu  612  728  818
PACKED_TO_PADDED_32_1000_300_1_cuda:0  189  295  2647
PACKED_TO_PADDED_32_1000_300_16_cpu  7699  8254  65
PACKED_TO_PADDED_32_1000_300_16_cuda:0  189  311  2646
PACKED_TO_PADDED_32_1000_3000_1_cpu  5105  5261  98
PACKED_TO_PADDED_32_1000_3000_1_cuda:0  191  260  2625
PACKED_TO_PADDED_32_1000_3000_16_cpu  87073  92708  6
PACKED_TO_PADDED_32_1000_3000_16_cuda:0  344  425  1455
--------------------------------------------------------------------------------

Benchmark  Avg Time(μs)  Peak Time(μs)  Iterations
--------------------------------------------------------------------------------
PACKED_TO_PADDED_TORCH_2_100_300_1_cpu  492  627  1016
PACKED_TO_PADDED_TORCH_2_100_300_1_cuda:0  768  975  652
PACKED_TO_PADDED_TORCH_2_100_300_16_cpu  659  804  760
PACKED_TO_PADDED_TORCH_2_100_300_16_cuda:0  781  918  641
PACKED_TO_PADDED_TORCH_2_100_3000_1_cpu  624  734  802
PACKED_TO_PADDED_TORCH_2_100_3000_1_cuda:0  778  929  643
PACKED_TO_PADDED_TORCH_2_100_3000_16_cpu  2609  2850  192
PACKED_TO_PADDED_TORCH_2_100_3000_16_cuda:0  758  901  660
PACKED_TO_PADDED_TORCH_2_1000_300_1_cpu  467  612  1072
PACKED_TO_PADDED_TORCH_2_1000_300_1_cuda:0  772  905  648
PACKED_TO_PADDED_TORCH_2_1000_300_16_cpu  689  839  726
PACKED_TO_PADDED_TORCH_2_1000_300_16_cuda:0  789  1143  635
PACKED_TO_PADDED_TORCH_2_1000_3000_1_cpu  629  735  795
PACKED_TO_PADDED_TORCH_2_1000_3000_1_cuda:0  812  916  616
PACKED_TO_PADDED_TORCH_2_1000_3000_16_cpu  2716  3117  185
PACKED_TO_PADDED_TORCH_2_1000_3000_16_cuda:0  844  1288  593
PACKED_TO_PADDED_TORCH_10_100_300_1_cpu  2387  2557  210
PACKED_TO_PADDED_TORCH_10_100_300_1_cuda:0  4112  4993  122
PACKED_TO_PADDED_TORCH_10_100_300_16_cpu  3385  4254  148
PACKED_TO_PADDED_TORCH_10_100_300_16_cuda:0  3959  4902  127
PACKED_TO_PADDED_TORCH_10_100_3000_1_cpu  2918  3105  172
PACKED_TO_PADDED_TORCH_10_100_3000_1_cuda:0  4054  4450  124
PACKED_TO_PADDED_TORCH_10_100_3000_16_cpu  12748  13623  40
PACKED_TO_PADDED_TORCH_10_100_3000_16_cuda:0  4023  4395  125
PACKED_TO_PADDED_TORCH_10_1000_300_1_cpu  2258  2492  222
PACKED_TO_PADDED_TORCH_10_1000_300_1_cuda:0  3997  4312  126
PACKED_TO_PADDED_TORCH_10_1000_300_16_cpu  3404  3597  147
PACKED_TO_PADDED_TORCH_10_1000_300_16_cuda:0  3877  4227  129
PACKED_TO_PADDED_TORCH_10_1000_3000_1_cpu  2789  3054  180
PACKED_TO_PADDED_TORCH_10_1000_3000_1_cuda:0  3821  4402  131
PACKED_TO_PADDED_TORCH_10_1000_3000_16_cpu  11967  12963  42
PACKED_TO_PADDED_TORCH_10_1000_3000_16_cuda:0  3729  4290  135
PACKED_TO_PADDED_TORCH_32_100_300_1_cpu  6933  8152  73
PACKED_TO_PADDED_TORCH_32_100_300_1_cuda:0  11856  12287  43
PACKED_TO_PADDED_TORCH_32_100_300_16_cpu  9895  11205  51
PACKED_TO_PADDED_TORCH_32_100_300_16_cuda:0  12354  13596  41
PACKED_TO_PADDED_TORCH_32_100_3000_1_cpu  9516  10128  53
PACKED_TO_PADDED_TORCH_32_100_3000_1_cuda:0  12917  13597  39
PACKED_TO_PADDED_TORCH_32_100_3000_16_cpu  41209  43783  13
PACKED_TO_PADDED_TORCH_32_100_3000_16_cuda:0  12210  13288  41
PACKED_TO_PADDED_TORCH_32_1000_300_1_cpu  7179  7689  70
PACKED_TO_PADDED_TORCH_32_1000_300_1_cuda:0  11896  12381  43
PACKED_TO_PADDED_TORCH_32_1000_300_16_cpu  10127  15494  50
PACKED_TO_PADDED_TORCH_32_1000_300_16_cuda:0  12034  12817  42
PACKED_TO_PADDED_TORCH_32_1000_3000_1_cpu  8743  10251  58
PACKED_TO_PADDED_TORCH_32_1000_3000_1_cuda:0  12023  12908  42
PACKED_TO_PADDED_TORCH_32_1000_3000_16_cpu  39071  41777  13
PACKED_TO_PADDED_TORCH_32_1000_3000_16_cuda:0  11999  13690  42
--------------------------------------------------------------------------------
```

Reviewed By: bottler, nikhilaravi, jcjohnson

Differential Revision: D19870575

fbshipit-source-id: 23a2477b73373c411899633386c87ab034c3702a
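For context: the packed-to-padded conversion takes a batch's per-element values stored in packed form (all meshes concatenated along the first dimension) and scatters them into a zero-padded per-mesh tensor. The benchmark names appear to encode the configuration (number of meshes, verts, faces, feature dimension, device), and the PACKED_TO_PADDED_TORCH table presumably times a pure-PyTorch reference version of the same conversion, along the lines of the sketch below. The names `packed_to_padded_torch`, `inputs`, `first_idxs`, and `max_size` are illustrative, not the library's exact API.

```python
import torch


def packed_to_padded_torch(inputs, first_idxs, max_size):
    """
    Sketch of a packed -> padded conversion in plain PyTorch.

    inputs:     (F, D) packed values for all meshes in the batch,
                e.g. per-face features concatenated over meshes.
    first_idxs: (N,) start index of each mesh's rows within `inputs`.
    max_size:   largest number of rows any single mesh contributes.

    Returns an (N, max_size, D) tensor, zero-padded per mesh.
    """
    F, D = inputs.shape
    N = first_idxs.shape[0]
    padded = inputs.new_zeros((N, max_size, D))
    for i in range(N):
        start = int(first_idxs[i])
        end = int(first_idxs[i + 1]) if i + 1 < N else F
        # Copy this mesh's rows; the remainder stays zero (the padding).
        padded[i, : end - start] = inputs[start:end]
    return padded
```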
326 lines
10 KiB
Python
#!/usr/bin/env python3
# Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.


import unittest
from pathlib import Path
import torch

from pytorch3d import _C
from pytorch3d.ops.sample_points_from_meshes import sample_points_from_meshes
from pytorch3d.structures.meshes import Meshes
from pytorch3d.utils.ico_sphere import ico_sphere


class TestSamplePoints(unittest.TestCase):
    def setUp(self) -> None:
        super().setUp()
        torch.manual_seed(1)

    @staticmethod
    def init_meshes(
        num_meshes: int = 10,
        num_verts: int = 1000,
        num_faces: int = 3000,
        device: str = "cpu",
    ):
        device = torch.device(device)
        verts_list = []
        faces_list = []
        for _ in range(num_meshes):
            verts = torch.rand(
                (num_verts, 3), dtype=torch.float32, device=device
            )
            faces = torch.randint(
                num_verts, size=(num_faces, 3), dtype=torch.int64, device=device
            )
            verts_list.append(verts)
            faces_list.append(faces)
        meshes = Meshes(verts_list, faces_list)

        return meshes

    def test_all_empty_meshes(self):
        """
        Check sample_points_from_meshes raises an exception if all meshes are
        invalid.
        """
        device = torch.device("cuda:0")
        verts1 = torch.tensor([], dtype=torch.float32, device=device)
        faces1 = torch.tensor([], dtype=torch.int64, device=device)
        meshes = Meshes(
            verts=[verts1, verts1, verts1], faces=[faces1, faces1, faces1]
        )
        with self.assertRaises(ValueError) as err:
            sample_points_from_meshes(
                meshes, num_samples=100, return_normals=True
            )
        self.assertTrue("Meshes are empty." in str(err.exception))

    def test_sampling_output(self):
        """
        Check outputs of sampling are correct for different meshes.
        For an ico_sphere, the sampled vertices should lie on a unit sphere.
        For an empty mesh, the samples and normals should be 0.
        """
        device = torch.device("cuda:0")

        # Unit simplex.
        verts_pyramid = torch.tensor(
            [
                [0.0, 0.0, 0.0],
                [1.0, 0.0, 0.0],
                [0.0, 1.0, 0.0],
                [0.0, 0.0, 1.0],
            ],
            dtype=torch.float32,
            device=device,
        )
        faces_pyramid = torch.tensor(
            [[0, 1, 2], [0, 2, 3], [0, 1, 3], [1, 2, 3]],
            dtype=torch.int64,
            device=device,
        )
        sphere_mesh = ico_sphere(9, device)
        verts_sphere, faces_sphere = sphere_mesh.get_mesh_verts_faces(0)
        verts_empty = torch.tensor([], dtype=torch.float32, device=device)
        faces_empty = torch.tensor([], dtype=torch.int64, device=device)
        num_samples = 10
        meshes = Meshes(
            verts=[verts_empty, verts_sphere, verts_pyramid],
            faces=[faces_empty, faces_sphere, faces_pyramid],
        )
        samples, normals = sample_points_from_meshes(
            meshes, num_samples=num_samples, return_normals=True
        )
        samples = samples.cpu()
        normals = normals.cpu()

        self.assertEqual(samples.shape, (3, num_samples, 3))
        self.assertEqual(normals.shape, (3, num_samples, 3))

        # Empty meshes: should have all zeros for samples and normals.
        self.assertTrue(
            torch.allclose(samples[0, :], torch.zeros((1, num_samples, 3)))
        )
        self.assertTrue(
            torch.allclose(normals[0, :], torch.zeros((1, num_samples, 3)))
        )

        # Sphere: points should have radius 1.
        x, y, z = samples[1, :].unbind(1)
        radius = torch.sqrt(x ** 2 + y ** 2 + z ** 2)

        self.assertTrue(torch.allclose(radius, torch.ones((num_samples))))

        # Pyramid: points should lie on one of the faces.
        pyramid_verts = samples[2, :]
        pyramid_normals = normals[2, :]

        self.assertTrue(
            torch.allclose(
                pyramid_verts.lt(1).float(), torch.ones_like(pyramid_verts)
            )
        )
        self.assertTrue(
            torch.allclose(
                (pyramid_verts >= 0).float(), torch.ones_like(pyramid_verts)
            )
        )

        # Face 1: z = 0, x + y <= 1, normals = (0, 0, 1).
        face_1_idxs = pyramid_verts[:, 2] == 0
        face_1_verts, face_1_normals = (
            pyramid_verts[face_1_idxs, :],
            pyramid_normals[face_1_idxs, :],
        )
        self.assertTrue(
            torch.all((face_1_verts[:, 0] + face_1_verts[:, 1]) <= 1)
        )
        self.assertTrue(
            torch.allclose(
                face_1_normals,
                torch.tensor([0, 0, 1], dtype=torch.float32).expand(
                    face_1_normals.size()
                ),
            )
        )

        # Face 2: x = 0, z + y <= 1, normals = (1, 0, 0).
        face_2_idxs = pyramid_verts[:, 0] == 0
        face_2_verts, face_2_normals = (
            pyramid_verts[face_2_idxs, :],
            pyramid_normals[face_2_idxs, :],
        )
        self.assertTrue(
            torch.all((face_2_verts[:, 1] + face_2_verts[:, 2]) <= 1)
        )
        self.assertTrue(
            torch.allclose(
                face_2_normals,
                torch.tensor([1, 0, 0], dtype=torch.float32).expand(
                    face_2_normals.size()
                ),
            )
        )

        # Face 3: y = 0, x + z <= 1, normals = (0, -1, 0).
        face_3_idxs = pyramid_verts[:, 1] == 0
        face_3_verts, face_3_normals = (
            pyramid_verts[face_3_idxs, :],
            pyramid_normals[face_3_idxs, :],
        )
        self.assertTrue(
            torch.all((face_3_verts[:, 0] + face_3_verts[:, 2]) <= 1)
        )
        self.assertTrue(
            torch.allclose(
                face_3_normals,
                torch.tensor([0, -1, 0], dtype=torch.float32).expand(
                    face_3_normals.size()
                ),
            )
        )

        # Face 4: x + y + z = 1, normals = (1, 1, 1)/sqrt(3).
        face_4_idxs = pyramid_verts.gt(0).all(1)
        face_4_verts, face_4_normals = (
            pyramid_verts[face_4_idxs, :],
            pyramid_normals[face_4_idxs, :],
        )
        self.assertTrue(
            torch.allclose(
                face_4_verts.sum(1), torch.ones(face_4_verts.size(0))
            )
        )
        self.assertTrue(
            torch.allclose(
                face_4_normals,
                (
                    torch.tensor([1, 1, 1], dtype=torch.float32)
                    / torch.sqrt(torch.tensor(3, dtype=torch.float32))
                ).expand(face_4_normals.size()),
            )
        )

    def test_multinomial(self):
        """
        Confirm that torch.multinomial does not sample elements which have
        zero probability.
        """
        freqs = torch.cuda.FloatTensor(
            [
                0.0,
                0.0,
                0.0,
                0.0,
                0.0,
                0.0,
                0.0,
                0.0,
                0.0,
                0.03178183361887932,
                0.027680952101945877,
                0.033176131546497345,
                0.046052902936935425,
                0.07742464542388916,
                0.11543981730937958,
                0.14148041605949402,
                0.15784293413162231,
                0.13180233538150787,
                0.08271478116512299,
                0.049702685326337814,
                0.027557924389839172,
                0.018125897273421288,
                0.011851548217236996,
                0.010252203792333603,
                0.007422595750540495,
                0.005372154992073774,
                0.0045109698548913,
                0.0036087757907807827,
                0.0035267581697553396,
                0.0018864056328311563,
                0.0024605290964245796,
                0.0022964938543736935,
                0.0018453967059031129,
                0.0010662291897460818,
                0.0009842115687206388,
                0.00045109697384759784,
                0.0007791675161570311,
                0.00020504408166743815,
                0.00020504408166743815,
                0.00020504408166743815,
                0.00012302644609007984,
                0.0,
                0.00012302644609007984,
                4.100881778867915e-05,
                0.0,
                0.0,
                0.0,
                0.0,
                0.0,
                0.0,
            ]
        )

        sample = []
        for _ in range(1000):
            torch.cuda.get_rng_state()
            sample = torch.multinomial(freqs, 1000, True)
            if freqs[sample].min() == 0:
                sample_idx = (freqs[sample] == 0).nonzero()[0][0]
                sampled = sample[sample_idx]
                print(
                    "%s th element of last sample was %s, which has probability %s"
                    % (sample_idx, sampled, freqs[sampled])
                )
                return False
        return True

    def test_multinomial_weights(self):
        """
        Confirm that torch.multinomial does not sample elements which have
        zero probability using a real example of input from a training run.
        """
        weights = torch.load(Path(__file__).resolve().parent / "weights.pt")
        S = 4096
        num_trials = 100
        for _ in range(0, num_trials):
            weights[weights < 0] = 0.0
            samples = weights.multinomial(S, replacement=True)
            sampled_weights = weights[samples]
            assert sampled_weights.min() > 0
            if sampled_weights.min() <= 0:
                return False
        return True

    @staticmethod
    def sample_points_with_init(
        num_meshes: int,
        num_verts: int,
        num_faces: int,
        num_samples: int,
        device: str = "cpu",
    ):
        verts_list = []
        faces_list = []
        for _ in range(num_meshes):
            verts = torch.rand(
                (num_verts, 3), dtype=torch.float32, device=device
            )
            faces = torch.randint(
                num_verts, size=(num_faces, 3), dtype=torch.int64, device=device
            )
            verts_list.append(verts)
            faces_list.append(faces)
        meshes = Meshes(verts_list, faces_list)
        torch.cuda.synchronize()

        def sample_points():
            sample_points_from_meshes(
                meshes, num_samples=num_samples, return_normals=True
            )
            torch.cuda.synchronize()

        return sample_points
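`sample_points_with_init` builds the meshes up front and returns a closure, so a timing harness measures only the sampling call; the `torch.cuda.synchronize()` calls ensure queued GPU work is included in each measurement. A minimal, hypothetical driver for that closure (the repository's own benchmark utilities may differ) could look like:

```python
import timeit

# Hypothetical timing driver; argument values are arbitrary and this assumes
# it runs in the same module as TestSamplePoints.
sample_fn = TestSamplePoints.sample_points_with_init(
    num_meshes=10, num_verts=1000, num_faces=3000, num_samples=100, device="cuda:0"
)
elapsed = timeit.timeit(sample_fn, number=20)
print("avg time per call: %.0f us" % (elapsed / 20 * 1e6))
```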