mirror of
https://github.com/facebookresearch/pytorch3d.git
synced 2025-08-06 05:52:49 +08:00
Summary: Implementation of point to mesh distances. The current diff contains two types: (a) Point to Edge (b) Point to Face ``` Benchmark Avg Time(μs) Peak Time(μs) Iterations -------------------------------------------------------------------------------- POINT_MESH_EDGE_4_100_300_5000_cuda:0 2745 3138 183 POINT_MESH_EDGE_4_100_300_10000_cuda:0 4408 4499 114 POINT_MESH_EDGE_4_100_3000_5000_cuda:0 4978 5070 101 POINT_MESH_EDGE_4_100_3000_10000_cuda:0 9076 9187 56 POINT_MESH_EDGE_4_1000_300_5000_cuda:0 1411 1487 355 POINT_MESH_EDGE_4_1000_300_10000_cuda:0 4829 5030 104 POINT_MESH_EDGE_4_1000_3000_5000_cuda:0 7539 7620 67 POINT_MESH_EDGE_4_1000_3000_10000_cuda:0 12088 12272 42 POINT_MESH_EDGE_8_100_300_5000_cuda:0 3106 3222 161 POINT_MESH_EDGE_8_100_300_10000_cuda:0 8561 8648 59 POINT_MESH_EDGE_8_100_3000_5000_cuda:0 6932 7021 73 POINT_MESH_EDGE_8_100_3000_10000_cuda:0 24032 24176 21 POINT_MESH_EDGE_8_1000_300_5000_cuda:0 5272 5399 95 POINT_MESH_EDGE_8_1000_300_10000_cuda:0 11348 11430 45 POINT_MESH_EDGE_8_1000_3000_5000_cuda:0 17478 17683 29 POINT_MESH_EDGE_8_1000_3000_10000_cuda:0 25961 26236 20 POINT_MESH_EDGE_16_100_300_5000_cuda:0 8244 8323 61 POINT_MESH_EDGE_16_100_300_10000_cuda:0 18018 18071 28 POINT_MESH_EDGE_16_100_3000_5000_cuda:0 19428 19544 26 POINT_MESH_EDGE_16_100_3000_10000_cuda:0 44967 45135 12 POINT_MESH_EDGE_16_1000_300_5000_cuda:0 7825 7937 64 POINT_MESH_EDGE_16_1000_300_10000_cuda:0 18504 18571 28 POINT_MESH_EDGE_16_1000_3000_5000_cuda:0 65805 66132 8 POINT_MESH_EDGE_16_1000_3000_10000_cuda:0 90885 91089 6 -------------------------------------------------------------------------------- Benchmark Avg Time(μs) Peak Time(μs) Iterations -------------------------------------------------------------------------------- POINT_MESH_FACE_4_100_300_5000_cuda:0 1561 1685 321 POINT_MESH_FACE_4_100_300_10000_cuda:0 2818 2954 178 POINT_MESH_FACE_4_100_3000_5000_cuda:0 15893 16018 32 POINT_MESH_FACE_4_100_3000_10000_cuda:0 16350 16439 31 
POINT_MESH_FACE_4_1000_300_5000_cuda:0 3179 3278 158 POINT_MESH_FACE_4_1000_300_10000_cuda:0 2353 2436 213 POINT_MESH_FACE_4_1000_3000_5000_cuda:0 16262 16336 31 POINT_MESH_FACE_4_1000_3000_10000_cuda:0 9334 9448 54 POINT_MESH_FACE_8_100_300_5000_cuda:0 4377 4493 115 POINT_MESH_FACE_8_100_300_10000_cuda:0 9728 9822 52 POINT_MESH_FACE_8_100_3000_5000_cuda:0 26428 26544 19 POINT_MESH_FACE_8_100_3000_10000_cuda:0 42238 43031 12 POINT_MESH_FACE_8_1000_300_5000_cuda:0 3891 3982 129 POINT_MESH_FACE_8_1000_300_10000_cuda:0 5363 5429 94 POINT_MESH_FACE_8_1000_3000_5000_cuda:0 20998 21084 24 POINT_MESH_FACE_8_1000_3000_10000_cuda:0 39711 39897 13 POINT_MESH_FACE_16_100_300_5000_cuda:0 5955 6001 84 POINT_MESH_FACE_16_100_300_10000_cuda:0 12082 12144 42 POINT_MESH_FACE_16_100_3000_5000_cuda:0 44996 45176 12 POINT_MESH_FACE_16_100_3000_10000_cuda:0 73042 73197 7 POINT_MESH_FACE_16_1000_300_5000_cuda:0 8292 8374 61 POINT_MESH_FACE_16_1000_300_10000_cuda:0 19442 19506 26 POINT_MESH_FACE_16_1000_3000_5000_cuda:0 36059 36194 14 POINT_MESH_FACE_16_1000_3000_10000_cuda:0 64644 64822 8 -------------------------------------------------------------------------------- ``` Reviewed By: jcjohnson Differential Revision: D20590462 fbshipit-source-id: 42a39837b514a546ac9471bfaff60eefe7fae829
160 lines
4.5 KiB
Plaintext
160 lines
4.5 KiB
Plaintext
// Copyright (c) Facebook, Inc. and its affiliates. All rights reserved.
|
|
|
|
#pragma once
|
|
#define MINK_H
|
|
|
|
#include "index_utils.cuh"
|
|
|
|
// A data structure to keep track of the smallest K keys seen so far as well
// as their associated values, intended to be used in device code.
// This data structure doesn't allocate any memory; keys and values are stored
// in arrays passed to the constructor.
//
// The implementation is generic; it can be used for any key type that supports
// the < operator (no other comparison operator is used), and can be used with
// any value type. Keys and values are copied by assignment.
//
// Example usage:
//
// float keys[K];
// int values[K];
// MinK<float, int> mink(keys, values, K);
// for (...) {
//   // Produce some key and value from somewhere
//   mink.add(key, value);
// }
// mink.sort();
//
// Now keys and values store the smallest K keys seen so far and the values
// associated to these keys. Only the first mink.size() entries are valid,
// which matters when fewer than K items were added:
//
// for (int k = 0; k < mink.size(); ++k) {
//   float key_k = keys[k];
//   int value_k = values[k];
// }
template <typename key_t, typename value_t>
class MinK {
 public:
  // Constructor.
  //
  // Arguments:
  //   keys: Array in which to store keys; must have room for K entries
  //   vals: Array in which to store values; must have room for K entries
  //   K: How many values to keep track of. If K <= 0 nothing is ever stored.
  __device__ MinK(key_t* keys, value_t* vals, int K)
      : keys(keys), vals(vals), K(K), _size(0), max_key(), max_idx(0) {}

  // Try to add a new key and associated value to the data structure. If the key
  // is one of the smallest K seen so far then it will be kept; otherwise it
  // will not be kept.
  //
  // This takes O(1) operations if the new key is not kept, or if the structure
  // currently contains fewer than K elements. Otherwise this takes O(K) time.
  //
  // Arguments:
  //   key: The key to add
  //   val: The value associated to the key
  __device__ __forceinline__ void add(const key_t& key, const value_t& val) {
    if (_size < K) {
      // Still filling up: append, and remember the largest key stored so far.
      keys[_size] = key;
      vals[_size] = val;
      if (_size == 0 || max_key < key) {
        max_key = key;
        max_idx = _size;
      }
      _size++;
    } else if (K > 0 && key < max_key) {
      // Full, and the new key beats the current maximum: overwrite the maximum
      // in place. The K > 0 guard keeps a zero-capacity structure from ever
      // touching the arrays or the (meaningless) cached maximum.
      keys[max_idx] = key;
      vals[max_idx] = val;
      // Rescan all K slots to find the new maximum. max_idx already points at
      // the slot just written, so it is correct if nothing larger is found.
      max_key = key;
      for (int k = 0; k < K; ++k) {
        key_t cur_key = keys[k];
        if (max_key < cur_key) {
          max_key = cur_key;
          max_idx = k;
        }
      }
    }
  }

  // Get the number of items currently stored in the structure.
  // This takes O(1) time.
  __device__ __forceinline__ int size() const {
    return _size;
  }

  // Sort the stored items in place by ascending key using bubble sort; values
  // are moved together with their keys.
  // This takes O(K^2) time.
  //
  // Note: sorting reorders the arrays without updating the cached position of
  // the maximum, so sort() is meant to be called once all add() calls are done.
  __device__ __forceinline__ void sort() {
    for (int i = 0; i < _size - 1; ++i) {
      // After pass i the largest i + 1 keys sit in their final slots.
      for (int j = 0; j < _size - i - 1; ++j) {
        if (keys[j + 1] < keys[j]) {
          key_t key = keys[j];
          value_t val = vals[j];
          keys[j] = keys[j + 1];
          vals[j] = vals[j + 1];
          keys[j + 1] = key;
          vals[j + 1] = val;
        }
      }
    }
  }

 private:
  key_t* keys; // Caller-owned storage for the kept keys
  value_t* vals; // Caller-owned storage for the kept values
  int K; // Capacity: maximum number of items kept
  int _size; // Number of items currently stored (<= K)
  key_t max_key; // Largest key currently stored; meaningful once _size > 0
  int max_idx; // Index of max_key inside keys; meaningful once _size > 0
};
|
|
|
|
// This is a version of MinK that only touches the arrays using static indexing
// via RegisterIndexUtils. If the keys and values are stored in thread-local
// arrays, then this may allow the compiler to place them in registers for
// fast access.
//
// This has the same API as MinK, except that K is a template parameter rather
// than a constructor argument, and sorting is not supported.
// We found that sorting via RegisterIndexUtils gave very poor performance,
// and suspect it may have prevented the compiler from placing the arrays
// into registers.
//
// As with MinK, the only comparison operator used on keys is <.
template <typename key_t, typename value_t, int K>
class RegisterMinK {
 public:
  // Constructor.
  //
  // Arguments:
  //   keys: Array in which to store keys; must have room for K entries
  //   vals: Array in which to store values; must have room for K entries
  __device__ RegisterMinK(key_t* keys, value_t* vals)
      : keys(keys), vals(vals), _size(0), max_key(), max_idx(0) {}

  // Try to add a new key and associated value. The pair is kept only if the
  // key is among the smallest K seen so far.
  //
  // Arguments:
  //   key: The key to add
  //   val: The value associated to the key
  __device__ __forceinline__ void add(const key_t& key, const value_t& val) {
    if (_size < K) {
      // Still filling up: append, and remember the largest key stored so far.
      RegisterIndexUtils<key_t, K>::set(keys, _size, key);
      RegisterIndexUtils<value_t, K>::set(vals, _size, val);
      if (_size == 0 || max_key < key) {
        max_key = key;
        max_idx = _size;
      }
      _size++;
    } else if (K > 0 && key < max_key) {
      // Full, and the new key beats the current maximum: overwrite the maximum
      // in place. K > 0 is a compile-time constant, so the guard only matters
      // for a zero-capacity instantiation, where nothing is ever stored.
      RegisterIndexUtils<key_t, K>::set(keys, max_idx, key);
      RegisterIndexUtils<value_t, K>::set(vals, max_idx, val);
      // Rescan all K slots to find the new maximum. max_idx already points at
      // the slot just written, so it is correct if nothing larger is found.
      max_key = key;
      for (int k = 0; k < K; ++k) {
        key_t cur_key = RegisterIndexUtils<key_t, K>::get(keys, k);
        if (max_key < cur_key) {
          max_key = cur_key;
          max_idx = k;
        }
      }
    }
  }

  // Get the number of items currently stored in the structure.
  __device__ __forceinline__ int size() const {
    return _size;
  }

 private:
  key_t* keys; // Caller-owned storage for the kept keys
  value_t* vals; // Caller-owned storage for the kept values
  int _size; // Number of items currently stored (<= K)
  key_t max_key; // Largest key currently stored; meaningful once _size > 0
  int max_idx; // Index of max_key inside keys; meaningful once _size > 0
};
|