mirror of
https://github.com/facebookresearch/pytorch3d.git
synced 2025-12-22 07:10:34 +08:00
implicitron v0 (#1133)
Co-authored-by: Jeremy Francis Reizenstein <bottler@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
0e377c6850
commit
cdd2142dd5
290
tests/implicitron/test_evaluation.py
Normal file
290
tests/implicitron/test_evaluation.py
Normal file
@@ -0,0 +1,290 @@
|
||||
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
||||
# All rights reserved.
|
||||
#
|
||||
# This source code is licensed under the BSD-style license found in the
|
||||
# LICENSE file in the root directory of this source tree.
|
||||
|
||||
|
||||
import contextlib
|
||||
import copy
|
||||
import dataclasses
|
||||
import math
|
||||
import os
|
||||
import unittest
|
||||
|
||||
import lpips
|
||||
import torch
|
||||
from pytorch3d.implicitron.dataset.implicitron_dataset import (
|
||||
FrameData,
|
||||
ImplicitronDataset,
|
||||
)
|
||||
from pytorch3d.implicitron.evaluation.evaluate_new_view_synthesis import eval_batch
|
||||
from pytorch3d.implicitron.models.model_dbir import ModelDBIR
|
||||
from pytorch3d.implicitron.tools.metric_utils import calc_psnr, eval_depth
|
||||
from pytorch3d.implicitron.tools.utils import dataclass_to_cuda_
|
||||
|
||||
if os.environ.get("FB_TEST", False):
|
||||
from .common_resources import get_skateboard_data, provide_lpips_vgg
|
||||
else:
|
||||
from common_resources import get_skateboard_data, provide_lpips_vgg
|
||||
|
||||
|
||||
class TestEvaluation(unittest.TestCase):
    """
    Tests of the Implicitron evaluation utilities `eval_depth`, `calc_psnr`
    and `eval_batch`.

    The tests allocate tensors on "cuda" and move the models to the GPU,
    so a CUDA device is required to run them.
    """

    def setUp(self):
        """
        Seed torch, build an `ImplicitronDataset` over the "skateboard"
        category of the data returned by `get_skateboard_data`, and create
        the LPIPS (VGG) model which is later passed to `eval_batch`.
        """
        # initialize evaluation dataset/dataloader
        torch.manual_seed(42)

        # Keep the data context open for the whole test; it is closed by the
        # cleanup hook registered right after it has been entered.
        stack = contextlib.ExitStack()
        dataset_root, path_manager = stack.enter_context(get_skateboard_data())
        self.addCleanup(stack.close)

        category = "skateboard"
        frame_file = os.path.join(dataset_root, category, "frame_annotations.jgz")
        sequence_file = os.path.join(dataset_root, category, "sequence_annotations.jgz")
        self.image_size = 256
        self.dataset = ImplicitronDataset(
            frame_annotations_file=frame_file,
            sequence_annotations_file=sequence_file,
            dataset_root=dataset_root,
            image_height=self.image_size,
            image_width=self.image_size,
            box_crop=True,
            path_manager=path_manager,
        )
        self.bg_color = 0.0

        # init the lpips model for eval
        provide_lpips_vgg()
        self.lpips_model = lpips.LPIPS(net="vgg")

    def test_eval_depth(self):
        """
        Check that eval_depth correctly masks errors and that, for get_best_scale=True,
        the error with scaled prediction equals the error without scaling the
        predicted depth. Finally, test that the error values are as expected
        for prediction and gt differing by a constant offset.
        """
        gt = (torch.randn(10, 1, 300, 400, device="cuda") * 5.0).clamp(0.0)
        mask = (torch.rand_like(gt) > 0.5).type_as(gt)

        for diff in 10 ** torch.linspace(-5, 0, 6):
            for crop in (0, 5):

                pred = gt + (torch.rand_like(gt) - 0.5) * 2 * diff

                # scaled prediction test: multiplying the prediction by a
                # constant must not change the errors when the best scale
                # is estimated by eval_depth
                mse_depth, abs_depth = eval_depth(
                    pred,
                    gt,
                    crop=crop,
                    mask=mask,
                    get_best_scale=True,
                )
                mse_depth_scale, abs_depth_scale = eval_depth(
                    pred * 10.0,
                    gt,
                    crop=crop,
                    mask=mask,
                    get_best_scale=True,
                )
                self.assertAlmostEqual(
                    float(mse_depth.sum()), float(mse_depth_scale.sum()), delta=1e-4
                )
                self.assertAlmostEqual(
                    float(abs_depth.sum()), float(abs_depth_scale.sum()), delta=1e-4
                )

                # error masking test: the prediction is wrong only where
                # mask == 0, so evaluating inside the mask has to give zero
                # error ...
                pred_masked_err = gt + (torch.rand_like(gt) + diff) * (1 - mask)
                mse_depth_masked, abs_depth_masked = eval_depth(
                    pred_masked_err,
                    gt,
                    crop=crop,
                    mask=mask,
                    get_best_scale=True,
                )
                self.assertAlmostEqual(
                    float(mse_depth_masked.sum()), float(0.0), delta=1e-4
                )
                self.assertAlmostEqual(
                    float(abs_depth_masked.sum()), float(0.0), delta=1e-4
                )
                # ... while evaluating with the inverted mask has to expose
                # the injected error.
                mse_depth_unmasked, abs_depth_unmasked = eval_depth(
                    pred_masked_err,
                    gt,
                    crop=crop,
                    mask=1 - mask,
                    get_best_scale=True,
                )
                self.assertGreater(
                    float(mse_depth_unmasked.sum()),
                    float(diff ** 2),
                )
                self.assertGreater(
                    float(abs_depth_unmasked.sum()),
                    float(diff),
                )

                # tests with constant error
                pred_fix_diff = gt + diff * mask
                for _mask_gt in (mask, None):
                    mse_depth_fix_diff, abs_depth_fix_diff = eval_depth(
                        pred_fix_diff,
                        gt,
                        crop=crop,
                        mask=_mask_gt,
                        get_best_scale=False,
                    )
                    if _mask_gt is not None:
                        expected_err_abs = diff
                        expected_err_mse = diff ** 2
                    else:
                        # Without an explicit mask the expected error is the
                        # offset weighted by the fraction of (cropped) pixels
                        # with gt > 0 that received the offset.
                        # NOTE(review): this mirrors how eval_depth is
                        # presumed to normalise when mask is None - confirm
                        # against its implementation.
                        err_mask = (gt > 0.0).float() * mask
                        if crop > 0:
                            err_mask = err_mask[:, :, crop:-crop, crop:-crop]
                            gt_cropped = gt[:, :, crop:-crop, crop:-crop]
                        else:
                            gt_cropped = gt
                        gt_mass = (gt_cropped > 0.0).float().sum(dim=(1, 2, 3))
                        expected_err_abs = (
                            diff * err_mask.sum(dim=(1, 2, 3)) / (gt_mass)
                        )
                        expected_err_mse = diff * expected_err_abs
                    self.assertTrue(
                        torch.allclose(
                            abs_depth_fix_diff,
                            expected_err_abs * torch.ones_like(abs_depth_fix_diff),
                            atol=1e-4,
                        )
                    )
                    self.assertTrue(
                        torch.allclose(
                            mse_depth_fix_diff,
                            expected_err_mse * torch.ones_like(mse_depth_fix_diff),
                            atol=1e-4,
                        )
                    )

    def test_psnr(self):
        """
        Compare against opencv and check that the psnr is above
        the minimum possible value.
        """
        # imported here so that cv2 is only needed when this test runs
        import cv2

        im1 = torch.rand(100, 3, 256, 256).cuda()
        im1_uint8 = (im1 * 255).to(torch.uint8)
        im1_rounded = im1_uint8.float() / 255
        for max_diff in 10 ** torch.linspace(-5, 0, 6):
            im2 = im1 + (torch.rand_like(im1) - 0.5) * 2 * max_diff
            im2 = im2.clamp(0.0, 1.0)
            im2_uint8 = (im2 * 255).to(torch.uint8)
            im2_rounded = im2_uint8.float() / 255
            # check that our psnr matches the output of opencv; both sides
            # see the same 8-bit quantised images
            psnr = calc_psnr(im1_rounded, im2_rounded)
            # some versions of cv2 can only take uint8 input
            psnr_cv2 = cv2.PSNR(
                im1_uint8.cpu().numpy(),
                im2_uint8.cpu().numpy(),
            )
            self.assertAlmostEqual(float(psnr), float(psnr_cv2), delta=1e-4)
            # check that all PSNRs are bigger than the minimum possible PSNR:
            # the per-pixel difference is bounded by max_diff, hence the mse
            # is bounded by max_diff ** 2
            max_mse = max_diff ** 2
            min_psnr = 10 * math.log10(1.0 / max_mse)
            for _im1, _im2 in zip(im1, im2):
                _psnr = calc_psnr(_im1, _im2)
                self.assertGreaterEqual(float(_psnr) + 1e-6, min_psnr)

    def _one_sequence_test(
        self,
        seq_dataset,
        n_batches=2,
        min_batch_size=5,
        max_batch_size=10,
    ):
        """
        Run `ModelDBIR` on random batches of `seq_dataset` and check that
        `eval_batch` never scores its prediction worse than a heavily
        corrupted copy of the same prediction.

        Args:
            seq_dataset: dataset to draw the batches from; `test_full_eval`
                passes a `Subset` holding the frames of a single sequence.
            n_batches: number of random batches to evaluate.
            min_batch_size: smallest batch size that can be sampled.
            max_batch_size: largest batch size that can be sampled
                (inclusive).
        """
        # form a list of random batches
        batch_indices = []
        for _ in range(n_batches):
            # `high` of torch.randint is exclusive, hence the +1 which makes
            # max_batch_size reachable and min == max a valid setting.
            batch_size = int(
                torch.randint(low=min_batch_size, high=max_batch_size + 1, size=(1,))
            )
            # plain python ints are what a batch sampler is expected to yield
            batch_indices.append(
                torch.randperm(len(seq_dataset))[:batch_size].tolist()
            )

        loader = torch.utils.data.DataLoader(
            seq_dataset,
            batch_sampler=batch_indices,
            collate_fn=FrameData.collate,
        )

        model = ModelDBIR(image_size=self.image_size, bg_color=self.bg_color)
        model.cuda()
        self.lpips_model.cuda()

        # For each checked metric: True if a lower value is better.
        lower_better = {
            "psnr": False,
            "psnr_fg": False,
            "depth_abs_fg": True,
            "iou": False,
            "rgb_l1": True,
            "rgb_l1_fg": True,
        }

        for frame_data in loader:
            self.assertIsNone(frame_data.frame_type)
            self.assertIsNotNone(frame_data.image_rgb)
            # override the frame_type: the first frame of the batch is marked
            # as unseen, all remaining ones as known
            frame_data.frame_type = [
                "train_unseen",
                *(["train_known"] * (len(frame_data.image_rgb) - 1)),
            ]

            # move frame_data to gpu
            frame_data = dataclass_to_cuda_(frame_data)
            preds = model(**dataclasses.asdict(frame_data))

            nvs_prediction = copy.deepcopy(preds["nvs_prediction"])
            eval_result = eval_batch(
                frame_data,
                nvs_prediction,
                bg_color=self.bg_color,
                lpips_model=self.lpips_model,
            )

            # Make a terribly bad NVS prediction and check that this is worse
            # than the DBIR prediction.
            nvs_prediction_bad = copy.deepcopy(preds["nvs_prediction"])
            nvs_prediction_bad.depth_render += (
                torch.randn_like(nvs_prediction.depth_render) * 100.0
            )
            nvs_prediction_bad.image_render += (
                torch.randn_like(nvs_prediction.image_render) * 100.0
            )
            nvs_prediction_bad.mask_render = (
                torch.randn_like(nvs_prediction.mask_render) > 0.0
            ).float()
            eval_result_bad = eval_batch(
                frame_data,
                nvs_prediction_bad,
                bg_color=self.bg_color,
                lpips_model=self.lpips_model,
            )

            for metric, lower_is_better in lower_better.items():
                m_better = eval_result[metric]
                m_worse = eval_result_bad[metric]
                if math.isnan(float(m_better)) or math.isnan(float(m_worse)):
                    continue  # metric is missing, i.e. NaN
                _assert = (
                    self.assertLessEqual
                    if lower_is_better
                    else self.assertGreaterEqual
                )
                _assert(
                    m_better,
                    m_worse,
                    msg=f"metric '{metric}' of the corrupted prediction is better",
                )

    def test_full_eval(self, n_sequences=5):
        """
        Test evaluation: run `_one_sequence_test` on the first `n_sequences`
        entries of `self.dataset.seq_to_idx`.
        """
        # each value of seq_to_idx is used as the index list of a Subset
        for idx in list(self.dataset.seq_to_idx.values())[:n_sequences]:
            seq_dataset = torch.utils.data.Subset(self.dataset, idx)
            self._one_sequence_test(seq_dataset)
|
||||
Reference in New Issue
Block a user