From d561f1913ed41e2e2a6b292f1c91fe8b96cbaf61 Mon Sep 17 00:00:00 2001
From: Roman Shapovalov <romansh@meta.com>
Date: Mon, 23 Jan 2023 10:38:56 -0800
Subject: [PATCH] Cleaning up camera difficulty
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary: Reporting metrics by camera difficulty adds little value, while supporting it in new datasets is quite painful. Hence, we deprecate the training cameras in the data API and ignore them in evaluation.

Reviewed By: bottler

Differential Revision: D42678879

fbshipit-source-id: aad511f6cb2ca82745f31c19594e1d80594b61d7
---
 projects/implicitron_trainer/experiment.py    |  7 --
 .../implicitron_trainer/impl/training_loop.py |  4 -
 pytorch3d/implicitron/dataset/data_source.py  |  4 +
 .../dataset/dataset_map_provider.py           |  1 +
 pytorch3d/implicitron/eval_demo.py            | 11 +--
 .../evaluation/evaluate_new_view_synthesis.py | 89 ++++---------------
 pytorch3d/implicitron/evaluation/evaluator.py | 12 +--
 7 files changed, 22 insertions(+), 106 deletions(-)

diff --git a/projects/implicitron_trainer/experiment.py b/projects/implicitron_trainer/experiment.py
index 8f6879ab..cede59a3 100755
--- a/projects/implicitron_trainer/experiment.py
+++ b/projects/implicitron_trainer/experiment.py
@@ -207,12 +207,6 @@ class Experiment(Configurable):  # pyre-ignore: 13
                 val_loader,
             ) = accelerator.prepare(model, optimizer, train_loader, val_loader)
 
-        # pyre-fixme[16]: Optional type has no attribute `is_multisequence`.
-        if not self.training_loop.evaluator.is_multisequence:
-            all_train_cameras = self.data_source.all_train_cameras
-        else:
-            all_train_cameras = None
-
         # Enter the main training loop.
         self.training_loop.run(
             train_loader=train_loader,
@@ -223,7 +217,6 @@ class Experiment(Configurable):  # pyre-ignore: 13
             model=model,
             optimizer=optimizer,
             scheduler=scheduler,
-            all_train_cameras=all_train_cameras,
             accelerator=accelerator,
             device=device,
             exp_dir=self.exp_dir,
diff --git a/projects/implicitron_trainer/impl/training_loop.py b/projects/implicitron_trainer/impl/training_loop.py
index 1cafc38b..6c4ae1b8 100644
--- a/projects/implicitron_trainer/impl/training_loop.py
+++ b/projects/implicitron_trainer/impl/training_loop.py
@@ -122,7 +122,6 @@ class ImplicitronTrainingLoop(TrainingLoopBase):
         optimizer: torch.optim.Optimizer,
         scheduler: Any,
         accelerator: Optional[Accelerator],
-        all_train_cameras: Optional[CamerasBase],
         device: torch.device,
         exp_dir: str,
         stats: Stats,
@@ -142,7 +141,6 @@ class ImplicitronTrainingLoop(TrainingLoopBase):
             if test_loader is not None:
                 # pyre-fixme[16]: `Optional` has no attribute `run`.
                 self.evaluator.run(
-                    all_train_cameras=all_train_cameras,
                     dataloader=test_loader,
                     device=device,
                     dump_to_json=True,
@@ -200,7 +198,6 @@ class ImplicitronTrainingLoop(TrainingLoopBase):
                     and epoch % self.test_interval == 0
                 ):
                     self.evaluator.run(
-                        all_train_cameras=all_train_cameras,
                         device=device,
                         dataloader=test_loader,
                         model=model,
@@ -217,7 +214,6 @@ class ImplicitronTrainingLoop(TrainingLoopBase):
         if self.test_when_finished:
             if test_loader is not None:
                 self.evaluator.run(
-                    all_train_cameras=all_train_cameras,
                     device=device,
                     dump_to_json=True,
                     epoch=stats.epoch,
diff --git a/pytorch3d/implicitron/dataset/data_source.py b/pytorch3d/implicitron/dataset/data_source.py
index 9c749024..fcc2ed20 100644
--- a/pytorch3d/implicitron/dataset/data_source.py
+++ b/pytorch3d/implicitron/dataset/data_source.py
@@ -34,6 +34,7 @@ class DataSourceBase(ReplaceableBase):
     @property
     def all_train_cameras(self) -> Optional[CamerasBase]:
         """
+        DEPRECATED! The property will be removed in future versions.
         If the data is all for a single scene, a list
         of the known training cameras for that scene, which is
         used for evaluating the viewpoint difficulty of the
@@ -70,6 +71,9 @@ class ImplicitronDataSource(DataSourceBase):  # pyre-ignore[13]
 
     @property
     def all_train_cameras(self) -> Optional[CamerasBase]:
+        """
+        DEPRECATED! The property will be removed in future versions.
+        """
         if self._all_train_cameras_cache is None:  # pyre-ignore[16]
             all_train_cameras = self.dataset_map_provider.get_all_train_cameras()
             self._all_train_cameras_cache = (all_train_cameras,)
diff --git a/pytorch3d/implicitron/dataset/dataset_map_provider.py b/pytorch3d/implicitron/dataset/dataset_map_provider.py
index 17569e52..91274f18 100644
--- a/pytorch3d/implicitron/dataset/dataset_map_provider.py
+++ b/pytorch3d/implicitron/dataset/dataset_map_provider.py
@@ -95,6 +95,7 @@ class DatasetMapProviderBase(ReplaceableBase):
 
     def get_all_train_cameras(self) -> Optional[CamerasBase]:
         """
+        DEPRECATED! The function will be removed in future versions.
         If the data is all for a single scene, returns a list
         of the known training cameras for that scene, which is
         used for evaluating the difficulty of the unknown
diff --git a/pytorch3d/implicitron/eval_demo.py b/pytorch3d/implicitron/eval_demo.py
index 2dbebe52..bffc5da7 100644
--- a/pytorch3d/implicitron/eval_demo.py
+++ b/pytorch3d/implicitron/eval_demo.py
@@ -153,21 +153,12 @@ def evaluate_dbir_for_category(
                 preds["implicitron_render"],
                 bg_color=bg_color,
                 lpips_model=lpips_model,
-                source_cameras=data_source.all_train_cameras,
             )
         )
 
-    if task == Task.SINGLE_SEQUENCE:
-        camera_difficulty_bin_breaks = 0.97, 0.98
-        multisequence_evaluation = False
-    else:
-        camera_difficulty_bin_breaks = 2.0 / 3, 5.0 / 6
-        multisequence_evaluation = True
-
     category_result_flat, category_result = summarize_nvs_eval_results(
         per_batch_eval_results,
-        camera_difficulty_bin_breaks=camera_difficulty_bin_breaks,
-        is_multisequence=multisequence_evaluation,
+        is_multisequence=task != Task.SINGLE_SEQUENCE,
     )
 
     return category_result["results"]
diff --git a/pytorch3d/implicitron/evaluation/evaluate_new_view_synthesis.py b/pytorch3d/implicitron/evaluation/evaluate_new_view_synthesis.py
index f73e66f8..54e4e68b 100644
--- a/pytorch3d/implicitron/evaluation/evaluate_new_view_synthesis.py
+++ b/pytorch3d/implicitron/evaluation/evaluate_new_view_synthesis.py
@@ -18,13 +18,11 @@ from pytorch3d.implicitron.dataset.dataset_base import FrameData
 from pytorch3d.implicitron.dataset.utils import is_known_frame, is_train_frame
 from pytorch3d.implicitron.models.base_model import ImplicitronRender
 from pytorch3d.implicitron.tools import vis_utils
-from pytorch3d.implicitron.tools.camera_utils import volumetric_camera_overlaps
 from pytorch3d.implicitron.tools.image_utils import mask_background
 from pytorch3d.implicitron.tools.metric_utils import calc_psnr, eval_depth, iou, rgb_l1
 from pytorch3d.implicitron.tools.point_cloud_utils import get_rgbd_point_cloud
 from pytorch3d.implicitron.tools.vis_utils import make_depth_image
-from pytorch3d.renderer.camera_utils import join_cameras_as_batch
-from pytorch3d.renderer.cameras import CamerasBase, PerspectiveCameras
+from pytorch3d.renderer.cameras import PerspectiveCameras
 from pytorch3d.vis.plotly_vis import plot_scene
 from tabulate import tabulate
 
@@ -149,7 +147,6 @@ def eval_batch(
     visualize: bool = False,
     visualize_visdom_env: str = "eval_debug",
     break_after_visualising: bool = True,
-    source_cameras: Optional[CamerasBase] = None,
 ) -> Dict[str, Any]:
     """
     Produce performance metrics for a single batch of new-view synthesis
@@ -171,8 +168,6 @@ def eval_batch(
             ground truth.
         lpips_model: A pre-trained model for evaluating the LPIPS metric.
         visualize: If True, visualizes the results to Visdom.
-        source_cameras: A list of all training cameras for evaluating the
-            difficulty of the target views.
 
     Returns:
         results: A dictionary holding evaluation metrics.
@@ -365,16 +360,7 @@ def eval_batch(
     # convert all metrics to floats
     results = {k: float(v) for k, v in results.items()}
 
-    if source_cameras is None:
-        # pyre-fixme[16]: Optional has no attribute __getitem__
-        source_cameras = frame_data.camera[torch.where(is_known)[0]]
-
     results["meta"] = {
-        # calculate the camera difficulties and add to results
-        "camera_difficulty": calculate_camera_difficulties(
-            frame_data.camera[0],
-            source_cameras,
-        )[0].item(),
         # store the size of the batch (corresponds to n_src_views+1)
         "batch_size": int(is_known.numel()),
         # store the type of the target frame
@@ -406,33 +392,6 @@ def average_per_batch_results(
     }
 
 
-def calculate_camera_difficulties(
-    cameras_target: CamerasBase,
-    cameras_source: CamerasBase,
-) -> torch.Tensor:
-    """
-    Calculate the difficulties of the target cameras, given a set of known
-    cameras `cameras_source`.
-
-    Returns:
-        a tensor of shape (len(cameras_target),)
-    """
-    ious = [
-        volumetric_camera_overlaps(
-            join_cameras_as_batch(
-                # pyre-fixme[6]: Expected `CamerasBase` for 1st param but got
-                #  `Optional[pytorch3d.renderer.utils.TensorProperties]`.
-                [cameras_target[cami], cameras_source.to(cameras_target.device)]
-            )
-        )[0, :]
-        for cami in range(cameras_target.R.shape[0])
-    ]
-    camera_difficulties = torch.stack(
-        [_reduce_camera_iou_overlap(iou[1:]) for iou in ious]
-    )
-    return camera_difficulties
-
-
 def _reduce_camera_iou_overlap(ious: torch.Tensor, topk: int = 2) -> torch.Tensor:
     """
     Calculate the final camera difficulty by computing the average of the
@@ -458,8 +417,7 @@ def _get_camera_difficulty_bin_edges(camera_difficulty_bin_breaks: Tuple[float,
 def summarize_nvs_eval_results(
     per_batch_eval_results: List[Dict[str, Any]],
     is_multisequence: bool,
-    camera_difficulty_bin_breaks: Tuple[float, float],
-):
+) -> Tuple[Dict[str, Any], Dict[str, Any]]:
     """
     Compile the per-batch evaluation results `per_batch_eval_results` into
     a set of aggregate metrics. The produced metrics depend on is_multisequence.
@@ -482,19 +440,12 @@ def summarize_nvs_eval_results(
     batch_sizes = torch.tensor(
         [r["meta"]["batch_size"] for r in per_batch_eval_results]
     ).long()
-    camera_difficulty = torch.tensor(
-        [r["meta"]["camera_difficulty"] for r in per_batch_eval_results]
-    ).float()
+
     is_train = is_train_frame([r["meta"]["frame_type"] for r in per_batch_eval_results])
 
     # init the result database dict
     results = []
 
-    diff_bin_edges, diff_bin_names = _get_camera_difficulty_bin_edges(
-        camera_difficulty_bin_breaks
-    )
-    n_diff_edges = diff_bin_edges.numel()
-
     # add per set averages
     for SET in eval_sets:
         if SET is None:
@@ -504,26 +455,17 @@ def summarize_nvs_eval_results(
             ok_set = is_train == int(SET == "train")
             set_name = SET
 
-        # eval each difficulty bin, including a full average result (diff_bin=None)
-        for diff_bin in [None, *list(range(n_diff_edges - 1))]:
-            if diff_bin is None:
-                # average over all results
-                in_bin = ok_set
-                diff_bin_name = "all"
-            else:
-                b1, b2 = diff_bin_edges[diff_bin : (diff_bin + 2)]
-                in_bin = ok_set & (camera_difficulty > b1) & (camera_difficulty <= b2)
-                diff_bin_name = diff_bin_names[diff_bin]
-            bin_results = average_per_batch_results(
-                per_batch_eval_results, idx=torch.where(in_bin)[0]
-            )
-            results.append(
-                {
-                    "subset": set_name,
-                    "subsubset": f"diff={diff_bin_name}",
-                    "metrics": bin_results,
-                }
-            )
+        # average over all results
+        bin_results = average_per_batch_results(
+            per_batch_eval_results, idx=torch.where(ok_set)[0]
+        )
+        results.append(
+            {
+                "subset": set_name,
+                "subsubset": "diff=all",
+                "metrics": bin_results,
+            }
+        )
 
         if is_multisequence:
             # split based on n_src_views
@@ -552,7 +494,7 @@ def _get_flat_nvs_metric_key(result, metric_name) -> str:
     return metric_key
 
 
-def flatten_nvs_results(results):
+def flatten_nvs_results(results) -> Dict[str, Any]:
     """
     Takes input `results` list of dicts of the form::
 
@@ -571,7 +513,6 @@ def flatten_nvs_results(results):
             'subset=train/test/...|subsubset=src=1/src=2/...': nvs_eval_metrics,
             ...
         }
-
     """
     results_flat = {}
     for result in results:
diff --git a/pytorch3d/implicitron/evaluation/evaluator.py b/pytorch3d/implicitron/evaluation/evaluator.py
index d4c77715..f328a023 100644
--- a/pytorch3d/implicitron/evaluation/evaluator.py
+++ b/pytorch3d/implicitron/evaluation/evaluator.py
@@ -23,7 +23,6 @@ from pytorch3d.implicitron.tools.config import (
     ReplaceableBase,
     run_auto_creation,
 )
-from pytorch3d.renderer.cameras import CamerasBase
 from torch.utils.data import DataLoader
 
 logger = logging.getLogger(__name__)
@@ -50,12 +49,9 @@ class EvaluatorBase(ReplaceableBase):
 class ImplicitronEvaluator(EvaluatorBase):
     """
     Evaluate the results of Implicitron training.
-
-    Members:
-        camera_difficulty_bin_breaks: low/medium vals to divide camera difficulties into
-            [0-eps, low, medium, 1+eps].
     """
 
+    # UNUSED; preserved for compatibility purposes
     camera_difficulty_bin_breaks: Tuple[float, ...] = 0.97, 0.98
 
     def __post_init__(self):
@@ -65,7 +61,6 @@ class ImplicitronEvaluator(EvaluatorBase):
         self,
         model: ImplicitronModelBase,
         dataloader: DataLoader,
-        all_train_cameras: Optional[CamerasBase],
         device: torch.device,
         dump_to_json: bool = False,
         exp_dir: Optional[str] = None,
@@ -79,7 +74,6 @@ class ImplicitronEvaluator(EvaluatorBase):
         Args:
             model: A (trained) model to evaluate.
             dataloader: A test dataloader.
-            all_train_cameras: Camera instances we used for training.
             device: A torch device.
             dump_to_json: If True, will dump the results to a json file.
             exp_dir: Root expeirment directory.
@@ -123,16 +117,12 @@ class ImplicitronEvaluator(EvaluatorBase):
                         implicitron_render,
                         bg_color="black",
                         lpips_model=lpips_model,
-                        source_cameras=(  # None will make it use batch’s known cameras
-                            None if self.is_multisequence else all_train_cameras
-                        ),
                     )
                 )
 
         _, category_result = evaluate.summarize_nvs_eval_results(
             per_batch_eval_results,
             self.is_multisequence,
-            self.camera_difficulty_bin_breaks,
         )
 
         results = category_result["results"]