implicitron v0 (#1133)

Co-authored-by: Jeremy Francis Reizenstein <bottler@users.noreply.github.com>
2026-05-07 16:48:54 +08:00 · 2022-03-21 20:20:10 +00:00
parent 0e377c6850
commit cdd2142dd5
90 changed files with 17075 additions and 0 deletions
--- a/projects/implicitron_trainer/configs/repro_base.yaml
+++ b/projects/implicitron_trainer/configs/repro_base.yaml
@@ -0,0 +1,83 @@
+defaults:
+- default_config
+- _self_
+exp_dir: ./data/exps/base/
+architecture: generic
+visualize_interval: 0
+visdom_port: 8097
+dataloader_args:
+  batch_size: 10
+  dataset_len: 1000
+  dataset_len_val: 1
+  num_workers: 8
+  images_per_seq_options:
+  - 2
+  - 3
+  - 4
+  - 5
+  - 6
+  - 7
+  - 8
+  - 9
+  - 10
+dataset_args:
+  dataset_root: ${oc.env:CO3D_DATASET_ROOT}  # taken from the CO3D_DATASET_ROOT environment variable
+  load_point_clouds: false
+  mask_depths: false
+  mask_images: false
+  n_frames_per_sequence: -1
+  test_on_train: true
+  test_restrict_sequence_id: 0
+generic_model_args:
+  loss_weights:
+    loss_mask_bce: 1.0
+    loss_prev_stage_mask_bce: 1.0
+    loss_autodecoder_norm: 0.01
+    loss_rgb_mse: 1.0
+    loss_prev_stage_rgb_mse: 1.0
+  output_rasterized_mc: false
+  chunk_size_grid: 102400
+  render_image_height: 400
+  render_image_width: 400
+  num_passes: 2
+  implicit_function_NeuralRadianceFieldImplicitFunction_args:
+    n_harmonic_functions_xyz: 10
+    n_harmonic_functions_dir: 4
+    n_hidden_neurons_xyz: 256
+    n_hidden_neurons_dir: 128
+    n_layers_xyz: 8
+    append_xyz:
+    - 5
+    latent_dim: 0
+  raysampler_args:
+    n_rays_per_image_sampled_from_mask: 1024
+    min_depth: 0.0
+    max_depth: 0.0
+    scene_extent: 8.0
+    n_pts_per_ray_training: 64
+    n_pts_per_ray_evaluation: 64
+    stratified_point_sampling_training: true
+    stratified_point_sampling_evaluation: false
+  renderer_MultiPassEmissionAbsorptionRenderer_args:
+    n_pts_per_ray_fine_training: 64
+    n_pts_per_ray_fine_evaluation: 64
+    append_coarse_samples_to_fine: true
+    density_noise_std_train: 1.0
+  view_sampler_args:
+    masked_sampling: false
+  image_feature_extractor_args:
+    stages:
+    - 1
+    - 2
+    - 3
+    - 4
+    proj_dim: 16
+    image_rescale: 0.32
+    first_max_pool: false
+solver_args:
+  breed: adam
+  lr: 0.0005
+  lr_policy: multistep
+  max_epochs: 2000
+  momentum: 0.9
+  weight_decay: 0.0
--- a/projects/implicitron_trainer/configs/repro_feat_extractor_normed.yaml
+++ b/projects/implicitron_trainer/configs/repro_feat_extractor_normed.yaml
@@ -0,0 +1,16 @@
+generic_model_args:  # no defaults list here; this file is pulled in via the defaults lists of the *_srn_wce configs
+  image_feature_extractor_args:
+    add_images: true
+    add_masks: true
+    first_max_pool: true  # repro_base.yaml sets false
+    image_rescale: 0.375  # repro_base.yaml sets 0.32
+    l2_norm: true
+    name: resnet34
+    normalize_image: true
+    pretrained: true
+    stages:
+    - 1
+    - 2
+    - 3
+    - 4
+    proj_dim: 32  # repro_base.yaml sets 16
--- a/projects/implicitron_trainer/configs/repro_feat_extractor_transformer.yaml
+++ b/projects/implicitron_trainer/configs/repro_feat_extractor_transformer.yaml
@@ -0,0 +1,16 @@
+generic_model_args:  # no defaults list here; this file is pulled in via the defaults lists of the *_nerformer* configs
+  image_feature_extractor_args:
+    add_images: true
+    add_masks: true
+    first_max_pool: false  # repro_feat_extractor_normed.yaml uses true
+    image_rescale: 0.375  # repro_base.yaml sets 0.32
+    l2_norm: true
+    name: resnet34
+    normalize_image: true
+    pretrained: true
+    stages:
+    - 1
+    - 2
+    - 3
+    - 4
+    proj_dim: 16  # repro_feat_extractor_normed.yaml uses 32
--- a/projects/implicitron_trainer/configs/repro_feat_extractor_unnormed.yaml
+++ b/projects/implicitron_trainer/configs/repro_feat_extractor_unnormed.yaml
@@ -0,0 +1,16 @@
+generic_model_args:  # no defaults list here; this file is pulled in via the defaults lists of the *_nerf_wce configs
+  image_feature_extractor_args:
+    stages:  # three stages here; repro_base.yaml and the other feat_extractor configs list 1-4
+    - 1
+    - 2
+    - 3
+    first_max_pool: false
+    proj_dim: -1  # the other feat_extractor configs use 16 or 32; meaning of -1 not visible here — confirm against the extractor
+    l2_norm: false  # the normed and transformer variants set true
+    image_rescale: 0.375  # repro_base.yaml sets 0.32
+    name: resnet34
+    normalize_image: true
+    pretrained: true
+  feature_aggregator_AngleWeightedReductionFeatureAggregator_args:
+    reduction_functions:
+    - AVG
--- a/projects/implicitron_trainer/configs/repro_multiseq_base.yaml
+++ b/projects/implicitron_trainer/configs/repro_multiseq_base.yaml
@@ -0,0 +1,31 @@
+defaults:
+- repro_base.yaml
+- _self_
+dataloader_args:
+  batch_size: 10
+  dataset_len: 1000
+  dataset_len_val: 1
+  num_workers: 8
+  images_per_seq_options:
+  - 2
+  - 3
+  - 4
+  - 5
+  - 6
+  - 7
+  - 8
+  - 9
+  - 10
+dataset_args:
+  assert_single_seq: false
+  dataset_name: co3d_multisequence
+  load_point_clouds: false
+  mask_depths: false
+  mask_images: false
+  n_frames_per_sequence: -1
+  test_on_train: true
+  test_restrict_sequence_id: 0
+solver_args:
+  max_epochs: 3000
+  milestones:
+  - 1000
--- a/projects/implicitron_trainer/configs/repro_multiseq_idr_ad.yaml
+++ b/projects/implicitron_trainer/configs/repro_multiseq_idr_ad.yaml
@@ -0,0 +1,64 @@
+defaults:
+- repro_multiseq_base.yaml
+- _self_
+generic_model_args:
+  loss_weights:
+    loss_mask_bce: 100.0
+    loss_kl: 0.0
+    loss_rgb_mse: 1.0
+    loss_eikonal: 0.1
+  chunk_size_grid: 65536
+  num_passes: 1
+  output_rasterized_mc: true
+  sampling_mode_training: mask_sample
+  view_pool: false
+  sequence_autodecoder_args:
+    n_instances: 20000
+    init_scale: 1.0
+    encoding_dim: 256
+  implicit_function_IdrFeatureField_args:
+    n_harmonic_functions_xyz: 6
+    bias: 0.6
+    d_in: 3
+    d_out: 1
+    dims:
+    - 512
+    - 512
+    - 512
+    - 512
+    - 512
+    - 512
+    - 512
+    - 512
+    geometric_init: true
+    pooled_feature_dim: 0
+    skip_in:
+    - 6
+    weight_norm: true
+  renderer_SignedDistanceFunctionRenderer_args:
+    ray_tracer_args:
+      line_search_step: 0.5
+      line_step_iters: 3
+      n_secant_steps: 8
+      n_steps: 100
+      object_bounding_sphere: 8.0
+      sdf_threshold: 5.0e-05
+    ray_normal_coloring_network_args:
+      d_in: 9
+      d_out: 3
+      dims:
+      - 512
+      - 512
+      - 512
+      - 512
+      mode: idr
+      n_harmonic_functions_dir: 4
+      pooled_feature_dim: 0
+      weight_norm: true
+  raysampler_args:
+    n_rays_per_image_sampled_from_mask: 1024
+    n_pts_per_ray_training: 0
+    n_pts_per_ray_evaluation: 0
+    scene_extent: 8.0
+  renderer_class_type: SignedDistanceFunctionRenderer
+  implicit_function_class_type: IdrFeatureField
--- a/projects/implicitron_trainer/configs/repro_multiseq_nerf_ad.yaml
+++ b/projects/implicitron_trainer/configs/repro_multiseq_nerf_ad.yaml
@@ -0,0 +1,9 @@
+defaults:
+- repro_multiseq_base.yaml
+- _self_  # listed last, so the keys below take precedence over repro_multiseq_base.yaml
+generic_model_args:
+  chunk_size_grid: 16000  # repro_base.yaml sets 102400
+  view_pool: false
+  sequence_autodecoder_args:
+    n_instances: 20000
+    encoding_dim: 256
--- a/projects/implicitron_trainer/configs/repro_multiseq_nerf_wce.yaml
+++ b/projects/implicitron_trainer/configs/repro_multiseq_nerf_wce.yaml
@@ -0,0 +1,10 @@
+defaults:
+- repro_multiseq_base.yaml
+- repro_feat_extractor_unnormed.yaml  # supplies image_feature_extractor_args and the AVG reduction function
+- _self_  # listed last, so the keys below take precedence over both entries above
+clip_grad: 1.0
+generic_model_args:
+  chunk_size_grid: 16000  # repro_base.yaml sets 102400
+  view_pool: true
+  raysampler_args:
+    n_rays_per_image_sampled_from_mask: 850  # repro_base.yaml sets 1024
--- a/projects/implicitron_trainer/configs/repro_multiseq_nerformer.yaml
+++ b/projects/implicitron_trainer/configs/repro_multiseq_nerformer.yaml
@@ -0,0 +1,16 @@
+defaults:
+- repro_multiseq_base.yaml
+- repro_feat_extractor_transformer.yaml  # supplies image_feature_extractor_args
+- _self_  # listed last, so the keys below take precedence over both entries above
+generic_model_args:
+  chunk_size_grid: 16000  # repro_base.yaml sets 102400
+  view_pool: true
+  raysampler_args:
+    n_rays_per_image_sampled_from_mask: 800  # repro_base.yaml sets 1024
+    n_pts_per_ray_training: 32  # repro_base.yaml sets 64
+    n_pts_per_ray_evaluation: 32  # repro_base.yaml sets 64
+  renderer_MultiPassEmissionAbsorptionRenderer_args:
+    n_pts_per_ray_fine_training: 16  # repro_base.yaml sets 64
+    n_pts_per_ray_fine_evaluation: 16  # repro_base.yaml sets 64
+  implicit_function_class_type: NeRFormerImplicitFunction
+  feature_aggregator_class_type: IdentityFeatureAggregator
--- a/projects/implicitron_trainer/configs/repro_multiseq_nerformer_angle_w.yaml
+++ b/projects/implicitron_trainer/configs/repro_multiseq_nerformer_angle_w.yaml
@@ -0,0 +1,16 @@
+defaults:
+- repro_multiseq_base.yaml
+- repro_feat_extractor_transformer.yaml  # supplies image_feature_extractor_args
+- _self_  # listed last, so the keys below take precedence over both entries above
+generic_model_args:
+  chunk_size_grid: 16000  # repro_base.yaml sets 102400
+  view_pool: true
+  raysampler_args:
+    n_rays_per_image_sampled_from_mask: 800  # repro_base.yaml sets 1024
+    n_pts_per_ray_training: 32  # repro_base.yaml sets 64
+    n_pts_per_ray_evaluation: 32  # repro_base.yaml sets 64
+  renderer_MultiPassEmissionAbsorptionRenderer_args:
+    n_pts_per_ray_fine_training: 16  # repro_base.yaml sets 64
+    n_pts_per_ray_fine_evaluation: 16  # repro_base.yaml sets 64
+  implicit_function_class_type: NeRFormerImplicitFunction
+  feature_aggregator_class_type: AngleWeightedIdentityFeatureAggregator  # only key that differs from repro_multiseq_nerformer.yaml
--- a/projects/implicitron_trainer/configs/repro_multiseq_srn_ad_hypernet.yaml
+++ b/projects/implicitron_trainer/configs/repro_multiseq_srn_ad_hypernet.yaml
@@ -0,0 +1,32 @@
+defaults:
+- repro_multiseq_base.yaml
+- _self_
+generic_model_args:
+  chunk_size_grid: 16000
+  view_pool: false
+  n_train_target_views: -1
+  num_passes: 1
+  loss_weights:
+    loss_rgb_mse: 200.0
+    loss_prev_stage_rgb_mse: 0.0
+    loss_mask_bce: 1.0
+    loss_prev_stage_mask_bce: 0.0
+    loss_autodecoder_norm: 0.001
+    depth_neg_penalty: 10000.0
+  sequence_autodecoder_args:
+    encoding_dim: 256
+    n_instances: 20000
+  raysampler_args:
+    n_rays_per_image_sampled_from_mask: 2048
+    min_depth: 0.05
+    max_depth: 0.05
+    scene_extent: 0.0
+    n_pts_per_ray_training: 1
+    n_pts_per_ray_evaluation: 1
+    stratified_point_sampling_training: false
+    stratified_point_sampling_evaluation: false
+  renderer_class_type: LSTMRenderer
+  implicit_function_class_type: SRNHyperNetImplicitFunction
+solver_args:
+  breed: adam
+  lr: 5.0e-05
--- a/projects/implicitron_trainer/configs/repro_multiseq_srn_ad_hypernet_noharm.yaml
+++ b/projects/implicitron_trainer/configs/repro_multiseq_srn_ad_hypernet_noharm.yaml
@@ -0,0 +1,10 @@
+defaults:
+- repro_multiseq_srn_ad_hypernet.yaml
+- _self_  # listed last, so the keys below take precedence over the parent config
+generic_model_args:
+  num_passes: 1  # same value the parent config already sets
+  implicit_function_SRNHyperNetImplicitFunction_args:
+    pixel_generator_args:
+      n_harmonic_functions: 0  # presumably what the "noharm" file-name suffix refers to — confirm
+    hypernet_args:
+      n_harmonic_functions: 0
--- a/projects/implicitron_trainer/configs/repro_multiseq_srn_wce.yaml
+++ b/projects/implicitron_trainer/configs/repro_multiseq_srn_wce.yaml
@@ -0,0 +1,30 @@
+defaults:
+- repro_multiseq_base.yaml
+- repro_feat_extractor_normed.yaml
+- _self_
+generic_model_args:
+  chunk_size_grid: 32000
+  view_pool: true
+  num_passes: 1
+  n_train_target_views: -1
+  loss_weights:
+    loss_rgb_mse: 200.0
+    loss_prev_stage_rgb_mse: 0.0
+    loss_mask_bce: 1.0
+    loss_prev_stage_mask_bce: 0.0
+    loss_autodecoder_norm: 0.0
+    depth_neg_penalty: 10000.0
+  raysampler_args:
+    n_rays_per_image_sampled_from_mask: 2048
+    min_depth: 0.05
+    max_depth: 0.05
+    scene_extent: 0.0
+    n_pts_per_ray_training: 1
+    n_pts_per_ray_evaluation: 1
+    stratified_point_sampling_training: false
+    stratified_point_sampling_evaluation: false
+  renderer_class_type: LSTMRenderer
+  implicit_function_class_type: SRNImplicitFunction
+solver_args:
+  breed: adam
+  lr: 5.0e-05
--- a/projects/implicitron_trainer/configs/repro_multiseq_srn_wce_noharm.yaml
+++ b/projects/implicitron_trainer/configs/repro_multiseq_srn_wce_noharm.yaml
@@ -0,0 +1,10 @@
+defaults:
+- repro_multiseq_srn_wce.yaml
+- _self_  # listed last, so the keys below take precedence over the parent config
+generic_model_args:
+  num_passes: 1  # same value the parent config already sets
+  implicit_function_SRNImplicitFunction_args:
+    pixel_generator_args:
+      n_harmonic_functions: 0  # presumably what the "noharm" file-name suffix refers to — confirm
+    raymarch_function_args:
+      n_harmonic_functions: 0
--- a/projects/implicitron_trainer/configs/repro_singleseq_base.yaml
+++ b/projects/implicitron_trainer/configs/repro_singleseq_base.yaml
@@ -0,0 +1,41 @@
+defaults:
+- repro_base
+- _self_
+dataloader_args:
+  batch_size: 1
+  dataset_len: 1000
+  dataset_len_val: 1
+  num_workers: 8
+  images_per_seq_options:
+  - 2
+dataset_args:
+  dataset_name: co3d_singlesequence
+  assert_single_seq: true
+  n_frames_per_sequence: -1
+  test_restrict_sequence_id: 0
+  test_on_train: false
+generic_model_args:
+  render_image_height: 800
+  render_image_width: 800
+  log_vars:
+  - loss_rgb_psnr_fg
+  - loss_rgb_psnr
+  - loss_eikonal
+  - loss_prev_stage_rgb_psnr
+  - loss_mask_bce
+  - loss_prev_stage_mask_bce
+  - loss_rgb_mse
+  - loss_prev_stage_rgb_mse
+  - loss_depth_abs
+  - loss_depth_abs_fg
+  - loss_kl
+  - loss_mask_neg_iou
+  - objective
+  - epoch
+  - sec/it
+solver_args:
+  lr: 0.0005
+  max_epochs: 400
+  milestones:
+  - 200
+  - 300
--- a/projects/implicitron_trainer/configs/repro_singleseq_idr.yaml
+++ b/projects/implicitron_trainer/configs/repro_singleseq_idr.yaml
@@ -0,0 +1,57 @@
+defaults:
+- repro_singleseq_base
+- _self_
+generic_model_args:
+  loss_weights:
+    loss_mask_bce: 100.0
+    loss_kl: 0.0
+    loss_rgb_mse: 1.0
+    loss_eikonal: 0.1
+  chunk_size_grid: 65536
+  num_passes: 1
+  view_pool: false
+  implicit_function_IdrFeatureField_args:
+    n_harmonic_functions_xyz: 6
+    bias: 0.6
+    d_in: 3
+    d_out: 1
+    dims:
+    - 512
+    - 512
+    - 512
+    - 512
+    - 512
+    - 512
+    - 512
+    - 512
+    geometric_init: true
+    pooled_feature_dim: 0
+    skip_in:
+    - 6
+    weight_norm: true
+  renderer_SignedDistanceFunctionRenderer_args:
+    ray_tracer_args:
+      line_search_step: 0.5
+      line_step_iters: 3
+      n_secant_steps: 8
+      n_steps: 100
+      object_bounding_sphere: 8.0
+      sdf_threshold: 5.0e-05
+    ray_normal_coloring_network_args:
+      d_in: 9
+      d_out: 3
+      dims:
+      - 512
+      - 512
+      - 512
+      - 512
+      mode: idr
+      n_harmonic_functions_dir: 4
+      pooled_feature_dim: 0
+      weight_norm: true
+  raysampler_args:
+    n_rays_per_image_sampled_from_mask: 1024
+    n_pts_per_ray_training: 0
+    n_pts_per_ray_evaluation: 0
+  renderer_class_type: SignedDistanceFunctionRenderer
+  implicit_function_class_type: IdrFeatureField
--- a/projects/implicitron_trainer/configs/repro_singleseq_nerf.yaml
+++ b/projects/implicitron_trainer/configs/repro_singleseq_nerf.yaml
@@ -0,0 +1,4 @@
+defaults:
+- repro_singleseq_base
+- _self_  # listed last, so the key below takes precedence over the inherited value
+exp_dir: ./data/nerf_single_apple/  # replaces ./data/exps/base/ from repro_base.yaml
--- a/projects/implicitron_trainer/configs/repro_singleseq_nerf_wce.yaml
+++ b/projects/implicitron_trainer/configs/repro_singleseq_nerf_wce.yaml
@@ -0,0 +1,9 @@
+defaults:
+- repro_singleseq_wce_base.yaml
+- repro_feat_extractor_unnormed.yaml  # supplies image_feature_extractor_args and the AVG reduction function
+- _self_  # listed last, so the keys below take precedence over both entries above
+generic_model_args:
+  chunk_size_grid: 16000  # repro_base.yaml sets 102400
+  view_pool: true
+  raysampler_args:
+    n_rays_per_image_sampled_from_mask: 850  # repro_base.yaml sets 1024
--- a/projects/implicitron_trainer/configs/repro_singleseq_nerformer.yaml
+++ b/projects/implicitron_trainer/configs/repro_singleseq_nerformer.yaml
@@ -0,0 +1,16 @@
+defaults:
+- repro_singleseq_wce_base.yaml
+- repro_feat_extractor_transformer.yaml  # supplies image_feature_extractor_args
+- _self_  # listed last, so the keys below take precedence over both entries above
+generic_model_args:
+  chunk_size_grid: 16000  # repro_base.yaml sets 102400
+  view_pool: true
+  implicit_function_class_type: NeRFormerImplicitFunction
+  raysampler_args:
+    n_rays_per_image_sampled_from_mask: 800  # repro_base.yaml sets 1024
+    n_pts_per_ray_training: 32  # repro_base.yaml sets 64
+    n_pts_per_ray_evaluation: 32  # repro_base.yaml sets 64
+  renderer_MultiPassEmissionAbsorptionRenderer_args:
+    n_pts_per_ray_fine_training: 16  # repro_base.yaml sets 64
+    n_pts_per_ray_fine_evaluation: 16  # repro_base.yaml sets 64
+  feature_aggregator_class_type: IdentityFeatureAggregator
--- a/projects/implicitron_trainer/configs/repro_singleseq_srn.yaml
+++ b/projects/implicitron_trainer/configs/repro_singleseq_srn.yaml
@@ -0,0 +1,28 @@
+defaults:
+- repro_singleseq_base.yaml
+- _self_
+generic_model_args:
+  num_passes: 1
+  chunk_size_grid: 32000
+  view_pool: false
+  loss_weights:
+    loss_rgb_mse: 200.0
+    loss_prev_stage_rgb_mse: 0.0
+    loss_mask_bce: 1.0
+    loss_prev_stage_mask_bce: 0.0
+    loss_autodecoder_norm: 0.0
+    depth_neg_penalty: 10000.0
+  raysampler_args:
+    n_rays_per_image_sampled_from_mask: 2048
+    min_depth: 0.05
+    max_depth: 0.05
+    scene_extent: 0.0
+    n_pts_per_ray_training: 1
+    n_pts_per_ray_evaluation: 1
+    stratified_point_sampling_training: false
+    stratified_point_sampling_evaluation: false
+  renderer_class_type: LSTMRenderer
+  implicit_function_class_type: SRNImplicitFunction
+solver_args:
+  breed: adam
+  lr: 5.0e-05
--- a/projects/implicitron_trainer/configs/repro_singleseq_srn_noharm.yaml
+++ b/projects/implicitron_trainer/configs/repro_singleseq_srn_noharm.yaml
@@ -0,0 +1,10 @@
+defaults:
+- repro_singleseq_srn.yaml
+- _self_  # listed last, so the keys below take precedence over the parent config
+generic_model_args:
+  num_passes: 1  # same value the parent config already sets
+  implicit_function_SRNImplicitFunction_args:
+    pixel_generator_args:
+      n_harmonic_functions: 0  # presumably what the "noharm" file-name suffix refers to — confirm
+    raymarch_function_args:
+      n_harmonic_functions: 0
--- a/projects/implicitron_trainer/configs/repro_singleseq_srn_wce.yaml
+++ b/projects/implicitron_trainer/configs/repro_singleseq_srn_wce.yaml
@@ -0,0 +1,29 @@
+defaults:
+- repro_singleseq_wce_base
+- repro_feat_extractor_normed.yaml
+- _self_
+generic_model_args:
+  num_passes: 1
+  chunk_size_grid: 32000
+  view_pool: true
+  loss_weights:
+    loss_rgb_mse: 200.0
+    loss_prev_stage_rgb_mse: 0.0
+    loss_mask_bce: 1.0
+    loss_prev_stage_mask_bce: 0.0
+    loss_autodecoder_norm: 0.0
+    depth_neg_penalty: 10000.0
+  raysampler_args:
+    n_rays_per_image_sampled_from_mask: 2048
+    min_depth: 0.05
+    max_depth: 0.05
+    scene_extent: 0.0
+    n_pts_per_ray_training: 1
+    n_pts_per_ray_evaluation: 1
+    stratified_point_sampling_training: false
+    stratified_point_sampling_evaluation: false
+  renderer_class_type: LSTMRenderer
+  implicit_function_class_type: SRNImplicitFunction
+solver_args:
+  breed: adam
+  lr: 5.0e-05
--- a/projects/implicitron_trainer/configs/repro_singleseq_srn_wce_noharm.yaml
+++ b/projects/implicitron_trainer/configs/repro_singleseq_srn_wce_noharm.yaml
@@ -0,0 +1,10 @@
+defaults:
+- repro_singleseq_srn_wce.yaml
+- _self_  # listed last, so the keys below take precedence over the parent config
+generic_model_args:
+  num_passes: 1  # same value the parent config already sets
+  implicit_function_SRNImplicitFunction_args:
+    pixel_generator_args:
+      n_harmonic_functions: 0  # presumably what the "noharm" file-name suffix refers to — confirm
+    raymarch_function_args:
+      n_harmonic_functions: 0
--- a/projects/implicitron_trainer/configs/repro_singleseq_wce_base.yaml
+++ b/projects/implicitron_trainer/configs/repro_singleseq_wce_base.yaml
@@ -0,0 +1,18 @@
+defaults:
+- repro_singleseq_base
+- _self_  # listed last, so the keys below take precedence over repro_singleseq_base
+dataloader_args:
+  batch_size: 10  # repro_singleseq_base.yaml sets 1
+  dataset_len: 1000
+  dataset_len_val: 1
+  num_workers: 8
+  images_per_seq_options:  # repro_singleseq_base.yaml lists only 2
+  - 2
+  - 3
+  - 4
+  - 5
+  - 6
+  - 7
+  - 8
+  - 9
+  - 10