# model
model_name_or_path: google/paligemma-3b-mix-448
visual_inputs: true
tune_mm_proj: true
#print_param_status: true

# method
stage: sft
do_train: true
finetuning_type: full

# ddp
ddp_timeout: 180000000
deepspeed: examples/deepspeed/ds_z2_offload_config.json

# dataset
dataset: mllm_pt_demo
dataset_dir: data
template: gemma
cutoff_len: 2048
max_samples: 3
#val_size: 0.0001
overwrite_cache: true
preprocessing_num_workers: 16

# output
output_dir: saves/paligemma/full/sft_llava_pt_test
logging_steps: 1
save_steps: 50
plot_loss: true
overwrite_output_dir: true
#save_strategy: epoch
#save_total_limit: 2

# train
per_device_train_batch_size: 1
gradient_accumulation_steps: 16
learning_rate: 0.00001
num_train_epochs: 100
lr_scheduler_type: cosine
warmup_ratio: 0.1  # fractional value is a ratio of total steps, not a step count
#bf16: true
pure_bf16: true

# eval
do_eval: false
#per_device_eval_batch_size: 1
#evaluation_strategy: steps
#eval_steps: 500
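
# Usage (a sketch, assuming this is a LLaMA-Factory training config and that
# the file is saved as e.g. paligemma_full_sft.yaml; path is illustrative):
#   llamafactory-cli train paligemma_full_sft.yaml
# Since DeepSpeed ZeRO-2 offload is enabled above, the deepspeed package must
# be installed; for an explicit multi-GPU torchrun launch, LLaMA-Factory also
# supports:
#   FORCE_TORCHRUN=1 llamafactory-cli train paligemma_full_sft.yaml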