diff --git a/examples/README.md b/examples/README.md
index 6aeaaab1..6732faaf 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -3,41 +3,41 @@ We provide diverse examples about fine-tuning LLMs.
 ```
 examples/
 ├── lora_single_gpu/
-│   ├── pt.sh: Pre-training
-│   ├── sft.sh: Supervised fine-tuning
-│   ├── reward.sh: Reward modeling
-│   ├── ppo.sh: PPO training
-│   ├── dpo.sh: DPO training
-│   ├── orpo.sh: ORPO training
+│   ├── pt.sh: Do pre-training
+│   ├── sft.sh: Do supervised fine-tuning
+│   ├── reward.sh: Do reward modeling
+│   ├── ppo.sh: Do PPO training
+│   ├── dpo.sh: Do DPO training
+│   ├── orpo.sh: Do ORPO training
 │   ├── prepare.sh: Save tokenized dataset
-│   └── predict.sh: Batch prediction
+│   └── predict.sh: Do batch predict
 ├── qlora_single_gpu/
-│   ├── bitsandbytes.sh
-│   ├── gptq.sh
-│   ├── awq.sh
-│   └── aqlm.sh
+│   ├── bitsandbytes.sh: Fine-tune 4/8-bit BNB models
+│   ├── gptq.sh: Fine-tune 4/8-bit GPTQ models
+│   ├── awq.sh: Fine-tune 4-bit AWQ models
+│   └── aqlm.sh: Fine-tune 2-bit AQLM models
 ├── lora_multi_gpu/
-│   ├── single_node.sh
-│   └── multi_node.sh
+│   ├── single_node.sh: Fine-tune model with Accelerate on single node
+│   └── multi_node.sh: Fine-tune model with Accelerate on multiple nodes
 ├── full_multi_gpu/
-│   ├── single_node.sh
-│   └── multi_node.sh
+│   ├── single_node.sh: Fine-tune model with DeepSpeed on single node
+│   └── multi_node.sh: Fine-tune model with DeepSpeed on multiple nodes
 ├── merge_lora/
-│   ├── merge.sh: Merge LoRA weights
-│   └── quantize.sh: Quantize with AutoGPTQ
+│   ├── merge.sh: Merge LoRA weights into the pre-trained models
+│   └── quantize.sh: Quantize fine-tuned model with AutoGPTQ
 ├── inference/
-│   ├── cli_demo.sh
-│   ├── api_demo.sh
-│   ├── web_demo.sh
-│   └── evaluate.sh
+│   ├── cli_demo.sh: Launch a command line interface
+│   ├── api_demo.sh: Launch an OpenAI-style API
+│   ├── web_demo.sh: Launch a web interface
+│   └── evaluate.sh: Evaluate model on the MMLU benchmark
 └── extras/
     ├── galore/
-    │   └── sft.sh
+    │   └── sft.sh: Fine-tune model with GaLore
     ├── loraplus/
-    │   └── sft.sh
+    │   └── sft.sh: Fine-tune model with LoRA+
     ├── llama_pro/
-    │   ├── expand.sh
-    │   └── sft.sh
+    │   ├── expand.sh: Expand layers in the model
+    │   └── sft.sh: Fine-tune expanded model
     └── fsdp_qlora/
-        └── sft.sh
+        └── sft.sh: Fine-tune quantized model with FSDP
 ```
diff --git a/examples/extras/fsdp_qlora/sft.sh b/examples/extras/fsdp_qlora/sft.sh
index 8ffb5f2e..614245d3 100644
--- a/examples/extras/fsdp_qlora/sft.sh
+++ b/examples/extras/fsdp_qlora/sft.sh
@@ -5,17 +5,17 @@ pip install "accelerate>=0.28.0"
 pip install "bitsandbytes>=0.43.0"
 
 CUDA_VISIBLE_DEVICES=0,1 accelerate launch \
-    --config_file ../accelerate/fsdp_config.yaml \
-    ../../src/train_bash.py \
+    --config_file ../../accelerate/fsdp_config.yaml \
+    ../../../src/train_bash.py \
     --stage sft \
     --do_train \
     --model_name_or_path meta-llama/Llama-2-70b-hf \
     --dataset alpaca_gpt4_en,glaive_toolcall \
-    --dataset_dir ../../data \
+    --dataset_dir ../../../data \
     --template default \
     --finetuning_type lora \
     --lora_target q_proj,v_proj \
-    --output_dir ../../saves/LLaMA2-70B/lora/sft \
+    --output_dir ../../../saves/LLaMA2-70B/lora/sft \
     --overwrite_cache \
     --overwrite_output_dir \
     --cutoff_len 1024 \
diff --git a/examples/inference/api_demo.sh b/examples/inference/api_demo.sh
index 4a601bb6..aee86595 100644
--- a/examples/inference/api_demo.sh
+++ b/examples/inference/api_demo.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-CUDA_VISIBLE_DEVICES=0 API_PORT=8000 python src/api_demo.py \
+CUDA_VISIBLE_DEVICES=0 API_PORT=8000 python ../../src/api_demo.py \
     --model_name_or_path meta-llama/Llama-2-7b-hf \
     --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
     --template default \
diff --git a/examples/inference/cli_demo.sh b/examples/inference/cli_demo.sh
index fdeb01e6..3e4a1e4e 100644
--- a/examples/inference/cli_demo.sh
+++ b/examples/inference/cli_demo.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-CUDA_VISIBLE_DEVICES=0 python src/cli_demo.py \
+CUDA_VISIBLE_DEVICES=0 python ../../src/cli_demo.py \
     --model_name_or_path meta-llama/Llama-2-7b-hf \
     --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
     --template default \
diff --git a/examples/inference/evaluate.sh b/examples/inference/evaluate.sh
index b3053662..b54c2a60 100644
--- a/examples/inference/evaluate.sh
+++ b/examples/inference/evaluate.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-CUDA_VISIBLE_DEVICES=0 python src/evaluate.py \
+CUDA_VISIBLE_DEVICES=0 python ../../src/evaluate.py \
     --model_name_or_path meta-llama/Llama-2-7b-hf \
     --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
     --template vanilla \
diff --git a/examples/inference/web_demo.sh b/examples/inference/web_demo.sh
index 0f8307fb..201be2b4 100644
--- a/examples/inference/web_demo.sh
+++ b/examples/inference/web_demo.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-CUDA_VISIBLE_DEVICES=0 python src/web_demo.py \
+CUDA_VISIBLE_DEVICES=0 python ../../src/web_demo.py \
     --model_name_or_path meta-llama/Llama-2-7b-hf \
     --adapter_name_or_path ../../saves/LLaMA2-7B/lora/sft \
     --template default \