54 changes: 54 additions & 0 deletions examples/config/dpo/full_tp_pp_ep_sd_no_packing.yaml
@@ -0,0 +1,54 @@
### data
train_dataset_type: erniekit
eval_dataset_type: erniekit
train_dataset_path: ./data/dpo/train.jsonl
train_dataset_prob: "1.0"
eval_dataset_path: ./data/dpo/dev.jsonl
eval_dataset_prob: "1.0"
max_seq_len: 8192
num_samples_each_epoch: 6000000
packing: false
mix_strategy: concat

### model
model_name_or_path: Qwen/Qwen3-30B-A3B
attn_impl: flashmask

### finetuning
# base
stage: DPO
fine_tuning: full
seed: 23
do_train: true
do_eval: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
num_train_epochs: 1
max_steps: -1
eval_steps: 100
evaluation_strategy: steps
save_steps: 100
save_strategy: steps
logging_steps: 1
gradient_accumulation_steps: 4
logging_dir: ./vdl_log
output_dir: ./checkpoints/qwen3_hf_30b_a3b_dpo_ckpts_parallel
disable_tqdm: true
eval_accumulation_steps: 16

# train
warmup_steps: 20
learning_rate: 1.0e-6

# performance
tensor_parallel_degree: 2
pipeline_parallel_degree: 2
pipeline_parallel_config: enable_clear_every_step_cache disable_partial_send_recv
expert_parallel_degree: 4
sharding_parallel_degree: 2
sharding: stage1
sequence_parallel: false
recompute: true
bf16: true
fp16_opt_level: O2
unified_checkpoint: true
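
Note: this config combines tensor, pipeline, expert, and sharding parallelism. Below is a minimal sketch of how one might sanity-check the device count implied by such a file; it assumes PyYAML is available, the file name check_parallel_config.py is hypothetical, and the rule that expert parallelism is carved out of the sharding/data-parallel groups (rather than multiplying the world size) is an assumption, not a guarantee from this repo.

# check_parallel_config.py -- hypothetical helper, not part of this PR.
import yaml

def required_gpus(path: str) -> int:
    with open(path) as f:
        cfg = yaml.safe_load(f)
    tp = cfg.get("tensor_parallel_degree", 1)
    pp = cfg.get("pipeline_parallel_degree", 1)
    sd = cfg.get("sharding_parallel_degree", 1)
    # Assumption: expert_parallel_degree reuses the sharding/data-parallel
    # dimension, so it does not add another multiplicative factor here.
    return tp * pp * sd

if __name__ == "__main__":
    n = required_gpus("examples/config/dpo/full_tp_pp_ep_sd_no_packing.yaml")
    print(f"Config expects {n} devices (2 TP x 2 PP x 2 sharding)")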
52 changes: 52 additions & 0 deletions examples/config/dpo/full_tp_pp_no_packing.yaml
@@ -0,0 +1,52 @@
### data
train_dataset_type: erniekit
eval_dataset_type: erniekit
train_dataset_path: ./data/dpo/train.jsonl
train_dataset_prob: "1.0"
eval_dataset_path: ./data/dpo/dev.jsonl
eval_dataset_prob: "1.0"
max_seq_len: 8192
num_samples_each_epoch: 6000000
packing: false
mix_strategy: concat

### model
model_name_or_path: Qwen/Qwen3-0.6B-Base
attn_impl: flashmask

### finetuning
# base
stage: DPO
fine_tuning: full
seed: 23
do_train: true
do_eval: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
num_train_epochs: 1
max_steps: -1
eval_steps: 100
evaluation_strategy: steps
save_steps: 100
save_strategy: steps
logging_steps: 1
gradient_accumulation_steps: 4
logging_dir: ./vdl_log
output_dir: ./checkpoints/qwen3_hf_0p6b_dpo_ckpts_parallel
disable_tqdm: true
eval_accumulation_steps: 16

# train
warmup_steps: 20
learning_rate: 1.0e-6

# performance
tensor_parallel_degree: 2
pipeline_parallel_degree: 2
pipeline_parallel_config: enable_clear_every_step_cache disable_partial_send_recv
sharding: stage1
sequence_parallel: false
recompute: true
bf16: true
fp16_opt_level: O2
unified_checkpoint: true
@@ -44,8 +44,8 @@ learning_rate: 1.0e-6
 tensor_parallel_degree: 2
 pipeline_parallel_degree: 2
 pipeline_parallel_config: enable_clear_every_step_cache disable_partial_send_recv
-sequence_parallel: true
 sharding: stage1
+sequence_parallel: true
 recompute: true
 bf16: true
 fp16_opt_level: O2
55 changes: 55 additions & 0 deletions examples/config/dpo/full_tp_pp_packing_optim_vram.yaml
@@ -0,0 +1,55 @@
### data
train_dataset_type: erniekit
eval_dataset_type: erniekit
train_dataset_path: ./data/dpo/train.jsonl
train_dataset_prob: "1.0"
eval_dataset_path: ./data/dpo/dev.jsonl
eval_dataset_prob: "1.0"
max_seq_len: 8192
num_samples_each_epoch: 6000000
packing: true
mix_strategy: concat

### model
model_name_or_path: Qwen/Qwen3-0.6B-Base
attn_impl: flashmask

### finetuning
# base
stage: DPO
fine_tuning: full
seed: 23
do_train: true
do_eval: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
num_train_epochs: 1
max_steps: -1
eval_steps: 100
evaluation_strategy: steps
save_steps: 100
save_strategy: steps
logging_steps: 1
gradient_accumulation_steps: 4
logging_dir: ./vdl_log
output_dir: ./checkpoints/qwen3_hf_0p6b_dpo_ckpts_parallel
disable_tqdm: true
eval_accumulation_steps: 16

# train
warmup_steps: 20
learning_rate: 1.0e-6

# performance
tensor_parallel_degree: 2
pipeline_parallel_degree: 2
pipeline_parallel_config: enable_clear_every_step_cache disable_partial_send_recv
sharding: stage1
sequence_parallel: true
use_fused_head_and_loss_fn: true
loss_subbatch_sequence_length: 8192
tensorwise_offload_optimizer: true
recompute: true
bf16: true
fp16_opt_level: O2
unified_checkpoint: true
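
Note: relative to the other configs, this one enables packing together with use_fused_head_and_loss_fn, loss_subbatch_sequence_length, and tensorwise_offload_optimizer to trade extra compute for lower peak VRAM. The sketch below illustrates the general idea behind computing the loss in sequence sub-batches of 8192 tokens so the full [seq_len, vocab] logits are never materialized at once; it is a generic NumPy illustration, not this repo's fused implementation.

# Generic illustration of sub-batched loss computation (not the repo's code).
import numpy as np

def chunked_token_loss(hidden, lm_head_w, labels, chunk=8192):
    # hidden: [seq_len, d], lm_head_w: [vocab, d], labels: [seq_len] int
    out = []
    for s in range(0, hidden.shape[0], chunk):
        logits = hidden[s:s + chunk] @ lm_head_w.T             # only [chunk, vocab] in memory
        logits = logits - logits.max(axis=-1, keepdims=True)   # numerical stability
        log_z = np.log(np.exp(logits).sum(axis=-1))
        picked = logits[np.arange(logits.shape[0]), labels[s:s + chunk]]
        out.append(log_z - picked)                             # -log p(label)
    return np.concatenate(out)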
53 changes: 53 additions & 0 deletions examples/config/dpo/full_tp_pp_sd_no_packing.yaml
@@ -0,0 +1,53 @@
### data
train_dataset_type: erniekit
eval_dataset_type: erniekit
train_dataset_path: ./data/dpo/train.jsonl
train_dataset_prob: "1.0"
eval_dataset_path: ./data/dpo/dev.jsonl
eval_dataset_prob: "1.0"
max_seq_len: 8192
num_samples_each_epoch: 6000000
packing: false
mix_strategy: concat

### model
model_name_or_path: Qwen/Qwen3-0.6B-Base
attn_impl: flashmask

### finetuning
# base
stage: DPO
fine_tuning: full
seed: 23
do_train: true
do_eval: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
num_train_epochs: 1
max_steps: -1
eval_steps: 100
evaluation_strategy: steps
save_steps: 100
save_strategy: steps
logging_steps: 1
gradient_accumulation_steps: 4
logging_dir: ./vdl_log
output_dir: ./checkpoints/qwen3_hf_0p6b_dpo_ckpts_parallel
disable_tqdm: true
eval_accumulation_steps: 16

# train
warmup_steps: 20
learning_rate: 1.0e-6

# performance
tensor_parallel_degree: 2
pipeline_parallel_degree: 2
pipeline_parallel_config: enable_clear_every_step_cache disable_partial_send_recv
sharding_parallel_degree: 2
sharding: stage1
sequence_parallel: false
recompute: true
bf16: true
fp16_opt_level: O2
unified_checkpoint: true
51 changes: 51 additions & 0 deletions examples/config/dpo/full_tp_sd_packing.yaml
@@ -0,0 +1,51 @@
### data
train_dataset_type: erniekit
eval_dataset_type: erniekit
train_dataset_path: ./data/dpo/train.jsonl
train_dataset_prob: "1.0"
eval_dataset_path: ./data/dpo/dev.jsonl
eval_dataset_prob: "1.0"
max_seq_len: 8192
num_samples_each_epoch: 6000000
packing: true
mix_strategy: concat

### model
model_name_or_path: Qwen/Qwen3-0.6B-Base
attn_impl: flashmask

### finetuning
# base
stage: DPO
fine_tuning: full
seed: 23
do_train: true
do_eval: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
num_train_epochs: 1
max_steps: -1
eval_steps: 100
evaluation_strategy: steps
save_steps: 100
save_strategy: steps
logging_steps: 1
gradient_accumulation_steps: 4
logging_dir: ./vdl_log
output_dir: ./checkpoints/qwen3_hf_0p6b_dpo_ckpts_parallel
disable_tqdm: true
eval_accumulation_steps: 16

# train
warmup_steps: 20
learning_rate: 1.0e-6

# performance
tensor_parallel_degree: 2
sharding_parallel_degree: 2
sharding: stage1
sequence_parallel: true
recompute: true
bf16: true
fp16_opt_level: O2
unified_checkpoint: true
55 changes: 55 additions & 0 deletions examples/config/dpo/lora_tp_pp_ep_sd_no_packing.yaml
@@ -0,0 +1,55 @@
### data
train_dataset_type: erniekit
eval_dataset_type: erniekit
train_dataset_path: ./data/dpo/train.jsonl
train_dataset_prob: "1.0"
eval_dataset_path: ./data/dpo/dev.jsonl
eval_dataset_prob: "1.0"
max_seq_len: 8192
packing: false
mix_strategy: concat

### model
model_name_or_path: Qwen/Qwen3-30B-A3B
attn_impl: flashmask
lora: true
lora_rank: 8

### finetuning
# base
stage: DPO
fine_tuning: lora
seed: 23
do_train: true
do_eval: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
num_train_epochs: 1
max_steps: -1
eval_steps: 100
evaluation_strategy: steps
save_steps: 100
save_strategy: steps
logging_steps: 1
gradient_accumulation_steps: 4
logging_dir: ./vdl_log
output_dir: ./checkpoints/qwen3_hf_30b_a3b_dpo_lora_ckpts_parallel
disable_tqdm: true
eval_accumulation_steps: 16

# train
warmup_steps: 20
learning_rate: 1.0e-5

# performance
tensor_parallel_degree: 2
pipeline_parallel_degree: 2
pipeline_parallel_config: enable_clear_every_step_cache disable_partial_send_recv
expert_parallel_degree: 4
sharding_parallel_degree: 2
sharding: stage1
sequence_parallel: false
recompute: true
bf16: true
fp16_opt_level: O2
unified_checkpoint: true
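
Note: with fine_tuning: lora and lora_rank: 8, only the low-rank adapter matrices are trained, which is consistent with the higher learning rate here (1.0e-5) than in the full-parameter configs (1.0e-6). As a rough back-of-the-envelope check, each adapted weight of shape d_out x d_in gains rank * (d_in + d_out) trainable parameters; the helper below is illustrative only and the example layer shape is hypothetical.

# Illustrative arithmetic only; layer shapes are not read from the model.
def lora_extra_params(d_in: int, d_out: int, rank: int = 8) -> int:
    # Down-projection (d_in x rank) plus up-projection (rank x d_out).
    return rank * (d_in + d_out)

# e.g. a hypothetical 2048 x 2048 projection with lora_rank 8:
print(lora_extra_params(2048, 2048))   # 32768 extra trainable parameters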
53 changes: 53 additions & 0 deletions examples/config/dpo/lora_tp_pp_no_packing.yaml
@@ -0,0 +1,53 @@
### data
train_dataset_type: erniekit
eval_dataset_type: erniekit
train_dataset_path: ./data/dpo/train.jsonl
train_dataset_prob: "1.0"
eval_dataset_path: ./data/dpo/dev.jsonl
eval_dataset_prob: "1.0"
max_seq_len: 8192
packing: false
mix_strategy: concat

### model
model_name_or_path: Qwen/Qwen3-0.6B-Base
attn_impl: flashmask
lora: true
lora_rank: 8

### finetuning
# base
stage: DPO
fine_tuning: lora
seed: 23
do_train: true
do_eval: true
per_device_eval_batch_size: 1
per_device_train_batch_size: 1
num_train_epochs: 1
max_steps: -1
eval_steps: 100
evaluation_strategy: steps
save_steps: 100
save_strategy: steps
logging_steps: 1
gradient_accumulation_steps: 4
logging_dir: ./vdl_log
output_dir: ./checkpoints/qwen3_hf_0p6b_dpo_lora_ckpts_parallel
disable_tqdm: true
eval_accumulation_steps: 16

# train
warmup_steps: 20
learning_rate: 1.0e-5

# performance
tensor_parallel_degree: 2
pipeline_parallel_degree: 2
pipeline_parallel_config: enable_clear_every_step_cache disable_partial_send_recv
sharding: stage1
sequence_parallel: false
recompute: true
bf16: true
fp16_opt_level: O2
unified_checkpoint: true
@@ -45,8 +45,8 @@ learning_rate: 1.0e-5
 tensor_parallel_degree: 2
 pipeline_parallel_degree: 2
 pipeline_parallel_config: enable_clear_every_step_cache disable_partial_send_recv
-sequence_parallel: true
 sharding: stage1
+sequence_parallel: true
 recompute: true
 bf16: true
 fp16_opt_level: O2