2 files changed: +67 -1 lines changed
New file: an Axolotl LoRA fine-tuning config for google/gemma-3-4b-it.

base_model: google/gemma-3-4b-it
strict: false

# gemma3 doesn't seem to play nice with ddp
ddp_find_unused_parameters: true

chat_template: gemma3
datasets:
  - path: cgato/SlimOrcaDedupCleaned
    type: chat_template
    field_messages: conversations
    message_property_mappings:
      role: from
      content: value

dataset_prepared_path: last_run_prepared
val_set_size: 0.01
output_dir: ./outputs/out

adapter: lora
lora_model_dir:

sequence_len: 2048
sample_packing: true
pad_to_sequence_len: true

lora_r: 32
lora_alpha: 16
lora_dropout: 0.05
lora_target_modules: 'language_model.model.layers.[\d]+.(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj'

wandb_project:
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:

gradient_accumulation_steps: 4
micro_batch_size: 2
num_epochs: 1
optimizer: adamw_bnb_8bit
lr_scheduler: cosine
learning_rate: 0.0002

train_on_inputs: false
group_by_length: false
bf16: true
fp16:
tf32: true

gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
local_rank:
logging_steps: 1
flash_attention: true
eager_attention:

warmup_ratio: 0.1
evals_per_epoch: 1
saves_per_epoch: 1
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
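
A note on the lora_target_modules line above: it is a single quoted string rather than a YAML list, and PEFT (which Axolotl builds on for LoRA) treats a string value as a regular expression, full-matched against each dotted module name. The short Python sketch below checks which Gemma-3-style names the pattern does and does not catch; the candidate names here are illustrative, the real ones come from model.named_modules() on the loaded checkpoint.

import re

# The regex from lora_target_modules. The unescaped dots match any
# character, which is harmless here but worth knowing about.
PATTERN = re.compile(
    r"language_model.model.layers.[\d]+."
    r"(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj"
)

# Illustrative module names following the multimodal Gemma-3 layout.
candidates = [
    "language_model.model.layers.0.self_attn.q_proj",      # targeted
    "language_model.model.layers.17.mlp.down_proj",        # targeted
    "language_model.model.layers.3.self_attn.rotary_emb",  # not targeted
    "vision_tower.encoder.layers.0.self_attn.q_proj",      # not targeted
]

for name in candidates:
    # fullmatch mirrors how PEFT applies string patterns to module names
    hit = PATTERN.fullmatch(name) is not None
    print(f"{'match' if hit else 'skip '}  {name}")

The config itself is driven entirely from the YAML, e.g. with accelerate launch -m axolotl.cli.train pointed at this file (or the axolotl train shortcut in recent releases).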
Changed file: an existing config switches its adapter from lora to qlora.

@@ -20,7 +20,7 @@ dataset_prepared_path: last_run_prepared
 val_set_size: 0.01
 output_dir: ./outputs/out
 
-adapter: lora
+adapter: qlora
 lora_model_dir:
 
 sequence_len: 2048
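
The second file's only change is adapter: lora -> adapter: qlora, which tells Axolotl to load the frozen base weights 4-bit-quantized while the LoRA adapter weights stay in higher precision. Below is a rough sketch of what that pairing looks like in plain transformers + peft terms, not Axolotl's actual internals; the hyperparameters mirror the config above, and depending on the transformers version the multimodal Gemma-3 checkpoint may want AutoModelForImageTextToText rather than AutoModelForCausalLM.

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model

# 4-bit NF4 quantization of the frozen base model: the "q" in qlora.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,  # consistent with bf16: true
)

model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-3-4b-it",
    quantization_config=bnb_config,
    device_map="auto",
)

# LoRA settings mirroring lora_r / lora_alpha / lora_dropout above; a
# string target_modules is treated by PEFT as a regex over module names.
lora_config = LoraConfig(
    r=32,
    lora_alpha=16,
    lora_dropout=0.05,
    target_modules=r"language_model.model.layers.[\d]+."
                   r"(mlp|cross_attn|self_attn).(up|down|gate|q|k|v|o)_proj",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only the adapter weights are trainable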