@@ -279,23 +279,28 @@ def init_weights_tiny_model(model):
279
279
("Qwen/Qwen2.5-VL-3B-Instruct" , Qwen2_5_VLForConditionalGeneration ),
280
280
]:
281
281
processor = AutoProcessor .from_pretrained (model_id )
282
- config = AutoConfig .from_pretrained (model_id )
283
-
284
- config .text_config .num_hidden_layers = 2
285
- config .text_config .hidden_size = 16
286
- config .text_config .num_attention_heads = 4
287
- config .text_config .num_key_value_heads = 2
288
282
289
- config .vision_config .num_hidden_layers = 2
290
- config .vision_config .hidden_size = 16
291
- config .vision_config .num_attention_heads = 4
292
- config .vision_config .num_key_value_heads = 2
283
+ text_config = {
284
+ "num_hidden_layers" : 2 ,
285
+ "hidden_size" : 16 ,
286
+ "num_attention_heads" : 4 ,
287
+ "num_key_value_heads" : 2 ,
288
+ "layer_types" : None , # Set it automatically from num_hidden_layers
289
+ }
290
+ vision_config = {
291
+ "num_hidden_layers" : 2 ,
292
+ "hidden_size" : 16 ,
293
+ "num_attention_heads" : 4 ,
294
+ "num_key_value_heads" : 2 ,
295
+ }
296
+ config = AutoConfig .from_pretrained (model_id , text_config = text_config , vision_config = vision_config )
293
297
294
298
if isinstance (config , (Qwen2VLConfig )):
295
299
config .vision_config .depth = 2
296
300
297
301
if isinstance (config , (Qwen2VLConfig , Qwen2_5_VLConfig )):
298
302
config .text_config .rope_scaling ["mrope_section" ] = [2 ]
303
+ config .rope_scaling ["mrope_section" ] = [2 ] # different dict object from text_config; see GH-4101
299
304
300
305
if isinstance (config , (Qwen2_5_VLConfig )):
301
306
config .vision_config .out_hidden_size = 16
0 commit comments