@@ -279,23 +279,28 @@ def init_weights_tiny_model(model):
279279 ("Qwen/Qwen2.5-VL-3B-Instruct" , Qwen2_5_VLForConditionalGeneration ),
280280]:
281281 processor = AutoProcessor .from_pretrained (model_id )
282- config = AutoConfig .from_pretrained (model_id )
283-
284- config .text_config .num_hidden_layers = 2
285- config .text_config .hidden_size = 16
286- config .text_config .num_attention_heads = 4
287- config .text_config .num_key_value_heads = 2
288282
289- config .vision_config .num_hidden_layers = 2
290- config .vision_config .hidden_size = 16
291- config .vision_config .num_attention_heads = 4
292- config .vision_config .num_key_value_heads = 2
283+ text_config = {
284+ "num_hidden_layers" : 2 ,
285+ "hidden_size" : 16 ,
286+ "num_attention_heads" : 4 ,
287+ "num_key_value_heads" : 2 ,
288+ "layer_types" : None , # Set it automatically from num_hidden_layers
289+ }
290+ vision_config = {
291+ "num_hidden_layers" : 2 ,
292+ "hidden_size" : 16 ,
293+ "num_attention_heads" : 4 ,
294+ "num_key_value_heads" : 2 ,
295+ }
296+ config = AutoConfig .from_pretrained (model_id , text_config = text_config , vision_config = vision_config )
293297
294298 if isinstance (config , (Qwen2VLConfig )):
295299 config .vision_config .depth = 2
296300
297301 if isinstance (config , (Qwen2VLConfig , Qwen2_5_VLConfig )):
298302 config .text_config .rope_scaling ["mrope_section" ] = [2 ]
303+ config .rope_scaling ["mrope_section" ] = [2 ] # different dict object from text_config; see GH-4101
299304
300305 if isinstance (config , (Qwen2_5_VLConfig )):
301306 config .vision_config .out_hidden_size = 16
0 commit comments