@@ -62,6 +62,7 @@ def add_parser_gradio():
62
62
cache_block_seq_len_act = ArgumentHelper .cache_block_seq_len (pt_group )
63
63
prefix_caching_act = ArgumentHelper .enable_prefix_caching (pt_group )
64
64
max_prefill_token_num_act = ArgumentHelper .max_prefill_token_num (pt_group )
65
+ model_format_act = ArgumentHelper .model_format (pt_group )
65
66
# turbomind args
66
67
tb_group = parser .add_argument_group ('TurboMind engine arguments' )
67
68
# common engine args
@@ -73,7 +74,8 @@ def add_parser_gradio():
73
74
tb_group ._group_actions .append (cache_block_seq_len_act )
74
75
tb_group ._group_actions .append (prefix_caching_act )
75
76
tb_group ._group_actions .append (max_prefill_token_num_act )
76
- ArgumentHelper .model_format (tb_group )
77
+ tb_group ._group_actions .append (model_format_act )
78
+
77
79
ArgumentHelper .quant_policy (tb_group )
78
80
ArgumentHelper .rope_scaling_factor (tb_group )
79
81
ArgumentHelper .communicator (tb_group )
@@ -289,8 +291,6 @@ def gradio(args):
289
291
cache_block_seq_len = args .cache_block_seq_len ,
290
292
enable_prefix_caching = args .enable_prefix_caching ,
291
293
max_prefill_token_num = args .max_prefill_token_num ,
292
- num_tokens_per_iter = args .num_tokens_per_iter ,
293
- max_prefill_iters = args .max_prefill_iters ,
294
294
communicator = args .communicator )
295
295
chat_template_config = get_chat_template (args .chat_template )
296
296
run (args .model_path_or_server ,
0 commit comments