@@ -360,6 +360,7 @@ def _process_sampling_with_logprob_batch_output(self):
360
360
metrics = RequestMetrics (
361
361
arrival_time = task .arrival_time ,
362
362
inference_start_time = task .inference_start_time ,
363
+ model_execute_time = time .time () - task .inference_start_time ,
363
364
first_token_time = time .time () - task .inference_start_time ,
364
365
time_in_queue = task .schedule_start_time - task .preprocess_end_time ,
365
366
preprocess_cost_time = task .preprocess_end_time - task .preprocess_start_time ,
@@ -503,6 +504,7 @@ def _process_batch_output(self):
503
504
metrics = RequestMetrics (
504
505
arrival_time = task .arrival_time ,
505
506
inference_start_time = task .inference_start_time ,
507
+ model_execute_time = time .time () - task .inference_start_time ,
506
508
first_token_time = time .time () - task .inference_start_time ,
507
509
time_in_queue = task .schedule_start_time - task .preprocess_end_time ,
508
510
preprocess_cost_time = task .preprocess_end_time - task .preprocess_start_time ,
@@ -514,6 +516,7 @@ def _process_batch_output(self):
514
516
else :
515
517
metrics = RequestMetrics (
516
518
arrival_time = time .time (),
519
+ model_execute_time = time .time () - task .inference_start_time ,
517
520
request_start_time = task .arrival_time ,
518
521
)
519
522
self .number_of_output_tokens += len (token_ids )
0 commit comments