Skip to content

Commit c49c43d

Browse files
[Bug fix] Fix perf in mixed deployment with yiyan adapter (#3703)
Co-authored-by: YuBaoku <[email protected]>
1 parent a424ab9 commit c49c43d

File tree

2 files changed

+4
-1
lines changed

2 files changed

+4
-1
lines changed

fastdeploy/output/token_processor.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -360,6 +360,7 @@ def _process_sampling_with_logprob_batch_output(self):
360360
metrics = RequestMetrics(
361361
arrival_time=task.arrival_time,
362362
inference_start_time=task.inference_start_time,
363+
model_execute_time=time.time() - task.inference_start_time,
363364
first_token_time=time.time() - task.inference_start_time,
364365
time_in_queue=task.schedule_start_time - task.preprocess_end_time,
365366
preprocess_cost_time=task.preprocess_end_time - task.preprocess_start_time,
@@ -503,6 +504,7 @@ def _process_batch_output(self):
503504
metrics = RequestMetrics(
504505
arrival_time=task.arrival_time,
505506
inference_start_time=task.inference_start_time,
507+
model_execute_time=time.time() - task.inference_start_time,
506508
first_token_time=time.time() - task.inference_start_time,
507509
time_in_queue=task.schedule_start_time - task.preprocess_end_time,
508510
preprocess_cost_time=task.preprocess_end_time - task.preprocess_start_time,
@@ -514,6 +516,7 @@ def _process_batch_output(self):
514516
else:
515517
metrics = RequestMetrics(
516518
arrival_time=time.time(),
519+
model_execute_time=time.time() - task.inference_start_time,
517520
request_start_time=task.arrival_time,
518521
)
519522
self.number_of_output_tokens += len(token_ids)

fastdeploy/scheduler/local_scheduler.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -209,7 +209,7 @@ def calc_required_blocks(self, token_num, block_size):
209209
return (token_num + block_size - 1) // block_size
210210

211211
def get_unhandled_request_num(self):
212-
return len(self.requests)
212+
return len(self.ids) - self.ids_read_cursor
213213

214214
def get_requests(
215215
self,

0 commit comments

Comments
 (0)