Commit 7366d99

Improve metrics (#4178)
* use singleton, simplify metrics
* fix typo
* change comments pos
* minor
1 parent ecb6cce commit 7366d99
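
The refactor drops the module-level MetricsManager, MetricsContext, and helper functions in favor of a single MetricsProcessor class decorated with singleton from lmdeploy.pytorch.utils. As a rough sketch of that pattern (not the actual lmdeploy implementation, which may differ), such a class decorator typically caches one instance per decorated class:

# illustrative sketch only; lmdeploy.pytorch.utils.singleton may be implemented differently
def singleton(cls):
    # cache one instance per decorated class
    instances = {}

    def get_instance(*args, **kwargs):
        # construct the instance on first use, return the cached one afterwards
        if cls not in instances:
            instances[cls] = cls(*args, **kwargs)
        return instances[cls]

    return get_instance

With a decorator along these lines, every call to MetricsProcessor() returns the same object, which is why the module can expose a ready-made metrics_processor = MetricsProcessor() at import time.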

File tree: 6 files changed, +141 -230 lines changed

lmdeploy/metrics/loggers.py

Lines changed: 19 additions & 20 deletions
@@ -8,7 +8,7 @@
 import numpy as np
 
-from lmdeploy.metrics.stats import FinishedRequestStats, IterationStats, SchedulerStats, SpeculativeDecodingStats
+from lmdeploy.metrics.stats import IterationStats, RequestStats, SchedulerStats, SpeculativeDecodingStats
 from lmdeploy.utils import get_logger
 
 logger = get_logger('lmdeploy')
@@ -70,18 +70,18 @@ def record_specdecode(self, stats: SpeculativeDecodingStats):
         self.num_accepted_tokens += stats.num_accepted_tokens
         self.num_accepted_tokens_per_pos += stats.num_accepted_tokens_per_pos
 
-    def record_finish(self, stats: FinishedRequestStats):
+    def record_finish(self, stats: RequestStats):
         pass
 
-    def _get_log_spec_msg(self):
+    def log_spec_msg(self):
         """Get spec decoding logging msg."""
         if self.num_drafts == 0:
-            return ''
+            return
 
         draft_acceptance_rate = (self.num_accepted_tokens / self.num_draft_tokens *
                                  100 if self.num_draft_tokens > 0 else float('nan'))
 
-        # Conventionally, mean acceptance length includes the bonus token
+        # conventionally, mean acceptance length includes the bonus token
         mean_acceptance_length = 1 + (self.num_accepted_tokens / self.num_drafts)
 
         acceptance_rates = self.num_accepted_tokens_per_pos / self.num_drafts
@@ -93,23 +93,23 @@ def _get_log_spec_msg(self):
                    f'Accepted: {self.num_accepted_tokens} tokens, '
                    f'Drafted: {self.num_draft_tokens} tokens, '
                    f'Per-position acceptance rate: {rates_str}')
-        return log_msg
+        print(log_msg, flush=True)
 
     def log(self):
         now = time.perf_counter()
+
+        # skip logging if no tokens were processed
         if self.total_prompt_tokens == 0 and self.total_generation_tokens == 0:
-            # Not show the metrics log in console
             self._reset(now)
             return
 
+        # derive log information
         prompt_throughput = self.total_prompt_tokens / (now - self.last_log_time)
         generation_throughput = self.total_generation_tokens / (now - self.last_log_time)
-
-        spec_log_msg = self._get_log_spec_msg()
+        scheduler_stats = self.last_scheduler_stats
         self._reset(now)
 
-        scheduler_stats = self.last_scheduler_stats
-        # Format and print output.
+        # format and print
         log_msg = (f"[{datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')} "
                    f'DP{self.dp_rank}] '
                    f'Avg prompt throughput: {prompt_throughput:.1f} tokens/s, '
@@ -120,8 +120,7 @@ def log(self):
                    f'Waiting: {scheduler_stats.num_waiting_reqs} reqs, '
                    f'GPU KV cache usage: {scheduler_stats.gpu_cache_usage * 100 :.1f}%')
         print(log_msg, flush=True)
-        if spec_log_msg:
-            print(spec_log_msg, flush=True)
+        self.log_spec_msg()
 
 
 class PrometheusStatLogger(StatLoggerBase):
@@ -136,12 +135,12 @@ def __init__(self, model_name: str, max_model_len: int, dp_rank: int = 0):
 
         self.dp_rank = dp_rank
 
-        # Unregister any existing lmdeploy collectors
+        # unregister any existing lmdeploy collectors
         for collector in list(prometheus_client.REGISTRY._collector_to_names):
             if hasattr(collector, '_name') and 'lmdeploy' in collector._name:
                 prometheus_client.REGISTRY.unregister(collector)
 
-        # Config information
+        # config information
         self.info_backend_config = prometheus_client.Info(name='lmdeploy:backend_config',
                                                           documentation='information of backend_config')
 
@@ -319,13 +318,13 @@ def record_iteration(self, stats: IterationStats) -> None:
         if stats.itl:
             self.histogram_iter_token_latency.observe(stats.itl)
 
-    def record_finish(self, stats: FinishedRequestStats) -> None:
+    def record_finish(self, stats: RequestStats) -> None:
         self.counter_request_success[stats.finish_reason].inc()
         self.histogram_e2e_time_request.observe(stats.e2e_latency)
-        self.histogram_queue_time_request.observe(stats.queued_time)
-        self.histogram_prefill_time_request.observe(stats.prefill_time)
-        self.histogram_inference_time_request.observe(stats.inference_time)
-        self.histogram_decode_time_request.observe(stats.decode_time)
+        self.histogram_queue_time_request.observe(stats.queued_time_interval)
+        self.histogram_prefill_time_request.observe(stats.prefill_time_interval)
+        self.histogram_inference_time_request.observe(stats.inference_time_interval)
+        self.histogram_decode_time_request.observe(stats.decode_time_interval)
         self.histogram_num_prompt_tokens_request.observe(stats.prompt_tokens)
         self.histogram_num_generation_tokens_request.observe(stats.generation_tokens)
 
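
On the Prometheus side, record_finish feeds the per-request interval fields into prometheus_client histograms. A small self-contained sketch of that mechanism (the metric name, buckets, and sample value below are illustrative, not taken from lmdeploy):

# illustrative prometheus_client usage; name, buckets, and value are assumptions
import prometheus_client

histogram_queue_time_request = prometheus_client.Histogram(
    name='example:request_queue_time_seconds',
    documentation='Time a request spent queued before scheduling',
    buckets=(0.01, 0.05, 0.1, 0.5, 1.0, 5.0))

# each finished request contributes one observation per histogram
queued_time_interval = 0.042  # seconds, hypothetical value
histogram_queue_time_request.observe(queued_time_interval)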

Lines changed: 29 additions & 97 deletions
@@ -1,108 +1,37 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import asyncio
-from contextlib import contextmanager
-from dataclasses import dataclass, field
 
 from lmdeploy.messages import ResponseType, ScheduleMetrics
+from lmdeploy.pytorch.utils import singleton
 from lmdeploy.utils import get_logger
 
 from .stats import SchedulerStats
 
 logger = get_logger('lmdeploy')
 
 
-@dataclass
-class MetricsContext:
-    enable_metrics: bool = False
-    scheduler_stats: SchedulerStats = field(default_factory=SchedulerStats)
-
-
-class MetricsManager:
-
-    def __init__(self):
-        """Initialize metrics manager."""
-        self._current_ctx = MetricsContext()
-
-    def set_context(self, ctx: MetricsContext):
-        """Set metrics context."""
-        self._current_ctx = ctx
-
-    def get_context(self):
-        """Get current context."""
-        return self._current_ctx
-
-    @contextmanager
-    def context(self, ctx: MetricsContext):
-        """Context manager."""
-        old_ctx = self.get_context()
-        self.set_context(ctx)
-        try:
-            yield ctx
-        finally:
-            self.set_context(old_ctx)
-
-
-_METRICS_MANAGER = None
-
-
-def get_metrics_manager():
-    global _METRICS_MANAGER
-    if _METRICS_MANAGER is None:
-        _METRICS_MANAGER = MetricsManager()
-
-    return _METRICS_MANAGER
-
-
-# Metrics getters
-def is_metrics_enabled():
-    return get_metrics_manager().get_context().enable_metrics
-
-
-def get_current_metrics_context():
-    return get_metrics_manager().get_context()
-
-
-def get_current_scheduler_stats():
-    return get_metrics_manager().get_context().scheduler_stats
-
-
-# Metrics setters
-def set_metrics_enabled_flag(enable_metrics: bool):
-    """Set metrics enabled flag."""
-    ctx = get_current_metrics_context()
-    ctx.enable_metrics = enable_metrics
-
-    if enable_metrics:
-        logger.info('Metrics are enabled.')
-
-
-def increment_async_engine_scheduler_stats_total_req():
-    """Set scheduler stats in async engine."""
-    get_current_scheduler_stats().num_total_reqs += 1
-
-
-def increment_async_engine_scheduler_stats_finished_req():
-    """Set scheduler stats in async engine."""
-    get_current_scheduler_stats().num_finished_reqs += 1
-
-
-# Metrics processor
+@singleton
 class MetricsProcessor():
     """Metrics processor."""
 
     def __init__(self):
+        """Init metrics processor."""
+        self.enable_metrics: bool = False
+        self.scheduler_stats = SchedulerStats()
+        self.stat_loggers = []
         self.metrics_queue: asyncio.Queue = None
         self.metrics_handler: asyncio.Task = None
 
     def start_metrics_handler(self, enable_metrics: bool):
-        set_metrics_enabled_flag(enable_metrics)
-
+        """Start metrics handler."""
+        self.enable_metrics = enable_metrics
         if enable_metrics and self.metrics_handler is None:
             self.metrics_queue = asyncio.Queue()
             self.metrics_handler = asyncio.create_task(self._run_metrics_handler())
             logger.info('Metrics handler task started.')
 
     async def stop_metrics_handler(self):
+        """Stop metrics handler."""
         if self.metrics_handler is not None:
             self.metrics_handler.cancel()
             try:
@@ -117,20 +46,20 @@ async def _run_metrics_handler(self):
         """A background task that consumes and processes metrics data."""
         while True:
             try:
-                # fetch
+                # fetch data from the queue
                 update_data = await self.metrics_queue.get()
-                outputs, req_state, iteration_stats, specdecode_stats = update_data
+                outputs, req_stats, iteration_stats, specdecode_stats = update_data
 
-                # update request state according the engine events
+                # update request stats
                 if outputs and outputs.req_metrics:
                     # when users visit "/abort_request" endpoint, `req_metrics` might be None
-                    req_state.update_from_events(outputs.req_metrics.engine_events)
+                    req_stats.update_from_events(outputs.req_metrics.engine_events)
 
-                # update iteration stats based on outputs and request state.
-                # some attributes of req_state will also be updated, e.g., lastest_token_time
-                iteration_stats.update_from_output(outputs, req_state)
+                # update iteration stats
+                # some attributes of req_stats will also be updated, e.g., lastest_token_time
+                iteration_stats.update_from_output(outputs, req_stats)
 
-                # spec decode
+                # update spec decode stats
                 if specdecode_stats is not None:
                     specdecode_stats.update_from_output(outputs)
 
@@ -140,34 +69,37 @@ async def _run_metrics_handler(self):
                     if specdecode_stats is not None:
                         stat_logger.record_specdecode(specdecode_stats)
 
+                # record finished request stats
                 if outputs.status == ResponseType.FINISH:
-                    # record finished request stats
                     for stat_logger in self.stat_loggers:
-                        stat_logger.record_finish(req_state.finish_stats)
+                        stat_logger.record_finish(req_stats)
 
                 self.metrics_queue.task_done()
             except asyncio.CancelledError:
                 break
             except Exception as e:
                 logger.exception(f'Metrics handler background task failed: {e}')
 
-    async def udpate_schedule_stats(self, schedule_metrics: ScheduleMetrics):
-        stats = get_current_scheduler_stats()
-        stats.update_from_schedule_metrics(schedule_metrics)
+    async def update_schedule_stats(self, schedule_metrics: ScheduleMetrics):
+        """Update schedule stats."""
+        self.scheduler_stats.update_from_schedule_metrics(schedule_metrics)
         # record schedule stats
         for stat_logger in self.stat_loggers:
-            stat_logger.record_schedule(stats)
+            stat_logger.record_schedule(self.scheduler_stats)
 
     def queue_update(self, update_data: tuple):
-        if not is_metrics_enabled() or self.metrics_queue is None:
+        """Queue update."""
+        if not self.enable_metrics or self.metrics_queue is None:
            return
         self.metrics_queue.put_nowait(update_data)
 
     def increment_total_requests(self):
-        increment_async_engine_scheduler_stats_total_req()
+        """Increment total requests."""
+        self.scheduler_stats.num_total_reqs += 1
 
     def increment_finished_requests(self):
-        increment_async_engine_scheduler_stats_finished_req()
+        """Increment finished requests."""
+        self.scheduler_stats.num_finished_reqs += 1
 
 
 metrics_processor = MetricsProcessor()
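
Because MetricsProcessor is now a singleton and the module instantiates metrics_processor at import time, callers can import that instance and bump the scheduler counters on it directly. A hedged usage sketch follows; the module path and the surrounding async-engine call site are assumptions, since the second file's name is not shown in this excerpt:

# hypothetical call site; the import path and function are assumptions for illustration
from lmdeploy.metrics.metrics_processor import metrics_processor

async def handle_one_request(request):
    # count the request as soon as it enters the engine
    metrics_processor.increment_total_requests()
    try:
        ...  # run the request through the inference engine
    finally:
        # count it as finished even if generation raised
        metrics_processor.increment_finished_requests()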
