Commit 7366d99

Improve metrics (#4178)
* use singleton, simplify metrics
* fix typo
* change comments pos
* minor
1 parent ecb6cce commit 7366d99
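
The refactor drops the module-level MetricsManager, MetricsContext, and helper functions in favor of a single MetricsProcessor class decorated with singleton from lmdeploy.pytorch.utils. As a rough sketch of that pattern (not the actual lmdeploy implementation, which may differ), such a class decorator typically caches one instance per decorated class:

# illustrative sketch only; lmdeploy.pytorch.utils.singleton may be implemented differently
def singleton(cls):
    # cache one instance per decorated class
    instances = {}

    def get_instance(*args, **kwargs):
        # construct the instance on first use, return the cached one afterwards
        if cls not in instances:
            instances[cls] = cls(*args, **kwargs)
        return instances[cls]

    return get_instance

With a decorator along these lines, every call to MetricsProcessor() returns the same object, which is why the module can expose a ready-made metrics_processor = MetricsProcessor() at import time.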

File tree: 6 files changed, +141 -230 lines changed

lmdeploy/metrics/loggers.py

Lines changed: 19 additions & 20 deletions
@@ -8,7 +8,7 @@
 import numpy as np
 
-from lmdeploy.metrics.stats import FinishedRequestStats, IterationStats, SchedulerStats, SpeculativeDecodingStats
+from lmdeploy.metrics.stats import IterationStats, RequestStats, SchedulerStats, SpeculativeDecodingStats
 from lmdeploy.utils import get_logger
 
 logger = get_logger('lmdeploy')
@@ -70,18 +70,18 @@ def record_specdecode(self, stats: SpeculativeDecodingStats):
         self.num_accepted_tokens += stats.num_accepted_tokens
         self.num_accepted_tokens_per_pos += stats.num_accepted_tokens_per_pos
 
-    def record_finish(self, stats: FinishedRequestStats):
+    def record_finish(self, stats: RequestStats):
         pass
 
-    def _get_log_spec_msg(self):
+    def log_spec_msg(self):
         """Get spec decoding logging msg."""
         if self.num_drafts == 0:
-            return ''
+            return
 
         draft_acceptance_rate = (self.num_accepted_tokens / self.num_draft_tokens *
                                  100 if self.num_draft_tokens > 0 else float('nan'))
 
-        # Conventionally, mean acceptance length includes the bonus token
+        # conventionally, mean acceptance length includes the bonus token
         mean_acceptance_length = 1 + (self.num_accepted_tokens / self.num_drafts)
 
         acceptance_rates = self.num_accepted_tokens_per_pos / self.num_drafts
@@ -93,23 +93,23 @@ def _get_log_spec_msg(self):
                    f'Accepted: {self.num_accepted_tokens} tokens, '
                    f'Drafted: {self.num_draft_tokens} tokens, '
                    f'Per-position acceptance rate: {rates_str}')
-        return log_msg
+        print(log_msg, flush=True)
 
     def log(self):
         now = time.perf_counter()
+
+        # skip logging if no tokens were processed
         if self.total_prompt_tokens == 0 and self.total_generation_tokens == 0:
-            # Not show the metrics log in console
             self._reset(now)
             return
 
+        # derive log information
         prompt_throughput = self.total_prompt_tokens / (now - self.last_log_time)
         generation_throughput = self.total_generation_tokens / (now - self.last_log_time)
-
-        spec_log_msg = self._get_log_spec_msg()
+        scheduler_stats = self.last_scheduler_stats
         self._reset(now)
 
-        scheduler_stats = self.last_scheduler_stats
-        # Format and print output.
+        # format and print
         log_msg = (f"[{datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')} "
                    f'DP{self.dp_rank}] '
                    f'Avg prompt throughput: {prompt_throughput:.1f} tokens/s, '
@@ -120,8 +120,7 @@ def log(self):
                    f'Waiting: {scheduler_stats.num_waiting_reqs} reqs, '
                    f'GPU KV cache usage: {scheduler_stats.gpu_cache_usage * 100 :.1f}%')
         print(log_msg, flush=True)
-        if spec_log_msg:
-            print(spec_log_msg, flush=True)
+        self.log_spec_msg()
 
 
 class PrometheusStatLogger(StatLoggerBase):
@@ -136,12 +135,12 @@ def __init__(self, model_name: str, max_model_len: int, dp_rank: int = 0):
 
         self.dp_rank = dp_rank
 
-        # Unregister any existing lmdeploy collectors
+        # unregister any existing lmdeploy collectors
         for collector in list(prometheus_client.REGISTRY._collector_to_names):
             if hasattr(collector, '_name') and 'lmdeploy' in collector._name:
                 prometheus_client.REGISTRY.unregister(collector)
 
-        # Config information
+        # config information
         self.info_backend_config = prometheus_client.Info(name='lmdeploy:backend_config',
                                                           documentation='information of backend_config')
 
@@ -319,13 +318,13 @@ def record_iteration(self, stats: IterationStats) -> None:
         if stats.itl:
             self.histogram_iter_token_latency.observe(stats.itl)
 
-    def record_finish(self, stats: FinishedRequestStats) -> None:
+    def record_finish(self, stats: RequestStats) -> None:
         self.counter_request_success[stats.finish_reason].inc()
         self.histogram_e2e_time_request.observe(stats.e2e_latency)
-        self.histogram_queue_time_request.observe(stats.queued_time)
-        self.histogram_prefill_time_request.observe(stats.prefill_time)
-        self.histogram_inference_time_request.observe(stats.inference_time)
-        self.histogram_decode_time_request.observe(stats.decode_time)
+        self.histogram_queue_time_request.observe(stats.queued_time_interval)
+        self.histogram_prefill_time_request.observe(stats.prefill_time_interval)
+        self.histogram_inference_time_request.observe(stats.inference_time_interval)
+        self.histogram_decode_time_request.observe(stats.decode_time_interval)
         self.histogram_num_prompt_tokens_request.observe(stats.prompt_tokens)
         self.histogram_num_generation_tokens_request.observe(stats.generation_tokens)
 
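
On the Prometheus side, record_finish feeds the per-request interval fields into prometheus_client histograms. A small self-contained sketch of that mechanism (the metric name, buckets, and sample value below are illustrative, not taken from lmdeploy):

# illustrative prometheus_client usage; name, buckets, and value are assumptions
import prometheus_client

histogram_queue_time_request = prometheus_client.Histogram(
    name='example:request_queue_time_seconds',
    documentation='Time a request spent queued before scheduling',
    buckets=(0.01, 0.05, 0.1, 0.5, 1.0, 5.0))

# each finished request contributes one observation per histogram
queued_time_interval = 0.042  # seconds, hypothetical value
histogram_queue_time_request.observe(queued_time_interval)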

Lines changed: 29 additions & 97 deletions
@@ -1,108 +1,37 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import asyncio
-from contextlib import contextmanager
-from dataclasses import dataclass, field
 
 from lmdeploy.messages import ResponseType, ScheduleMetrics
+from lmdeploy.pytorch.utils import singleton
 from lmdeploy.utils import get_logger
 
 from .stats import SchedulerStats
 
 logger = get_logger('lmdeploy')
 
 
-@dataclass
-class MetricsContext:
-    enable_metrics: bool = False
-    scheduler_stats: SchedulerStats = field(default_factory=SchedulerStats)
-
-
-class MetricsManager:
-
-    def __init__(self):
-        """Initialize metrics manager."""
-        self._current_ctx = MetricsContext()
-
-    def set_context(self, ctx: MetricsContext):
-        """Set metrics context."""
-        self._current_ctx = ctx
-
-    def get_context(self):
-        """Get current context."""
-        return self._current_ctx
-
-    @contextmanager
-    def context(self, ctx: MetricsContext):
-        """Context manager."""
-        old_ctx = self.get_context()
-        self.set_context(ctx)
-        try:
-            yield ctx
-        finally:
-            self.set_context(old_ctx)
-
-
-_METRICS_MANAGER = None
-
-
-def get_metrics_manager():
-    global _METRICS_MANAGER
-    if _METRICS_MANAGER is None:
-        _METRICS_MANAGER = MetricsManager()
-
-    return _METRICS_MANAGER
-
-
-# Metrics getters
-def is_metrics_enabled():
-    return get_metrics_manager().get_context().enable_metrics
-
-
-def get_current_metrics_context():
-    return get_metrics_manager().get_context()
-
-
-def get_current_scheduler_stats():
-    return get_metrics_manager().get_context().scheduler_stats
-
-
-# Metrics setters
-def set_metrics_enabled_flag(enable_metrics: bool):
-    """Set metrics enabled flag."""
-    ctx = get_current_metrics_context()
-    ctx.enable_metrics = enable_metrics
-
-    if enable_metrics:
-        logger.info('Metrics are enabled.')
-
-
-def increment_async_engine_scheduler_stats_total_req():
-    """Set scheduler stats in async engine."""
-    get_current_scheduler_stats().num_total_reqs += 1
-
-
-def increment_async_engine_scheduler_stats_finished_req():
-    """Set scheduler stats in async engine."""
-    get_current_scheduler_stats().num_finished_reqs += 1
-
-
-# Metrics processor
+@singleton
 class MetricsProcessor():
     """Metrics processor."""
 
     def __init__(self):
+        """Init metrics processor."""
+        self.enable_metrics: bool = False
+        self.scheduler_stats = SchedulerStats()
+        self.stat_loggers = []
         self.metrics_queue: asyncio.Queue = None
         self.metrics_handler: asyncio.Task = None
 
     def start_metrics_handler(self, enable_metrics: bool):
-        set_metrics_enabled_flag(enable_metrics)
-
+        """Start metrics handler."""
+        self.enable_metrics = enable_metrics
         if enable_metrics and self.metrics_handler is None:
             self.metrics_queue = asyncio.Queue()
             self.metrics_handler = asyncio.create_task(self._run_metrics_handler())
             logger.info('Metrics handler task started.')
 
     async def stop_metrics_handler(self):
+        """Stop metrics handler."""
         if self.metrics_handler is not None:
             self.metrics_handler.cancel()
             try:
@@ -117,20 +46,20 @@ async def _run_metrics_handler(self):
         """A background task that consumes and processes metrics data."""
         while True:
             try:
-                # fetch
+                # fetch data from the queue
                 update_data = await self.metrics_queue.get()
-                outputs, req_state, iteration_stats, specdecode_stats = update_data
+                outputs, req_stats, iteration_stats, specdecode_stats = update_data
 
-                # update request state according the engine events
+                # update request stats
                 if outputs and outputs.req_metrics:
                     # when users visit "/abort_request" endpoint, `req_metrics` might be None
-                    req_state.update_from_events(outputs.req_metrics.engine_events)
+                    req_stats.update_from_events(outputs.req_metrics.engine_events)
 
-                # update iteration stats based on outputs and request state.
-                # some attributes of req_state will also be updated, e.g., lastest_token_time
-                iteration_stats.update_from_output(outputs, req_state)
+                # update iteration stats
+                # some attributes of req_stats will also be updated, e.g., lastest_token_time
+                iteration_stats.update_from_output(outputs, req_stats)
 
-                # spec decode
+                # update spec decode stats
                 if specdecode_stats is not None:
                     specdecode_stats.update_from_output(outputs)
 
@@ -140,34 +69,37 @@ async def _run_metrics_handler(self):
                     if specdecode_stats is not None:
                         stat_logger.record_specdecode(specdecode_stats)
 
+                # record finished request stats
                 if outputs.status == ResponseType.FINISH:
-                    # record finished request stats
                     for stat_logger in self.stat_loggers:
-                        stat_logger.record_finish(req_state.finish_stats)
+                        stat_logger.record_finish(req_stats)
 
                 self.metrics_queue.task_done()
             except asyncio.CancelledError:
                 break
             except Exception as e:
                 logger.exception(f'Metrics handler background task failed: {e}')
 
-    async def udpate_schedule_stats(self, schedule_metrics: ScheduleMetrics):
-        stats = get_current_scheduler_stats()
-        stats.update_from_schedule_metrics(schedule_metrics)
+    async def update_schedule_stats(self, schedule_metrics: ScheduleMetrics):
+        """Update schedule stats."""
+        self.scheduler_stats.update_from_schedule_metrics(schedule_metrics)
         # record schedule stats
         for stat_logger in self.stat_loggers:
-            stat_logger.record_schedule(stats)
+            stat_logger.record_schedule(self.scheduler_stats)
 
     def queue_update(self, update_data: tuple):
-        if not is_metrics_enabled() or self.metrics_queue is None:
+        """Queue update."""
+        if not self.enable_metrics or self.metrics_queue is None:
            return
         self.metrics_queue.put_nowait(update_data)
 
     def increment_total_requests(self):
-        increment_async_engine_scheduler_stats_total_req()
+        """Increment total requests."""
+        self.scheduler_stats.num_total_reqs += 1
 
     def increment_finished_requests(self):
-        increment_async_engine_scheduler_stats_finished_req()
+        """Increment finished requests."""
+        self.scheduler_stats.num_finished_reqs += 1
 
 
 metrics_processor = MetricsProcessor()
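
Because MetricsProcessor is now a singleton and the module instantiates metrics_processor at import time, callers can import that instance and bump the scheduler counters on it directly. A hedged usage sketch follows; the module path and the surrounding async-engine call site are assumptions, since the second file's name is not shown in this excerpt:

# hypothetical call site; the import path and function are assumptions for illustration
from lmdeploy.metrics.metrics_processor import metrics_processor

async def handle_one_request(request):
    # count the request as soon as it enters the engine
    metrics_processor.increment_total_requests()
    try:
        ...  # run the request through the inference engine
    finally:
        # count it as finished even if generation raised
        metrics_processor.increment_finished_requests()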
