Optimize profiler default parameter: make aic_metrics configurable and default to PipeUtilization

zzzzwwjj · zzzzwwjj · commit 3f75181ea022 · 2025-12-04T11:21:52.000+08:00
Signed-off-by: zzzzwwjj &lt;1183291235@qq.com&gt;
diff --git a/tests/ut/worker/test_worker_v1.py b/tests/ut/worker/test_worker_v1.py
@@ -520,7 +520,7 @@ def test_init_profiler_enabled(
         # Set enum mocks
         mock_export_type.Text = "Text"
         mock_profiler_level.Level1 = "Level1"
-        mock_aic_metrics.AiCoreNone = "AiCoreNone"
+        mock_aic_metrics.PipeUtilization = "PipeUtilization"
         mock_profiler_activity.CPU = "CPU"
         mock_profiler_activity.NPU = "NPU"
 
@@ -554,7 +554,7 @@ def test_init_profiler_enabled(
                 "export_type": "Text",
                 "profiler_level": "Level1",
                 "msprof_tx": False,
-                "aic_metrics": "AiCoreNone",
+                "aic_metrics": "PipeUtilization",
                 "l2_cache": False,
                 "op_attr": False,
                 "data_simplification": False,
diff --git a/vllm_ascend/envs.py b/vllm_ascend/envs.py
@@ -176,6 +176,20 @@
     # Whether to anbale dynamic EPLB
     "DYNAMIC_EPLB":
     lambda: os.getenv("DYNAMIC_EPLB", "false").lower(),
+    # Select which AI Core metrics the torch_npu profiler collects. The following integer options can be configured:
+    # 0: torch_npu.profiler.AiCMetrics.AiCoreNone;
+    # 1: torch_npu.profiler.AiCMetrics.PipeUtilization;
+    # 2: torch_npu.profiler.AiCMetrics.ArithmeticUtilization;
+    # 3: torch_npu.profiler.AiCMetrics.Memory;
+    # 4: torch_npu.profiler.AiCMetrics.MemoryL0;
+    # 5: torch_npu.profiler.AiCMetrics.ResourceConflictRatio;
+    # 6: torch_npu.profiler.AiCMetrics.MemoryUB;
+    # 7: torch_npu.profiler.AiCMetrics.L2Cache;
+    # 8: torch_npu.profiler.AiCMetrics.MemoryAccess;
+    # If not set, it defaults to 1 (torch_npu.profiler.AiCMetrics.PipeUtilization).
+    # For the meaning of each option, refer to: https://www.hiascend.com/document/detail/zh/Pytorch/720/apiref/torchnpuCustomsapi/context/torch_npu-profiler-AiCMetrics.md
+    "VLLM_ASCEND_PROFILER_AIC_METRICS":
+    lambda: int(os.getenv("VLLM_ASCEND_PROFILER_AIC_METRICS", 1)),
 }
 
 # end-env-vars-definition
diff --git a/vllm_ascend/worker/worker_v1.py b/vllm_ascend/worker/worker_v1.py
@@ -430,11 +430,24 @@ def _init_profiler(self):
             logger.info("Profiling enabled. Traces will be saved to: %s",
                         torch_profiler_trace_dir)
 
+            aic_metrics_list = [
+                torch_npu.profiler.AiCMetrics.AiCoreNone,
+                torch_npu.profiler.AiCMetrics.PipeUtilization,
+                torch_npu.profiler.AiCMetrics.ArithmeticUtilization,
+                torch_npu.profiler.AiCMetrics.Memory,
+                torch_npu.profiler.AiCMetrics.MemoryL0,
+                torch_npu.profiler.AiCMetrics.ResourceConflictRatio,
+                torch_npu.profiler.AiCMetrics.MemoryUB,
+                torch_npu.profiler.AiCMetrics.L2Cache,
+                torch_npu.profiler.AiCMetrics.MemoryAccess
+            ]
+
             experimental_config = torch_npu.profiler._ExperimentalConfig(
                 export_type=torch_npu.profiler.ExportType.Text,
                 profiler_level=torch_npu.profiler.ProfilerLevel.Level1,
                 msprof_tx=False,
-                aic_metrics=torch_npu.profiler.AiCMetrics.AiCoreNone,
+                aic_metrics=aic_metrics_list[
+                    envs_ascend.VLLM_ASCEND_PROFILER_AIC_METRICS],
                 l2_cache=False,
                 op_attr=False,
                 data_simplification=False,