From 6a19f46ae91af9377bc871fa9d08284027a38eee Mon Sep 17 00:00:00 2001
From: Profiler Team
Date: Thu, 13 Nov 2025 11:22:13 -0800
Subject: [PATCH] Feat: Add cost analysis of Pallas kernels using LLO tracing

This change introduces the capability to perform cost analysis of Pallas
kernels within XProf. Pallas kernels are represented as "custom-call" HLOs,
and this change enables XProf to estimate their performance characteristics
(flops, IOPS, and DMA bandwidth) by analyzing Low-Level Optimized (LLO)
instruction traces.

PiperOrigin-RevId: 831937474
---
 xprof/utils/op_metrics_db_utils.cc | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/xprof/utils/op_metrics_db_utils.cc b/xprof/utils/op_metrics_db_utils.cc
index f11906af..9954aea1 100644
--- a/xprof/utils/op_metrics_db_utils.cc
+++ b/xprof/utils/op_metrics_db_utils.cc
@@ -245,6 +245,25 @@ void SetOpMetricsFromHloEvent(const tsl::profiler::XEventVisitor& hlo_event,
                                        normalized_duration_ps);
     op_metrics->set_dma_stall_ps(op_metrics->dma_stall_ps() + dma_stall_ps);
   }
+  // Fill The Custom Call Information
+  if (op_metrics->category() == "custom-call") {
+    hlo_event.ForEachStat([&](const XStatVisitor& stat) {
+      if (!stat.Type()) return;
+      switch (static_cast<StatType>(*stat.Type())) {
+        case StatType::kBytesAccessed:
+          op_metrics->set_bytes_accessed(stat.IntOrUintValue());
+          break;
+        case StatType::kModelFlops:
+          op_metrics->set_model_flops(stat.IntOrUintValue());
+          break;
+        case StatType::kFlops:
+          op_metrics->set_flops(stat.IntOrUintValue());
+          break;
+        default:
+          break;
+      }
+    });
+  }
 }
 
 void MergeOpMetrics(const OpMetrics& src, OpMetrics& dst) {