feat: system query_metrics table uses timestamps, and improve session metrics output (#241)

YoungVor · web-flow · commit fd7554918421 · 2025-10-06T15:41:28.000-07:00
## What Changed?

- start_ts and end_ts in fenic's fenic_system.query_metrics table are now
timezone-aware UTC datetimes instead of strings (TIMESTAMPTZ in DuckDB)
- Unit test for above
- Include LM and RM stats in the session-stop metrics printout when present,
e.g., if there were any LM calls, include the request count and token usage
- Improve cost formatting: show up to 6 decimal places when they are
significant, otherwise fewer (but always at least 2).

## Testing and outputs

### query_metrics table

```
(Pdb) session.table("fenic_system.query_metrics").select("start_ts", "end_ts").show()
┌────────────────────────────────┬────────────────────────────────┐
│ start_ts                       ┆ end_ts                         │
╞════════════════════════════════╪════════════════════════════════╡
│ 2025-10-01 10:30:47.978108 UTC ┆ 2025-10-01 10:30:47.979987 UTC │
│ 2025-10-01 10:30:47.985357 UTC ┆ 2025-10-01 10:30:47.985617 UTC │
│ 2025-10-01 10:31:22.398716 UTC ┆ 2025-10-01 10:31:22.402193 UTC │
...
```
```
(Pdb) pp session.table("fenic_system.query_metrics").select("start_ts", "end_ts").schema
Schema(column_fields=[ColumnField(name='start_ts',
                                  data_type=_TimestampType(timezone='UTC')),
                      ColumnField(name='end_ts',
                                  data_type=_TimestampType(timezone='UTC'))])
```

### Metrics after session with no LM:

Session Usage Summary:
  App Name: test_app
  Session ID: a2a2d3a7-6023-40b6-bbb1-ced868d19282
  Total queries executed: 4
  Total execution time: 3.64ms
  Total rows processed: 11
  Total language model cost: $0.00
  Total embedding model cost: $0.00
  Total cost: $0.00

### Metrics after session with LM costs:

Session Usage Summary:
  App Name: document_extraction
  Session ID: efba1fb1-3c90-45cc-8380-033fe90ca4e2
  Total queries executed: 3
  Total execution time: 3045.72ms
  Total rows processed: 15
  Total language model cost: $0.000682
  Total language model requests: 5
  Total language model tokens: 2,960 input tokens, 0 cached input tokens, 398 output tokens
  Total embedding model cost: $0.00
  Total cost: $0.000682
diff --git a/src/fenic/_backends/local/session_state.py b/src/fenic/_backends/local/session_state.py
@@ -2,6 +2,7 @@
 
 import logging
 import uuid
+from decimal import ROUND_DOWN, Decimal
 from functools import cached_property
 from pathlib import Path
 from typing import Optional
@@ -122,12 +123,27 @@ def _print_session_usage_summary(self):
                 print(f"  Total queries executed: {costs['query_count']}")
                 print(f"  Total execution time: {costs['total_execution_time_ms']:.2f}ms")
                 print(f"  Total rows processed: {costs['total_output_rows']:,}")
-                print(f"  Total language model cost: ${costs['total_lm_cost']:.6f}")
-                print(f"  Total embedding model cost: ${costs['total_rm_cost']:.6f}")
+                print(f"  Total language model cost: ${_format_float(costs['total_lm_cost'])}")
+                if costs['total_lm_requests'] > 0:
+                    print(f"  Total language model requests: {costs['total_lm_requests']}")
+                    print(f"  Total language model tokens: {costs['total_lm_uncached_input_tokens']:,} input tokens, {costs['total_lm_cached_input_tokens']:,} cached input tokens, {costs['total_lm_output_tokens']:,} output tokens")
+                print(f"  Total embedding model cost: ${_format_float(costs['total_rm_cost'])}")
+                if costs['total_rm_requests'] > 0:
+                    print(f"  Total embedding model requests: {costs['total_rm_requests']}")
+                    print(f"  Total embedding model tokens: {costs['total_rm_input_tokens']:,} input tokens")
                 total_cost = costs['total_lm_cost'] + costs['total_rm_cost']
-                print(f"  Total cost: ${total_cost:.6f}")
+                print(f"  Total cost: ${_format_float(total_cost)}")
         except Exception as e:
             # Don't fail session stop if metrics summary fails
             logger.warning(f"Failed to print session usage summary: {e}")
 
+# Utility functions
 
+def _format_float(value: float) -> str:
+    """Format a float with 2 to 6 decimals: digits past the 6th are truncated, surplus trailing zeros stripped."""
+    # str() gives the shortest round-trip repr; Decimal(0.29) is 0.28999... and would truncate to 0.289999.
+    d = Decimal(str(value)).quantize(Decimal("0.000001"), rounding=ROUND_DOWN)  # 6 decimals max
+    s = format(d.normalize(), "f")  # remove exponent notation
+    integer_part, _, decimal_part = s.partition(".")
+    decimal_part = (decimal_part.rstrip("0") or "0").ljust(2, "0")  # strip trailing zeros, keep at least 2 digits
+    return f"{integer_part}.{decimal_part}"
diff --git a/src/fenic/_backends/local/system_table_client.py b/src/fenic/_backends/local/system_table_client.py
@@ -26,6 +26,7 @@
     DoubleType,
     IntegerType,
     StringType,
+    TimestampType,
 )
 
 # Constants for system schema and table names
@@ -630,6 +631,12 @@ def get_metrics_for_session(self, cursor: duckdb.DuckDBPyConnection, session_id:
                 f"""
                 SELECT
                     SUM(total_lm_cost) as total_lm_cost,
+                    SUM(total_lm_uncached_input_tokens) as total_lm_uncached_input_tokens,
+                    SUM(total_lm_cached_input_tokens) as total_lm_cached_input_tokens,
+                    SUM(total_lm_output_tokens) as total_lm_output_tokens,
+                    SUM(total_lm_requests) as total_lm_requests,
+                    SUM(total_rm_input_tokens) as total_rm_input_tokens,
+                    SUM(total_rm_requests) as total_rm_requests,
                     SUM(total_rm_cost) as total_rm_cost,
                     COUNT(*) as query_count,
                     SUM(execution_time_ms) as total_execution_time_ms,
@@ -643,6 +650,12 @@ def get_metrics_for_session(self, cursor: duckdb.DuckDBPyConnection, session_id:
             if result is None:
                 return {
                     "total_lm_cost": 0.0,
+                    "total_lm_uncached_input_tokens": 0,
+                    "total_lm_cached_input_tokens": 0,
+                    "total_lm_output_tokens": 0,
+                    "total_lm_requests": 0,
+                    "total_rm_input_tokens": 0,
+                    "total_rm_requests": 0,
                     "total_rm_cost": 0.0,
                     "query_count": 0,
                     "total_execution_time_ms": 0.0,
@@ -651,10 +664,16 @@ def get_metrics_for_session(self, cursor: duckdb.DuckDBPyConnection, session_id:
 
             return {
                 "total_lm_cost": result[0],
-                "total_rm_cost": result[1],
-                "query_count": result[2],
-                "total_execution_time_ms": result[3],
-                "total_output_rows": result[4],
+                "total_lm_uncached_input_tokens": result[1],
+                "total_lm_cached_input_tokens": result[2],
+                "total_lm_output_tokens": result[3],
+                "total_lm_requests": result[4],
+                "total_rm_input_tokens": result[5],
+                "total_rm_requests": result[6],
+                "total_rm_cost": result[7],
+                "query_count": result[8],
+                "total_execution_time_ms": result[9],
+                "total_output_rows": result[10],
             }
 
         except Exception as e:
@@ -713,8 +732,8 @@ def _initialize_read_only_system_schema_and_tables(self, cursor: duckdb.DuckDBPy
                     session_id TEXT NOT NULL,
                     execution_time_ms DOUBLE NOT NULL,
                     num_output_rows INTEGER NOT NULL,
-                    start_ts TIMESTAMP NOT NULL,
-                    end_ts TIMESTAMP NOT NULL,
+                    start_ts TIMESTAMPTZ NOT NULL,
+                    end_ts TIMESTAMPTZ NOT NULL,
                     total_lm_cost DOUBLE NOT NULL DEFAULT 0.0,
                     total_lm_uncached_input_tokens INTEGER NOT NULL DEFAULT 0,
                     total_lm_cached_input_tokens INTEGER NOT NULL DEFAULT 0,
@@ -733,8 +752,8 @@ def _initialize_read_only_system_schema_and_tables(self, cursor: duckdb.DuckDBPy
                 ColumnField(name="session_id", data_type=StringType),
                 ColumnField(name="execution_time_ms", data_type=DoubleType),
                 ColumnField(name="num_output_rows", data_type=IntegerType),
-                ColumnField(name="start_ts", data_type=StringType),  # Store as ISO timestamp string
-                ColumnField(name="end_ts", data_type=StringType),  # Store as ISO timestamp string
+                ColumnField(name="start_ts", data_type=TimestampType),
+                ColumnField(name="end_ts", data_type=TimestampType),
                 ColumnField(name="total_lm_cost", data_type=DoubleType),
                 ColumnField(name="total_lm_uncached_input_tokens", data_type=IntegerType),
                 ColumnField(name="total_lm_cached_input_tokens", data_type=IntegerType),
diff --git a/tests/_backends/local/catalog/test_metrics_table.py b/tests/_backends/local/catalog/test_metrics_table.py
@@ -1,4 +1,6 @@
 import os
+import zoneinfo
+from datetime import datetime
 
 import pytest
 
@@ -117,6 +119,15 @@ def test_metrics_table_contains_execution_data(local_session: Session, sample_df
     assert latest_metric["execution_id"][0] == execution_id
     assert latest_metric["start_ts"][0] is not None
     assert latest_metric["end_ts"][0] is not None
+    
+    # Check that start_ts and end_ts are datetime objects with UTC timezone
+    start_ts = latest_metric["start_ts"][0]
+    end_ts = latest_metric["end_ts"][0]
+    
+    assert isinstance(start_ts, datetime)
+    assert isinstance(end_ts, datetime)
+    assert start_ts.tzinfo == zoneinfo.ZoneInfo(key='UTC')
+    assert end_ts.tzinfo == zoneinfo.ZoneInfo(key='UTC')
 
 
 def test_multiple_sessions_different_metrics(tmp_path, local_session_config: SessionConfig):