From 3e36c10349ad8d18a845b5bc2187fd94ee417c1d Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Tue, 11 Nov 2025 11:29:38 +0100 Subject: [PATCH] Make non-running pods panels consistent and style them intuitively --- dashboards/cluster.jsonnet | 21 ++++++------- dashboards/common.libsonnet | 55 +++++++++++++++++++++++++++++++++++ dashboards/jupyterhub.jsonnet | 4 +-- 3 files changed, 67 insertions(+), 13 deletions(-) diff --git a/dashboards/cluster.jsonnet b/dashboards/cluster.jsonnet index d20eff1..88783f1 100755 --- a/dashboards/cluster.jsonnet +++ b/dashboards/cluster.jsonnet @@ -403,22 +403,23 @@ local nodeOOMKills = ]); local nonRunningPods = - common.barChartOptions - + barChart.new('Pods not in Running state') - + barChart.panelOptions.withDescription( + common.tsOptions + + common.tsPodStateStylingOverrides + + ts.new('Non Running Pods') + + ts.panelOptions.withDescription( ||| - Pods in states other than 'Running'. + Pods in a non-running state in the hub's namespace. - In a functional clusters, pods should not be in non-Running states for long. - |||, + Pods stuck in non-running states often indicate an error condition + ||| ) - + ts.fieldConfig.defaults.custom.stacking.withMode('normal') - + barChart.standardOptions.withDecimals(0) - + barChart.queryOptions.withTargets([ + + ts.queryOptions.withTargets([ prometheus.new( '$PROMETHEUS_DS', ||| - sum(kube_pod_status_phase{phase!="Running"}) by (phase) + sum( + kube_pod_status_phase{phase!="Running"} + ) by (phase) ||| ) + prometheus.withLegendFormat('{{phase}}'), diff --git a/dashboards/common.libsonnet b/dashboards/common.libsonnet index 14f60dc..dd29b98 100644 --- a/dashboards/common.libsonnet +++ b/dashboards/common.libsonnet @@ -93,6 +93,61 @@ local _getDashedLineOverride(pattern, color) = { ]) , + tsPodStateStylingOverrides: + ts.standardOptions.withOverrides([ + { + matcher: { id: 'byName', options: 'Pending' }, + properties: [{ + id: 'color', + value: { + fixedColor: 'yellow', + mode: 'fixed', + }, + }], + }, + { + matcher: { id: 'byName', options: 'Running' }, + properties: [{ + id: 'color', + value: { + fixedColor: 'blue', + mode: 'fixed', + }, + }], + }, + { + matcher: { id: 'byName', options: 'Succeeded' }, + properties: [{ + id: 'color', + value: { + fixedColor: 'green', + mode: 'fixed', + }, + }], + }, + { + matcher: { id: 'byName', options: 'Unknown' }, + properties: [{ + id: 'color', + value: { + fixedColor: 'orange', + mode: 'fixed', + }, + }], + }, + { + matcher: { id: 'byName', options: 'Failed' }, + properties: [{ + id: 'color', + value: { + fixedColor: 'red', + mode: 'fixed', + }, + }], + }, + ]) + , + // grafonnet ref: https://grafana.github.io/grafonnet/API/dashboard/variable.html variables: { prometheus: diff --git a/dashboards/jupyterhub.jsonnet b/dashboards/jupyterhub.jsonnet index 62574d1..c74b9a8 100755 --- a/dashboards/jupyterhub.jsonnet +++ b/dashboards/jupyterhub.jsonnet @@ -276,6 +276,7 @@ local usersPerNode = local nonRunningPods = common.tsOptions + + common.tsPodStateStylingOverrides + ts.new('Non Running Pods') + ts.panelOptions.withDescription( ||| @@ -284,8 +285,6 @@ local nonRunningPods = Pods stuck in non-running states often indicate an error condition ||| ) - // decimalsY1=0, - + ts.fieldConfig.defaults.custom.stacking.withMode('normal') + ts.queryOptions.withTargets([ prometheus.new( '$PROMETHEUS_DS', @@ -311,7 +310,6 @@ local sharedVolumeFreeSpace = what extra deployment is needed. ||| ) - // decimalsY1=0, + ts.standardOptions.withMax(1) + ts.standardOptions.withUnit('percentunit') + ts.queryOptions.withTargets([