Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 11 additions & 10 deletions dashboards/cluster.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -403,22 +403,23 @@ local nodeOOMKills =
]);

local nonRunningPods =
common.barChartOptions
+ barChart.new('Pods not in Running state')
+ barChart.panelOptions.withDescription(
common.tsOptions
+ common.tsPodStateStylingOverrides
+ ts.new('Non Running Pods')
+ ts.panelOptions.withDescription(
|||
Pods in states other than 'Running'.
Pods in a non-running state in the hub's namespace.

In a functional clusters, pods should not be in non-Running states for long.
|||,
Pods stuck in non-running states often indicate an error condition
|||
)
+ ts.fieldConfig.defaults.custom.stacking.withMode('normal')
+ barChart.standardOptions.withDecimals(0)
+ barChart.queryOptions.withTargets([
+ ts.queryOptions.withTargets([
prometheus.new(
'$PROMETHEUS_DS',
|||
sum(kube_pod_status_phase{phase!="Running"}) by (phase)
sum(
kube_pod_status_phase{phase!="Running"}
) by (phase)
|||
)
+ prometheus.withLegendFormat('{{phase}}'),
Expand Down
55 changes: 55 additions & 0 deletions dashboards/common.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,61 @@ local _getDashedLineOverride(pattern, color) = {
])
,

// Field overrides that pin a fixed color to each series, matched by its
// display name ('byName'). The names below are the values the pod panels
// use as legend labels, so the same name gets the same color wherever
// this mixin is added to a panel.
tsPodStateStylingOverrides:
  // One entry per series name; list order is the order of the emitted overrides.
  local seriesColors = [
    { name: 'Pending', color: 'yellow' },
    { name: 'Running', color: 'blue' },
    { name: 'Succeeded', color: 'green' },
    { name: 'Unknown', color: 'orange' },
    { name: 'Failed', color: 'red' },
  ];
  ts.standardOptions.withOverrides([
    {
      matcher: { id: 'byName', options: entry.name },
      properties: [{
        id: 'color',
        value: {
          fixedColor: entry.color,
          mode: 'fixed',
        },
      }],
    }
    for entry in seriesColors
  ])
,

// grafonnet ref: https://grafana.github.io/grafonnet/API/dashboard/variable.html
variables: {
prometheus:
Expand Down
4 changes: 1 addition & 3 deletions dashboards/jupyterhub.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ local usersPerNode =

local nonRunningPods =
common.tsOptions
+ common.tsPodStateStylingOverrides
+ ts.new('Non Running Pods')
+ ts.panelOptions.withDescription(
|||
Expand All @@ -284,8 +285,6 @@ local nonRunningPods =
Pods stuck in non-running states often indicate an error condition
|||
)
// decimalsY1=0,
+ ts.fieldConfig.defaults.custom.stacking.withMode('normal')
+ ts.queryOptions.withTargets([
prometheus.new(
'$PROMETHEUS_DS',
Expand All @@ -311,7 +310,6 @@ local sharedVolumeFreeSpace =
what extra deployment is needed.
|||
)
// decimalsY1=0,
+ ts.standardOptions.withMax(1)
+ ts.standardOptions.withUnit('percentunit')
+ ts.queryOptions.withTargets([
Expand Down