diff --git a/charts/hami/templates/_commons.tpl b/charts/hami/templates/_commons.tpl
index 78a262c90..56a09abab 100644
--- a/charts/hami/templates/_commons.tpl
+++ b/charts/hami/templates/_commons.tpl
@@ -46,4 +46,19 @@ imagePullSecrets:
   - name: {{ . }}
 {{- end }}
 {{- end }}
-{{- end -}}
\ No newline at end of file
+{{- end -}}
+
+{{/*
+Renders a value that may contain Helm template expressions.
+A string value is passed to `tpl` as-is; any other value is first
+serialized with `toYaml` and the resulting text is passed to `tpl`.
+Usage:
+{{ include "common.tplvalues.render" ( dict "value" .Values.path.to.the.Value "context" $) }}
+*/}}
+{{- define "common.tplvalues.render" -}}
+  {{- if typeIs "string" .value }}
+    {{- tpl .value .context }}
+  {{- else }}
+    {{- tpl (.value | toYaml) .context }}
+  {{- end }}
+{{- end -}}
diff --git a/charts/hami/templates/device-plugin/servicemonitor.yaml b/charts/hami/templates/device-plugin/servicemonitor.yaml
new file mode 100644
index 000000000..987c9028f
--- /dev/null
+++ b/charts/hami/templates/device-plugin/servicemonitor.yaml
@@ -0,0 +1,30 @@
+{{- /*
+ServiceMonitor for the HAMi device plugin. Rendered only when
+.Values.devicePlugin.serviceMonitor.enabled is truthy.
+NOTE(review): the endpoint port name, the jobLabel and the selector label refer
+to the device-plugin Service defined elsewhere in the chart; confirm they match.
+*/ -}}
+{{- if .Values.devicePlugin.serviceMonitor.enabled }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ include "hami-vgpu.device-plugin" . }}
+  namespace: {{ include "hami-vgpu.namespace" . }}
+  labels:
+    {{- include "hami-vgpu.labels" . | nindent 4 }}
+    {{- if .Values.devicePlugin.serviceMonitor.labels }}
+    {{- include "common.tplvalues.render" ( dict "value" .Values.devicePlugin.serviceMonitor.labels "context" $ ) | nindent 4 }}
+    {{- end }}
+spec:
+  endpoints:
+    - interval: {{ .Values.devicePlugin.serviceMonitor.interval | quote }}
+      path: /metrics
+      port: monitorport
+  jobLabel: app
+  namespaceSelector:
+    matchNames:
+      - {{ include "hami-vgpu.namespace" . }}
+  selector:
+    matchLabels:
+      app.kubernetes.io/component: hami-device-plugin
+{{- end }}
diff --git a/charts/hami/templates/scheduler/servicemonitor.yaml b/charts/hami/templates/scheduler/servicemonitor.yaml
new file mode 100644
index 000000000..a73c9c5a9
--- /dev/null
+++ b/charts/hami/templates/scheduler/servicemonitor.yaml
@@ -0,0 +1,30 @@
+{{- /*
+ServiceMonitor for the HAMi scheduler. Rendered only when
+.Values.scheduler.serviceMonitor.enabled is truthy.
+NOTE(review): the endpoint port name, the jobLabel and the selector label refer
+to the scheduler Service defined elsewhere in the chart; confirm they match.
+*/ -}}
+{{- if .Values.scheduler.serviceMonitor.enabled }}
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: {{ include "hami-vgpu.scheduler" . }}
+  namespace: {{ include "hami-vgpu.namespace" . }}
+  labels:
+    {{- include "hami-vgpu.labels" . | nindent 4 }}
+    {{- if .Values.scheduler.serviceMonitor.labels }}
+    {{- include "common.tplvalues.render" ( dict "value" .Values.scheduler.serviceMonitor.labels "context" $ ) | nindent 4 }}
+    {{- end }}
+spec:
+  endpoints:
+    - interval: {{ .Values.scheduler.serviceMonitor.interval | quote }}
+      path: /metrics
+      port: monitor
+  jobLabel: app
+  namespaceSelector:
+    matchNames:
+      - {{ include "hami-vgpu.namespace" . }}
+  selector:
+    matchLabels:
+      app.kubernetes.io/component: hami-scheduler
+{{- end }}
diff --git a/charts/hami/values.yaml b/charts/hami/values.yaml
index bae81f026..955f765c3 100644
--- a/charts/hami/values.yaml
+++ b/charts/hami/values.yaml
@@ -232,6 +232,16 @@ scheduler:
     monitorTargetPort: 9395
     labels: {}
     annotations: {}
+  # ServiceMonitor (monitoring.coreos.com/v1) for the hami scheduler
+  serviceMonitor:
+    # render the ServiceMonitor resource when true
+    enabled: false
+    # scrape interval of the /metrics endpoint
+    interval: "15s"
+    # extra labels for the ServiceMonitor metadata; rendered through tpl, so
+    # values may contain template expressions
+    labels:
+      release: prometheus
 
 devicePlugin:
   ## @param image.registry devicePlugin image registry
@@ -296,6 +306,16 @@ devicePlugin:
     httpPort: 31992
     labels: {}
     annotations: {}
+  # ServiceMonitor (monitoring.coreos.com/v1) for the hami device plugin
+  serviceMonitor:
+    # render the ServiceMonitor resource when true
+    enabled: false
+    # scrape interval of the /metrics endpoint
+    interval: "15s"
+    # extra labels for the ServiceMonitor metadata; rendered through tpl, so
+    # values may contain template expressions
+    labels:
+      release: prometheus
 
   pluginPath: /var/lib/kubelet/device-plugins
   libPath: /usr/local/vgpu