Skip to content

Commit c7ff350

Browse files
feat: update health probe to make use of healthChecker
healthChecker was introduced in open-cluster-management-io/addon-framework#289 and it allow us to define a list of ProbeFields as before but it also allows us to define HealthChecker. HealthChecker has the ability to go through all the resultFields and decide which resources are mandatory and should trigger unheathly probes and which resources are optional
1 parent 53e1907 commit c7ff350

File tree

3 files changed

+46
-31
lines changed

3 files changed

+46
-31
lines changed

go.mod

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,13 @@ require (
1717
k8s.io/client-go v0.30.3
1818
k8s.io/component-base v0.30.3
1919
k8s.io/klog/v2 v2.130.1
20-
open-cluster-management.io/addon-framework v0.11.0
21-
open-cluster-management.io/api v0.15.0
20+
open-cluster-management.io/addon-framework v0.0.0-00010101000000-000000000000
21+
open-cluster-management.io/api v0.15.1-0.20241120090202-cb7ce98ab874
2222
sigs.k8s.io/controller-runtime v0.18.5
2323
)
2424

25+
replace open-cluster-management.io/addon-framework => github.com/zhiweiyin318/addon-framework v0.0.0-20241125074036-754aecccaf2b
26+
2527
replace github.com/prometheus/common => github.com/prometheus/common v0.60.1
2628

2729
replace github.com/prometheus/client_golang => github.com/prometheus/client_golang v1.20.5

go.sum

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,8 @@ github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q
219219
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
220220
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
221221
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
222+
github.com/zhiweiyin318/addon-framework v0.0.0-20241125074036-754aecccaf2b h1:xBnQWUqASEhZfyUZ8Bsl7fBe78ZF+Y2od524n6dZfCA=
223+
github.com/zhiweiyin318/addon-framework v0.0.0-20241125074036-754aecccaf2b/go.mod h1:tsBSNs9mGfVQQjXBnjgpiX6r0UM+G3iNfmzQgKhEfw4=
222224
go.etcd.io/bbolt v1.3.8 h1:xs88BrvEv273UsB79e0hcVrlUWmS0a8upikMFhSyAtA=
223225
go.etcd.io/bbolt v1.3.8/go.mod h1:N9Mkw9X8x5fupy0IKsmuqVtoGDyxsaDlbk4Rd05IAQw=
224226
go.etcd.io/etcd/api/v3 v3.5.12 h1:W4sw5ZoU2Juc9gBWuLk5U6fHfNVyY1WC5g9uiXZio/c=
@@ -378,10 +380,8 @@ k8s.io/kube-openapi v0.0.0-20240620174524-b456828f718b h1:Q9xmGWBvOGd8UJyccgpYlL
378380
k8s.io/kube-openapi v0.0.0-20240620174524-b456828f718b/go.mod h1:UxDHUPsUwTOOxSU+oXURfFBcAS6JwiRXTYqYwfuGowc=
379381
k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0 h1:jgGTlFYnhF1PM1Ax/lAlxUPE+KfCIXHaathvJg1C3ak=
380382
k8s.io/utils v0.0.0-20240502163921-fe8a2dddb1d0/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
381-
open-cluster-management.io/addon-framework v0.11.0 h1:ZJxphgHQ36VUJF0RIag+nzcEn5PNyep2rsEPdz6wT7o=
382-
open-cluster-management.io/addon-framework v0.11.0/go.mod h1:ruMU8i/dciz3qCv2CQ46Cu1b7rkK7TpvB+W4bRwHf+I=
383-
open-cluster-management.io/api v0.15.0 h1:lRee1KOlGHZb2scTA7ff9E9Fxt2hJc7jpkHnaCbvkOU=
384-
open-cluster-management.io/api v0.15.0/go.mod h1:9erZEWEn4bEqh0nIX2wA7f/s3KCuFycQdBrPrRzi0QM=
383+
open-cluster-management.io/api v0.15.1-0.20241120090202-cb7ce98ab874 h1:WgkuYXTbJV7EK+qtiMq3soa21faGUKeTG5w0C8Mn1Ok=
384+
open-cluster-management.io/api v0.15.1-0.20241120090202-cb7ce98ab874/go.mod h1:9erZEWEn4bEqh0nIX2wA7f/s3KCuFycQdBrPrRzi0QM=
385385
open-cluster-management.io/sdk-go v0.15.0 h1:2IAJnPfUoY6rPC5w7LhqAnvIlgekPoVW03LdZO1unIM=
386386
open-cluster-management.io/sdk-go v0.15.0/go.mod h1:fi5WBsbC5K3txKb8eRLuP0Sim/Oqz/PHX18skAEyjiA=
387387
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.29.0 h1:/U5vjBbQn3RChhv7P11uhYvCSm5G2GaIi5AIGBS6r4c=

internal/addon/addon.go

Lines changed: 38 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,11 @@ import (
1414
)
1515

1616
var (
17+
errMissingFields = errors.New("no fields found in health checker")
1718
errProbeConditionNotSatisfied = errors.New("probe condition is not satisfied")
1819
errProbeValueIsNil = errors.New("probe value is nil")
19-
errValueNotProbed = errors.New("value not probed")
20+
errUnknownProbeKey = errors.New("probe has key that doesn't match the key defined")
21+
errUnknownResource = errors.New("undefined health check for resource")
2022
)
2123

2224
func NewRegistrationOption(agentName string) *agent.RegistrationOption {
@@ -90,42 +92,53 @@ func AgentHealthProber() *agent.HealthProber {
9092
},
9193
},
9294
},
93-
HealthCheck: func(identifier workv1.ResourceIdentifier, result workv1.StatusFeedbackResult) error {
94-
for _, value := range result.Values {
95+
HealthChecker: func(fields []agent.ResultField) error {
96+
if len(fields) == 0 {
97+
return errMissingFields
98+
}
99+
for _, field := range fields {
100+
if len(field.FeedbackResult.Values) == 0 {
101+
// If a probe didn't get any values maybe the resources was not deployed
102+
continue
103+
}
104+
identifier := field.ResourceIdentifier
95105
switch {
96106
case identifier.Resource == ClusterLogForwardersResource:
97-
if value.Name != clfProbeKey {
98-
continue
99-
}
107+
for _, value := range field.FeedbackResult.Values {
108+
if value.Name != clfProbeKey {
109+
return fmt.Errorf("%w: %s with key %s/%s unknown probe keys %s", errUnknownProbeKey, identifier.Resource, identifier.Namespace, identifier.Name, value.Name)
110+
}
100111

101-
if value.Value.String == nil {
102-
return fmt.Errorf("%w: clusterlogforwarder with key %s/%s", errProbeValueIsNil, identifier.Namespace, identifier.Name)
103-
}
112+
if value.Value.String == nil {
113+
return fmt.Errorf("%w: %s with key %s/%s", errProbeValueIsNil, identifier.Resource, identifier.Namespace, identifier.Name)
114+
}
104115

105-
if *value.Value.String != "True" {
106-
return fmt.Errorf("%w: clusterlogforwarder status condition type is %s for %s/%s", errProbeConditionNotSatisfied, *value.Value.String, identifier.Namespace, identifier.Name)
116+
if *value.Value.String != "True" {
117+
return fmt.Errorf("%w: %s status condition type is %s for %s/%s", errProbeConditionNotSatisfied, identifier.Resource, *value.Value.String, identifier.Namespace, identifier.Name)
118+
}
119+
// everything checks we should skip to the next field
107120
}
108-
109-
return nil
110121
case identifier.Resource == OpenTelemetryCollectorsResource:
111-
if value.Name != otelColProbeKey {
112-
continue
113-
}
122+
for _, value := range field.FeedbackResult.Values {
123+
if value.Name != otelColProbeKey {
124+
return fmt.Errorf("%w: %s with key %s/%s unknown probe keys %s", errUnknownProbeKey, identifier.Resource, identifier.Namespace, identifier.Name, value.Name)
125+
}
114126

115-
if value.Value.Integer == nil {
116-
return fmt.Errorf("%w: opentelemetrycollector with key %s/%s", errProbeValueIsNil, identifier.Namespace, identifier.Name)
117-
}
127+
if value.Value.Integer == nil {
128+
return fmt.Errorf("%w: %s with key %s/%s", errProbeValueIsNil, identifier.Resource, identifier.Namespace, identifier.Name)
129+
}
118130

119-
if *value.Value.Integer < 1 {
120-
return fmt.Errorf("%w: opentelemetrycollector replicas is %d for %s/%s", errProbeConditionNotSatisfied, *value.Value.Integer, identifier.Namespace, identifier.Name)
131+
if *value.Value.Integer < 1 {
132+
return fmt.Errorf("%w: %s replicas is %d for %s/%s", errProbeConditionNotSatisfied, identifier.Resource, *value.Value.Integer, identifier.Namespace, identifier.Name)
133+
}
134+
// everything checks we should skip to the next field
121135
}
122-
123-
return nil
124136
default:
125-
continue
137+
// If a resource is being probed it should have a health check defined
138+
return fmt.Errorf("%w: %s with key %s/%s", errUnknownResource, identifier.Resource, identifier.Namespace, identifier.Name)
126139
}
127140
}
128-
return fmt.Errorf("%w: for resource %s with key %s/%s", errValueNotProbed, identifier.Resource, identifier.Namespace, identifier.Name)
141+
return nil
129142
},
130143
},
131144
}

0 commit comments

Comments
 (0)