Skip to content

Commit 12371fb

Browse files
committed
add new addonHealthCheck func to check all fields and support wildcard
Signed-off-by: Zhiwei Yin <[email protected]>
1 parent d787e98 commit 12371fb

File tree

14 files changed

+604
-42
lines changed

14 files changed

+604
-42
lines changed

cmd/example/helloworld_helm/main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,7 @@ func runController(ctx context.Context, kubeConfig *rest.Config) error {
132132
utils.AgentInstallNamespaceFromDeploymentConfigFunc(
133133
utils.NewAddOnDeploymentConfigGetter(addonClient),
134134
),
135-
).
135+
).WithAgentHealthProber(helloworld_helm.AgentHealthProber()).
136136
BuildHelmAgentAddon()
137137
if err != nil {
138138
klog.Errorf("failed to build agent %v", err)

examples/helloworld_helm/helloworld_helm.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import (
44
"context"
55
"embed"
66
"fmt"
7+
"open-cluster-management.io/addon-framework/pkg/agent"
8+
workapiv1 "open-cluster-management.io/api/work/v1"
79
"os"
810

911
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -112,3 +114,42 @@ func GetImageValues(kubeClient kubernetes.Interface) addonfactory.GetValuesFunc
112114
return overrideValues, nil
113115
}
114116
}
117+
118+
func AgentHealthProber() *agent.HealthProber {
119+
return &agent.HealthProber{
120+
Type: agent.HealthProberTypeWork,
121+
WorkProber: &agent.WorkHealthProber{
122+
ProbeFields: []agent.ProbeField{
123+
{
124+
ResourceIdentifier: workapiv1.ResourceIdentifier{
125+
Group: "apps",
126+
Resource: "deployments",
127+
Name: "*",
128+
Namespace: "*",
129+
},
130+
ProbeRules: []workapiv1.FeedbackRule{
131+
{
132+
Type: workapiv1.WellKnownStatusType,
133+
},
134+
},
135+
},
136+
},
137+
HealthCheckAll: func(fields []agent.ResultField) error {
138+
for _, field := range fields {
139+
if len(field.FeedbackResult.Values) == 0 {
140+
return fmt.Errorf("no helloworldhelmhm agent")
141+
}
142+
switch field.ResourceIdentifier.Name {
143+
case "helloworldhelm-agent":
144+
for _, value := range field.FeedbackResult.Values {
145+
if value.Name == "AvailableReplicas" && *value.Value.Integer != 1 {
146+
return nil
147+
}
148+
}
149+
}
150+
}
151+
return fmt.Errorf("helloworldhelmhm agent is not ready")
152+
},
153+
},
154+
}
155+
}

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ require (
2222
k8s.io/component-base v0.30.2
2323
k8s.io/klog/v2 v2.120.1
2424
k8s.io/utils v0.0.0-20240310230437-4693a0247e57
25-
open-cluster-management.io/api v0.15.0
25+
open-cluster-management.io/api v0.15.1-0.20241120090202-cb7ce98ab874
2626
open-cluster-management.io/sdk-go v0.15.0
2727
sigs.k8s.io/controller-runtime v0.18.4
2828
)

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -474,8 +474,8 @@ k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7F
474474
k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98=
475475
k8s.io/utils v0.0.0-20240310230437-4693a0247e57 h1:gbqbevonBh57eILzModw6mrkbwM0gQBEuevE/AaBsHY=
476476
k8s.io/utils v0.0.0-20240310230437-4693a0247e57/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
477-
open-cluster-management.io/api v0.15.0 h1:lRee1KOlGHZb2scTA7ff9E9Fxt2hJc7jpkHnaCbvkOU=
478-
open-cluster-management.io/api v0.15.0/go.mod h1:9erZEWEn4bEqh0nIX2wA7f/s3KCuFycQdBrPrRzi0QM=
477+
open-cluster-management.io/api v0.15.1-0.20241120090202-cb7ce98ab874 h1:WgkuYXTbJV7EK+qtiMq3soa21faGUKeTG5w0C8Mn1Ok=
478+
open-cluster-management.io/api v0.15.1-0.20241120090202-cb7ce98ab874/go.mod h1:9erZEWEn4bEqh0nIX2wA7f/s3KCuFycQdBrPrRzi0QM=
479479
open-cluster-management.io/sdk-go v0.15.0 h1:2IAJnPfUoY6rPC5w7LhqAnvIlgekPoVW03LdZO1unIM=
480480
open-cluster-management.io/sdk-go v0.15.0/go.mod h1:fi5WBsbC5K3txKb8eRLuP0Sim/Oqz/PHX18skAEyjiA=
481481
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.29.0 h1:/U5vjBbQn3RChhv7P11uhYvCSm5G2GaIi5AIGBS6r4c=

pkg/addonmanager/controllers/agentdeploy/healthcheck_sync.go

Lines changed: 91 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package agentdeploy
33
import (
44
"context"
55
"fmt"
6+
"regexp"
67
"strings"
78

89
appsv1 "k8s.io/api/apps/v1"
@@ -167,17 +168,38 @@ func (s *healthCheckSyncer) probeAddonStatusByWorks(
167168
manifestConditions = append(manifestConditions, work.Status.ResourceStatus.Manifests...)
168169
}
169170

170-
probeFields, healthChecker, err := s.analyzeWorkProber(s.agentAddon, cluster, addon)
171+
probeFields, healthChecker, healthAllChecker, err := s.analyzeWorkProber(s.agentAddon, cluster, addon)
171172
if err != nil {
172173
// should not happen, return
173174
return err
174175
}
175176

177+
var resultFields []agent.ResultField
178+
176179
for _, field := range probeFields {
177-
result := findResultByIdentifier(field.ResourceIdentifier, manifestConditions)
180+
results := findResultsByIdentifier(field.ResourceIdentifier, manifestConditions)
181+
182+
// healthChecker will be ignored if healthAllChecker is set
183+
if healthAllChecker != nil {
184+
if len(results) != 0 {
185+
resultFields = append(resultFields, results...)
186+
}
187+
continue
188+
}
189+
190+
if healthChecker == nil {
191+
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
192+
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
193+
Status: metav1.ConditionFalse,
194+
Reason: addonapiv1alpha1.AddonAvailableReasonProbeUnavailable,
195+
Message: fmt.Sprintf("health checker function is not set %v", err),
196+
})
197+
return nil
198+
}
199+
178200
// if no results are returned. it is possible that work agent has not returned the feedback value.
179201
// mark condition to unknown
180-
if result == nil {
202+
if len(results) == 0 {
181203
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
182204
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
183205
Status: metav1.ConditionUnknown,
@@ -189,16 +211,29 @@ func (s *healthCheckSyncer) probeAddonStatusByWorks(
189211
return nil
190212
}
191213

192-
err := healthChecker(field.ResourceIdentifier, *result)
193-
if err != nil {
194-
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
195-
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
196-
Status: metav1.ConditionFalse,
197-
Reason: addonapiv1alpha1.AddonAvailableReasonProbeUnavailable,
198-
Message: fmt.Sprintf("Probe addon unavailable with err %v", err),
199-
})
200-
return nil
214+
for _, result := range results {
215+
err := healthChecker(result.ResourceIdentifier, result.FeedbackResult)
216+
if err != nil {
217+
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
218+
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
219+
Status: metav1.ConditionFalse,
220+
Reason: addonapiv1alpha1.AddonAvailableReasonProbeUnavailable,
221+
Message: fmt.Sprintf("Probe addon unavailable with err %v", err),
222+
})
223+
return nil
224+
}
201225
}
226+
227+
}
228+
229+
if healthAllChecker != nil && healthAllChecker(resultFields) != nil {
230+
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
231+
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
232+
Status: metav1.ConditionFalse,
233+
Reason: addonapiv1alpha1.AddonAvailableReasonProbeUnavailable,
234+
Message: fmt.Sprintf("Probe addon unavailable with err %v", err),
235+
})
236+
return nil
202237
}
203238

204239
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
@@ -214,21 +249,23 @@ func (s *healthCheckSyncer) analyzeWorkProber(
214249
agentAddon agent.AgentAddon,
215250
cluster *clusterv1.ManagedCluster,
216251
addon *addonapiv1alpha1.ManagedClusterAddOn,
217-
) ([]agent.ProbeField, agent.AddonHealthCheckFunc, error) {
252+
) ([]agent.ProbeField, agent.AddonHealthCheckFunc, agent.AddonHealthCheckAllFunc, error) {
218253

219254
switch agentAddon.GetAgentAddonOptions().HealthProber.Type {
220255
case agent.HealthProberTypeWork:
221256
workProber := agentAddon.GetAgentAddonOptions().HealthProber.WorkProber
222257
if workProber != nil {
223-
return workProber.ProbeFields, workProber.HealthCheck, nil
258+
return workProber.ProbeFields, workProber.HealthCheck, workProber.HealthCheckAll, nil
224259
}
225-
return nil, nil, fmt.Errorf("work prober is not configured")
260+
return nil, nil, nil, fmt.Errorf("work prober is not configured")
226261
case agent.HealthProberTypeDeploymentAvailability:
227-
return s.analyzeDeploymentWorkProber(agentAddon, cluster, addon)
262+
probeFields, heathChecker, err := s.analyzeDeploymentWorkProber(agentAddon, cluster, addon)
263+
return probeFields, heathChecker, nil, err
228264
case agent.HealthProberTypeWorkloadAvailability:
229-
return s.analyzeWorkloadsWorkProber(agentAddon, cluster, addon)
265+
probeFields, heathChecker, err := s.analyzeWorkloadsWorkProber(agentAddon, cluster, addon)
266+
return probeFields, heathChecker, nil, err
230267
default:
231-
return nil, nil, fmt.Errorf("unsupported health prober type %s", agentAddon.GetAgentAddonOptions().HealthProber.Type)
268+
return nil, nil, nil, fmt.Errorf("unsupported health prober type %s", agentAddon.GetAgentAddonOptions().HealthProber.Type)
232269
}
233270
}
234271

@@ -294,27 +331,46 @@ func (s *healthCheckSyncer) analyzeWorkloadsWorkProber(
294331
return probeFields, utils.WorkloadAvailabilityHealthCheck, nil
295332
}
296333

297-
func findResultByIdentifier(identifier workapiv1.ResourceIdentifier, manifestConditions []workapiv1.ManifestCondition) *workapiv1.StatusFeedbackResult {
334+
func findResultsByIdentifier(identifier workapiv1.ResourceIdentifier,
335+
manifestConditions []workapiv1.ManifestCondition) []agent.ResultField {
336+
var results []agent.ResultField
298337
for _, status := range manifestConditions {
299-
if identifier.Group != status.ResourceMeta.Group {
300-
continue
301-
}
302-
if identifier.Resource != status.ResourceMeta.Resource {
303-
continue
304-
}
305-
if identifier.Name != status.ResourceMeta.Name {
306-
continue
307-
}
308-
if identifier.Namespace != status.ResourceMeta.Namespace {
309-
continue
338+
if resourceMatch(status.ResourceMeta, identifier) && len(status.StatusFeedbacks.Values) != 0 {
339+
results = append(results, agent.ResultField{
340+
ResourceIdentifier: workapiv1.ResourceIdentifier{
341+
Group: status.ResourceMeta.Group,
342+
Resource: status.ResourceMeta.Resource,
343+
Name: status.ResourceMeta.Name,
344+
Namespace: status.ResourceMeta.Namespace,
345+
},
346+
FeedbackResult: status.StatusFeedbacks,
347+
})
310348
}
349+
}
311350

312-
if len(status.StatusFeedbacks.Values) == 0 {
313-
return nil
314-
}
351+
return results
352+
}
315353

316-
return &status.StatusFeedbacks
354+
// wildcardMatch reports whether resource matches target. Every "*" in target
// stands for any (possibly empty) run of characters other than newline; all
// other characters of target are matched literally. A target of exactly "*"
// matches every resource.
func wildcardMatch(resource, target string) bool {
	if resource == target || target == "*" {
		return true
	}

	// Without a wildcard only the exact comparison above can succeed, so there
	// is no need to build and compile a regular expression.
	if !strings.Contains(target, "*") {
		return false
	}

	// Escape everything, then turn the escaped "*" back into a wildcard.
	pattern := "^" + regexp.QuoteMeta(target) + "$"
	pattern = strings.ReplaceAll(pattern, "\\*", ".*")

	re, err := regexp.Compile(pattern)
	if err != nil {
		return false
	}

	return re.MatchString(resource)
}
370+
371+
func resourceMatch(resourceMeta workapiv1.ManifestResourceMeta, resource workapiv1.ResourceIdentifier) bool {
372+
return resourceMeta.Group == resource.Group &&
373+
resourceMeta.Resource == resource.Resource &&
374+
wildcardMatch(resourceMeta.Namespace, resource.Namespace) &&
375+
wildcardMatch(resourceMeta.Name, resource.Name)
320376
}

0 commit comments

Comments
 (0)