From 92c8e899d51ef12ce6935f9731a4344ccaab2735 Mon Sep 17 00:00:00 2001 From: zhujian Date: Fri, 24 Oct 2025 17:03:16 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=F0=9F=90=9B=20Fix=20health=20check=20to=20?= =?UTF-8?q?support=20partial=20probe=20results=20with=20multi-probe=20conf?= =?UTF-8?q?iguration?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When multiple health check probes are configured, if the first probe returns empty feedback results, the health check was incorrectly marking the addon as Unknown and returning early, preventing other probes from being evaluated. This fix changes the logic to: - Collect all empty probe fields instead of returning immediately - Only mark the addon as Unknown if ALL probes have no results - Allow partial probe results to be considered valid - Remove redundant check for non-empty results after healthChecker Fixes #341 Signed-off-by: Claude Signed-off-by: zhujian --- .../agentdeploy/healthcheck_sync.go | 40 +++++++---- .../agentdeploy/healthcheck_sync_test.go | 68 +++++++++++++++++++ 2 files changed, 95 insertions(+), 13 deletions(-) diff --git a/pkg/addonmanager/controllers/agentdeploy/healthcheck_sync.go b/pkg/addonmanager/controllers/agentdeploy/healthcheck_sync.go index 6f265e9cd..a0cce4113 100644 --- a/pkg/addonmanager/controllers/agentdeploy/healthcheck_sync.go +++ b/pkg/addonmanager/controllers/agentdeploy/healthcheck_sync.go @@ -188,28 +188,20 @@ func (s *healthCheckSyncer) probeAddonStatusByWorks( } var FieldResults []agent.FieldResult + var emptyProbeFields []workapiv1.ResourceIdentifier for _, field := range probeFields { results := findResultsByIdentifier(field.ResourceIdentifier, manifestConditions) // if no results are returned. it is possible that work agent has not returned the feedback value. - // mark condition to unknown + // collect these fields and check if all probes are empty later if len(results) == 0 { - meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{ - Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable, - Status: metav1.ConditionUnknown, - Reason: addonapiv1alpha1.AddonAvailableReasonNoProbeResult, - Message: fmt.Sprintf("Probe results are not returned for %s/%s: %s/%s", - field.ResourceIdentifier.Group, field.ResourceIdentifier.Resource, - field.ResourceIdentifier.Namespace, field.ResourceIdentifier.Name), - }) - return nil + emptyProbeFields = append(emptyProbeFields, field.ResourceIdentifier) + continue } // healthCheck will be ignored if healthChecker is set if healthChecker != nil { - if len(results) != 0 { - FieldResults = append(FieldResults, results...) - } + FieldResults = append(FieldResults, results...) continue } @@ -238,6 +230,28 @@ func (s *healthCheckSyncer) probeAddonStatusByWorks( } + // If all probe fields have no results, mark condition to unknown + if len(emptyProbeFields) == len(probeFields) && len(probeFields) > 0 { + var msg string + if len(emptyProbeFields) == 1 { + msg = fmt.Sprintf("Probe results are not returned for %s/%s: %s/%s", + emptyProbeFields[0].Group, emptyProbeFields[0].Resource, + emptyProbeFields[0].Namespace, emptyProbeFields[0].Name) + } else { + msg = fmt.Sprintf("Probe results are not returned for %d probe fields", len(emptyProbeFields)) + } + meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{ + Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable, + Status: metav1.ConditionUnknown, + Reason: addonapiv1alpha1.AddonAvailableReasonNoProbeResult, + Message: msg, + }) + return nil + } + + // If we have FieldResults but some probes are empty, still proceed with healthChecker + // This allows partial probe results to be considered valid + if healthChecker != nil { if err := healthChecker(FieldResults, cluster, addon); err != nil { meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{ diff --git a/pkg/addonmanager/controllers/agentdeploy/healthcheck_sync_test.go b/pkg/addonmanager/controllers/agentdeploy/healthcheck_sync_test.go index 540871180..00a6f4b85 100644 --- a/pkg/addonmanager/controllers/agentdeploy/healthcheck_sync_test.go +++ b/pkg/addonmanager/controllers/agentdeploy/healthcheck_sync_test.go @@ -1259,6 +1259,74 @@ func TestHealthCheckReconcile(t *testing.T) { Message: "test add-on is available.", }, }, + { + name: "Health check mode is work with multi probes where first probe has empty results but second has valid results", + testAddon: &healthCheckTestAgent{name: "test", + health: newDeploymentsCheckAllProber( + types.NamespacedName{Name: "test-deployment-nonexistent", Namespace: "default"}, + types.NamespacedName{Name: "test-deployment1", Namespace: "default"}), + }, + addon: addontesting.NewAddonWithConditions("test", "cluster1", manifestAppliedCondition), + existingWork: []runtime.Object{ + &v1.ManifestWork{ + ObjectMeta: metav1.ObjectMeta{ + Name: "addon-test-deploy-01", + Namespace: "cluster1", + Labels: map[string]string{ + "open-cluster-management.io/addon-name": "test", + }, + }, + Spec: v1.ManifestWorkSpec{}, + Status: v1.ManifestWorkStatus{ + ResourceStatus: v1.ManifestResourceStatus{ + Manifests: []v1.ManifestCondition{ + { + ResourceMeta: v1.ManifestResourceMeta{ + Ordinal: 0, + Group: "apps", + Version: "", + Kind: "", + Resource: "deployments", + Name: "test-deployment1", + Namespace: "default", + }, + StatusFeedbacks: v1.StatusFeedbackResult{ + Values: []v1.FeedbackValue{ + { + Name: "Replicas", + Value: v1.FieldValue{ + Integer: boolPtr(1), + }, + }, + { + Name: "ReadyReplicas", + Value: v1.FieldValue{ + Integer: boolPtr(1), + }, + }, + }, + }, + }, + }, + }, + Conditions: []metav1.Condition{ + { + Type: v1.WorkAvailable, + Status: metav1.ConditionTrue, + }, + }, + }, + }, + }, + expectedErr: nil, + expectedHealthCheckMode: addonapiv1alpha1.HealthCheckModeCustomized, + expectAvailableCondition: metav1.Condition{ + Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable, + Status: metav1.ConditionTrue, + Reason: addonapiv1alpha1.AddonAvailableReasonProbeAvailable, + Message: "test add-on is available.", + }, + }, } for _, c := range cases { From 3b953f5efb35525667a740dc670747ef4a29bb59 Mon Sep 17 00:00:00 2001 From: zhujian Date: Fri, 24 Oct 2025 18:13:22 +0800 Subject: [PATCH 2/2] Refactor health check probe result handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Simplify the probe result collection logic by: - Collecting all probe results upfront, regardless of healthChecker - Checking for empty results based on FieldResults length - Removing unnecessary tracking of empty probe fields - Updating test expectations for multi-probe scenarios This makes the code cleaner and maintains the fix for supporting partial probe results with multi-probe configurations. Signed-off-by: Jia Zhu 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Signed-off-by: zhujian --- .../agentdeploy/healthcheck_sync.go | 22 +++++-------------- test/integration/kube/agent_deploy_test.go | 4 ++-- 2 files changed, 8 insertions(+), 18 deletions(-) diff --git a/pkg/addonmanager/controllers/agentdeploy/healthcheck_sync.go b/pkg/addonmanager/controllers/agentdeploy/healthcheck_sync.go index a0cce4113..213cc9fb9 100644 --- a/pkg/addonmanager/controllers/agentdeploy/healthcheck_sync.go +++ b/pkg/addonmanager/controllers/agentdeploy/healthcheck_sync.go @@ -187,21 +187,19 @@ func (s *healthCheckSyncer) probeAddonStatusByWorks( return err } - var FieldResults []agent.FieldResult - var emptyProbeFields []workapiv1.ResourceIdentifier + var fieldResults []agent.FieldResult for _, field := range probeFields { results := findResultsByIdentifier(field.ResourceIdentifier, manifestConditions) // if no results are returned. it is possible that work agent has not returned the feedback value. // collect these fields and check if all probes are empty later if len(results) == 0 { - emptyProbeFields = append(emptyProbeFields, field.ResourceIdentifier) continue } + fieldResults = append(fieldResults, results...) // healthCheck will be ignored if healthChecker is set if healthChecker != nil { - FieldResults = append(FieldResults, results...) continue } @@ -231,29 +229,21 @@ func (s *healthCheckSyncer) probeAddonStatusByWorks( } // If all probe fields have no results, mark condition to unknown - if len(emptyProbeFields) == len(probeFields) && len(probeFields) > 0 { - var msg string - if len(emptyProbeFields) == 1 { - msg = fmt.Sprintf("Probe results are not returned for %s/%s: %s/%s", - emptyProbeFields[0].Group, emptyProbeFields[0].Resource, - emptyProbeFields[0].Namespace, emptyProbeFields[0].Name) - } else { - msg = fmt.Sprintf("Probe results are not returned for %d probe fields", len(emptyProbeFields)) - } + if len(probeFields) > 0 && len(fieldResults) == 0 { meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{ Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable, Status: metav1.ConditionUnknown, Reason: addonapiv1alpha1.AddonAvailableReasonNoProbeResult, - Message: msg, + Message: "Probe results are not returned", }) return nil } - // If we have FieldResults but some probes are empty, still proceed with healthChecker + // If we have fieldResults but some probes are empty, still proceed with healthChecker // This allows partial probe results to be considered valid if healthChecker != nil { - if err := healthChecker(FieldResults, cluster, addon); err != nil { + if err := healthChecker(fieldResults, cluster, addon); err != nil { meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{ Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable, Status: metav1.ConditionFalse, diff --git a/test/integration/kube/agent_deploy_test.go b/test/integration/kube/agent_deploy_test.go index 2bf2b8e06..c3c32057d 100644 --- a/test/integration/kube/agent_deploy_test.go +++ b/test/integration/kube/agent_deploy_test.go @@ -922,8 +922,8 @@ var _ = ginkgo.Describe("Agent deploy", func() { return err } - // the daemonset is not probed, so the addon available condition is unknown - if !meta.IsStatusConditionPresentAndEqual(addon.Status.Conditions, "Available", metav1.ConditionUnknown) { + // the daemonset is not probed, so the addon available condition is false + if !meta.IsStatusConditionPresentAndEqual(addon.Status.Conditions, "Available", metav1.ConditionFalse) { return fmt.Errorf("Unexpected addon available condition, %v", addon.Status.Conditions) } return nil