Skip to content

Commit 92c8e89

Browse files
zhujian7openshift-cherrypick-robot
authored andcommitted
šŸ› Fix health check to support partial probe results with multi-probe configuration
When multiple health check probes are configured, if the first probe returns empty feedback results, the health check was incorrectly marking the addon as Unknown and returning early, preventing other probes from being evaluated. This fix changes the logic to: - Collect all empty probe fields instead of returning immediately - Only mark the addon as Unknown if ALL probes have no results - Allow partial probe results to be considered valid - Remove redundant check for non-empty results after healthChecker Fixes #341 Signed-off-by: Claude <[email protected]> Signed-off-by: zhujian <[email protected]>
1 parent c2f5c52 commit 92c8e89

File tree

2 files changed

+95
-13
lines changed

2 files changed

+95
-13
lines changed

ā€Žpkg/addonmanager/controllers/agentdeploy/healthcheck_sync.goā€Ž

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -188,28 +188,20 @@ func (s *healthCheckSyncer) probeAddonStatusByWorks(
188188
}
189189

190190
var FieldResults []agent.FieldResult
191+
var emptyProbeFields []workapiv1.ResourceIdentifier
191192

192193
for _, field := range probeFields {
193194
results := findResultsByIdentifier(field.ResourceIdentifier, manifestConditions)
194195
// if no results are returned. it is possible that work agent has not returned the feedback value.
195-
// mark condition to unknown
196+
// collect these fields and check if all probes are empty later
196197
if len(results) == 0 {
197-
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
198-
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
199-
Status: metav1.ConditionUnknown,
200-
Reason: addonapiv1alpha1.AddonAvailableReasonNoProbeResult,
201-
Message: fmt.Sprintf("Probe results are not returned for %s/%s: %s/%s",
202-
field.ResourceIdentifier.Group, field.ResourceIdentifier.Resource,
203-
field.ResourceIdentifier.Namespace, field.ResourceIdentifier.Name),
204-
})
205-
return nil
198+
emptyProbeFields = append(emptyProbeFields, field.ResourceIdentifier)
199+
continue
206200
}
207201

208202
// healthCheck will be ignored if healthChecker is set
209203
if healthChecker != nil {
210-
if len(results) != 0 {
211-
FieldResults = append(FieldResults, results...)
212-
}
204+
FieldResults = append(FieldResults, results...)
213205
continue
214206
}
215207

@@ -238,6 +230,28 @@ func (s *healthCheckSyncer) probeAddonStatusByWorks(
238230

239231
}
240232

233+
// If all probe fields have no results, mark condition to unknown
234+
if len(emptyProbeFields) == len(probeFields) && len(probeFields) > 0 {
235+
var msg string
236+
if len(emptyProbeFields) == 1 {
237+
msg = fmt.Sprintf("Probe results are not returned for %s/%s: %s/%s",
238+
emptyProbeFields[0].Group, emptyProbeFields[0].Resource,
239+
emptyProbeFields[0].Namespace, emptyProbeFields[0].Name)
240+
} else {
241+
msg = fmt.Sprintf("Probe results are not returned for %d probe fields", len(emptyProbeFields))
242+
}
243+
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{
244+
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
245+
Status: metav1.ConditionUnknown,
246+
Reason: addonapiv1alpha1.AddonAvailableReasonNoProbeResult,
247+
Message: msg,
248+
})
249+
return nil
250+
}
251+
252+
// If we have FieldResults but some probes are empty, still proceed with healthChecker
253+
// This allows partial probe results to be considered valid
254+
241255
if healthChecker != nil {
242256
if err := healthChecker(FieldResults, cluster, addon); err != nil {
243257
meta.SetStatusCondition(&addon.Status.Conditions, metav1.Condition{

ā€Žpkg/addonmanager/controllers/agentdeploy/healthcheck_sync_test.goā€Ž

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,6 +1259,74 @@ func TestHealthCheckReconcile(t *testing.T) {
12591259
Message: "test add-on is available.",
12601260
},
12611261
},
1262+
{
1263+
name: "Health check mode is work with multi probes where first probe has empty results but second has valid results",
1264+
testAddon: &healthCheckTestAgent{name: "test",
1265+
health: newDeploymentsCheckAllProber(
1266+
types.NamespacedName{Name: "test-deployment-nonexistent", Namespace: "default"},
1267+
types.NamespacedName{Name: "test-deployment1", Namespace: "default"}),
1268+
},
1269+
addon: addontesting.NewAddonWithConditions("test", "cluster1", manifestAppliedCondition),
1270+
existingWork: []runtime.Object{
1271+
&v1.ManifestWork{
1272+
ObjectMeta: metav1.ObjectMeta{
1273+
Name: "addon-test-deploy-01",
1274+
Namespace: "cluster1",
1275+
Labels: map[string]string{
1276+
"open-cluster-management.io/addon-name": "test",
1277+
},
1278+
},
1279+
Spec: v1.ManifestWorkSpec{},
1280+
Status: v1.ManifestWorkStatus{
1281+
ResourceStatus: v1.ManifestResourceStatus{
1282+
Manifests: []v1.ManifestCondition{
1283+
{
1284+
ResourceMeta: v1.ManifestResourceMeta{
1285+
Ordinal: 0,
1286+
Group: "apps",
1287+
Version: "",
1288+
Kind: "",
1289+
Resource: "deployments",
1290+
Name: "test-deployment1",
1291+
Namespace: "default",
1292+
},
1293+
StatusFeedbacks: v1.StatusFeedbackResult{
1294+
Values: []v1.FeedbackValue{
1295+
{
1296+
Name: "Replicas",
1297+
Value: v1.FieldValue{
1298+
Integer: boolPtr(1),
1299+
},
1300+
},
1301+
{
1302+
Name: "ReadyReplicas",
1303+
Value: v1.FieldValue{
1304+
Integer: boolPtr(1),
1305+
},
1306+
},
1307+
},
1308+
},
1309+
},
1310+
},
1311+
},
1312+
Conditions: []metav1.Condition{
1313+
{
1314+
Type: v1.WorkAvailable,
1315+
Status: metav1.ConditionTrue,
1316+
},
1317+
},
1318+
},
1319+
},
1320+
},
1321+
expectedErr: nil,
1322+
expectedHealthCheckMode: addonapiv1alpha1.HealthCheckModeCustomized,
1323+
expectAvailableCondition: metav1.Condition{
1324+
Type: addonapiv1alpha1.ManagedClusterAddOnConditionAvailable,
1325+
Status: metav1.ConditionTrue,
1326+
Reason: addonapiv1alpha1.AddonAvailableReasonProbeAvailable,
1327+
Message: "test add-on is available.",
1328+
},
1329+
},
12621330
}
12631331

12641332
for _, c := range cases {

0 commit comments

Comments
Ā (0)