diff --git a/e2e/e2e_common.go b/e2e/e2e_common.go index ec1923fbb..4482c876c 100644 --- a/e2e/e2e_common.go +++ b/e2e/e2e_common.go @@ -17,17 +17,19 @@ package e2e import ( "context" "fmt" + "reflect" "strings" "time" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" utilruntime "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/client-go/kubernetes/scheme" - "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/config" + "sigs.k8s.io/yaml" bmov1alpha1 "github.com/metal3-io/baremetal-operator/apis/metal3.io/v1alpha1" metal3v1 "github.com/metal3-io/cluster-api-provider-metal3/api/v1beta1" @@ -42,10 +44,9 @@ import ( ctrl "sigs.k8s.io/controller-runtime" logf "sigs.k8s.io/controller-runtime/pkg/log" - capiframework "github.com/openshift/cluster-capi-operator/e2e/framework" - configv1 "github.com/openshift/api/config/v1" mapiv1beta1 "github.com/openshift/api/machine/v1beta1" + capiframework "github.com/openshift/cluster-capi-operator/e2e/framework" "sigs.k8s.io/controller-runtime/pkg/envtest/komega" ) @@ -59,6 +60,12 @@ var ( ctx = context.Background() platform configv1.PlatformType clusterName string + + // resourcesUnderTest tracks objects created by the current test for focused + // diagnostics on failure. Helpers call trackResource after creating objects; + // ReportAfterEach dumps detailed state for each tracked resource then clears + // the list. + resourcesUnderTest []client.Object ) func init() { @@ -99,148 +106,140 @@ func InitCommonVariables() { komega.SetContext(ctx) } -// dumpClusterState logs Machines, MachineSets, and Events from both MAPI and CAPI -// namespaces. Called on test failure to capture resource state before cleanup removes them. -func dumpClusterState() { - namespaces := []string{capiframework.MAPINamespace, capiframework.CAPINamespace} +// trackResource registers a resource for focused diagnostics on test failure. +// The object must have Name and Namespace set. The object's type is used to +// determine how to fetch and format it in the failure dump. +func trackResource(obj client.Object) { + resourcesUnderTest = append(resourcesUnderTest, obj) +} + +// dumpTrackedResources writes detailed diagnostics for each tracked resource +// to GinkgoWriter. For each resource it fetches current state, marshals it as +// YAML, and lists events specific to that object. It also dumps all +// AWSMachineTemplates (on AWS) and all events in both namespaces. +// Best-effort: panics are recovered and individual errors are logged without +// aborting the dump. +func dumpTrackedResources() { + defer func() { + if r := recover(); r != nil { + GinkgoWriter.Printf("WARNING: dumpTrackedResources panicked: %v\n", r) + } + }() var buf strings.Builder - buf.WriteString("\n=== Cluster State Dump (test failure) ===\n") - - for _, ns := range namespaces { - dumpMAPIMachines(&buf, ns) - dumpCAPIMachines(&buf, ns) - dumpMAPIMachineSets(&buf, ns) - dumpCAPIMachineSets(&buf, ns) - dumpEvents(&buf, ns) + buf.WriteString("\n=== Test Failure Diagnostics ===\n") + + for _, obj := range resourcesUnderTest { + dumpSingleResource(&buf, obj) } if platform == configv1.AWSPlatformType { - for _, ns := range namespaces { - dumpAWSMachines(&buf, ns) - dumpAWSMachineTemplates(&buf, ns) - } + dumpAllAWSMachineTemplates(&buf) } + dumpNamespaceEvents(&buf, capiframework.CAPINamespace) + dumpNamespaceEvents(&buf, capiframework.MAPINamespace) - buf.WriteString("=== End Cluster State Dump ===\n") + buf.WriteString("\n=== End Test Failure Diagnostics ===\n") GinkgoWriter.Print(buf.String()) - AddReportEntry("cluster-state-dump", buf.String()) } -func dumpMAPIMachines(buf *strings.Builder, namespace string) { - list := &mapiv1beta1.MachineList{} - if err := cl.List(ctx, list, client.InNamespace(namespace)); err != nil { - fmt.Fprintf(buf, "\n[%s] MAPI Machines: error listing: %v\n", namespace, err) - return - } - - if len(list.Items) == 0 { - return - } +func dumpSingleResource(buf *strings.Builder, obj client.Object) { + defer func() { + if r := recover(); r != nil { + fmt.Fprintf(buf, "\n--- (panic dumping %T): %v ---\n", obj, r) + } + }() - fmt.Fprintf(buf, "\n[%s] MAPI Machines (%d):\n", namespace, len(list.Items)) + key := client.ObjectKeyFromObject(obj) + typeName := reflect.TypeOf(obj).Elem().Name() - for i := range list.Items { - m := &list.Items[i] - phase := ptr.Deref(m.Status.Phase, "") - fmt.Fprintf(buf, " %-50s phase=%-12s authAPI=%-12s conditions=%s created=%s\n", - m.Name, phase, m.Status.AuthoritativeAPI, - summarizeMAPIConditions(m.Status.Conditions), m.CreationTimestamp.Format(time.RFC3339)) - } -} + // Create a fresh instance of the same type to Get into. + fresh := reflect.New(reflect.TypeOf(obj).Elem()).Interface().(client.Object) -func dumpCAPIMachines(buf *strings.Builder, namespace string) { - list := &clusterv1.MachineList{} - if err := cl.List(ctx, list, client.InNamespace(namespace)); err != nil { - fmt.Fprintf(buf, "\n[%s] CAPI Machines: error listing: %v\n", namespace, err) - return - } + if err := cl.Get(ctx, key, fresh); err != nil { + if apierrors.IsNotFound(err) { + fmt.Fprintf(buf, "\n--- %s %s/%s: not found (deleted) ---\n", typeName, key.Namespace, key.Name) + } else { + fmt.Fprintf(buf, "\n--- %s %s/%s: error fetching: %v ---\n", typeName, key.Namespace, key.Name, err) + } - if len(list.Items) == 0 { return } - fmt.Fprintf(buf, "\n[%s] CAPI Machines (%d):\n", namespace, len(list.Items)) - - for i := range list.Items { - m := &list.Items[i] - fmt.Fprintf(buf, " %-50s phase=%-12s conditions=%s created=%s\n", - m.Name, m.Status.Phase, - summarizeV1Beta2Conditions(m.Status.Conditions), m.CreationTimestamp.Format(time.RFC3339)) - } + fmt.Fprintf(buf, "\n--- %s %s/%s ---\n", typeName, key.Namespace, key.Name) + describeObject(buf, fresh) + describeObjectEvents(buf, key) } -func dumpMAPIMachineSets(buf *strings.Builder, namespace string) { - list := &mapiv1beta1.MachineSetList{} - if err := cl.List(ctx, list, client.InNamespace(namespace)); err != nil { - fmt.Fprintf(buf, "\n[%s] MAPI MachineSets: error listing: %v\n", namespace, err) - return +func describeObject(buf *strings.Builder, obj client.Object) { + obj.SetManagedFields(nil) + + annotations := obj.GetAnnotations() + if annotations != nil { + delete(annotations, "kubectl.kubernetes.io/last-applied-configuration") + obj.SetAnnotations(annotations) } - if len(list.Items) == 0 { + out, err := yaml.Marshal(obj) + if err != nil { + fmt.Fprintf(buf, " (failed to marshal %T: %v)\n", obj, err) return } - fmt.Fprintf(buf, "\n[%s] MAPI MachineSets (%d):\n", namespace, len(list.Items)) - - for i := range list.Items { - ms := &list.Items[i] - replicas := ptr.Deref(ms.Spec.Replicas, 0) - fmt.Fprintf(buf, " %-50s replicas=%d/%d authAPI=%-12s conditions=%s\n", - ms.Name, ms.Status.ReadyReplicas, replicas, ms.Status.AuthoritativeAPI, - summarizeMAPIConditions(ms.Status.Conditions)) - } + buf.Write(out) } -func dumpCAPIMachineSets(buf *strings.Builder, namespace string) { - list := &clusterv1.MachineSetList{} - if err := cl.List(ctx, list, client.InNamespace(namespace)); err != nil { - fmt.Fprintf(buf, "\n[%s] CAPI MachineSets: error listing: %v\n", namespace, err) - return - } - - if len(list.Items) == 0 { +// describeObjectEvents lists events for the given object. Matching is by name +// only; events for different resource kinds with the same name in the same +// namespace will be included. +func describeObjectEvents(buf *strings.Builder, key client.ObjectKey) { + list := &corev1.EventList{} + if err := cl.List(ctx, list, client.InNamespace(key.Namespace)); err != nil { + fmt.Fprintf(buf, " Events: error listing: %v\n", err) return } - fmt.Fprintf(buf, "\n[%s] CAPI MachineSets (%d):\n", namespace, len(list.Items)) + var matching []corev1.Event for i := range list.Items { - ms := &list.Items[i] - replicas := ptr.Deref(ms.Spec.Replicas, 0) - fmt.Fprintf(buf, " %-50s replicas=%d/%d conditions=%s\n", - ms.Name, ptr.Deref(ms.Status.ReadyReplicas, 0), replicas, - summarizeV1Beta2Conditions(ms.Status.Conditions)) + if list.Items[i].InvolvedObject.Name == key.Name { + matching = append(matching, list.Items[i]) + } } -} -func dumpAWSMachines(buf *strings.Builder, namespace string) { - list := &awsv1.AWSMachineList{} - if err := cl.List(ctx, list, client.InNamespace(namespace)); err != nil { - fmt.Fprintf(buf, "\n[%s] AWSMachines: error listing: %v\n", namespace, err) + if len(matching) == 0 { + fmt.Fprintf(buf, " Events: none\n") return } - if len(list.Items) == 0 { - return - } + fmt.Fprintf(buf, " Events:\n") - fmt.Fprintf(buf, "\n[%s] AWSMachines (%d):\n", namespace, len(list.Items)) + for i := range matching { + e := &matching[i] + ts := e.LastTimestamp.Time + if ts.IsZero() { + ts = e.EventTime.Time + } - for i := range list.Items { - m := &list.Items[i] - providerID := ptr.Deref(m.Spec.ProviderID, "") - instanceID := ptr.Deref(m.Spec.InstanceID, "") - fmt.Fprintf(buf, " %-50s instanceType=%-12s instanceID=%-22s providerID=%s created=%s\n", - m.Name, m.Spec.InstanceType, instanceID, providerID, m.CreationTimestamp.Format(time.RFC3339)) + fmt.Fprintf(buf, " %s %-8s %-25s %s\n", + ts.Format(time.RFC3339), e.Type, e.Reason, e.Message) } } -func dumpAWSMachineTemplates(buf *strings.Builder, namespace string) { +// dumpAllAWSMachineTemplates lists all AWSMachineTemplates in the CAPI namespace +// and describes each one. Templates use generated names so we list rather than +// trying to predict names. +func dumpAllAWSMachineTemplates(buf *strings.Builder) { + defer func() { + if r := recover(); r != nil { + fmt.Fprintf(buf, "\n--- (panic dumping AWSMachineTemplates): %v ---\n", r) + } + }() + list := &awsv1.AWSMachineTemplateList{} - if err := cl.List(ctx, list, client.InNamespace(namespace)); err != nil { - fmt.Fprintf(buf, "\n[%s] AWSMachineTemplates: error listing: %v\n", namespace, err) + if err := cl.List(ctx, list, client.InNamespace(capiframework.CAPINamespace)); err != nil { + fmt.Fprintf(buf, "\n--- AWSMachineTemplates: error listing: %v ---\n", err) return } @@ -248,63 +247,48 @@ func dumpAWSMachineTemplates(buf *strings.Builder, namespace string) { return } - fmt.Fprintf(buf, "\n[%s] AWSMachineTemplates (%d):\n", namespace, len(list.Items)) + fmt.Fprintf(buf, "\n--- AWSMachineTemplates in %s (%d) ---\n", capiframework.CAPINamespace, len(list.Items)) for i := range list.Items { t := &list.Items[i] - fmt.Fprintf(buf, " %-50s instanceType=%-12s created=%s\n", - t.Name, t.Spec.Template.Spec.InstanceType, t.CreationTimestamp.Format(time.RFC3339)) + key := client.ObjectKeyFromObject(t) + fmt.Fprintf(buf, "\n %s:\n", t.Name) + describeObject(buf, t) + describeObjectEvents(buf, key) } } -func dumpEvents(buf *strings.Builder, namespace string) { - cutoff := time.Now().Add(-10 * time.Minute) +// dumpNamespaceEvents lists all events in a namespace. This catches events not +// associated with tracked resources (e.g. from controllers acting on other objects). +func dumpNamespaceEvents(buf *strings.Builder, namespace string) { + defer func() { + if r := recover(); r != nil { + fmt.Fprintf(buf, "\n--- (panic dumping events in %s): %v ---\n", namespace, r) + } + }() list := &corev1.EventList{} if err := cl.List(ctx, list, client.InNamespace(namespace)); err != nil { - fmt.Fprintf(buf, "\n[%s] Events: error listing: %v\n", namespace, err) + fmt.Fprintf(buf, "\n--- Events in %s: error listing: %v ---\n", namespace, err) return } - var recent []corev1.Event - - for i := range list.Items { - e := &list.Items[i] - ts := e.LastTimestamp.Time - if ts.IsZero() { - ts = e.EventTime.Time - } - - if ts.After(cutoff) { - recent = append(recent, *e) - } - } - - if len(recent) == 0 { + if len(list.Items) == 0 { return } - fmt.Fprintf(buf, "\n[%s] Events (last 10min, %d):\n", namespace, len(recent)) + fmt.Fprintf(buf, "\n--- Events in %s (%d) ---\n", namespace, len(list.Items)) - for i := range recent { - e := &recent[i] + for i := range list.Items { + e := &list.Items[i] ts := e.LastTimestamp.Time if ts.IsZero() { ts = e.EventTime.Time } - fmt.Fprintf(buf, " %s %s/%s %-8s %-20s %s\n", - ts.Format(time.RFC3339), + fmt.Fprintf(buf, " %s %-8s %s/%-30s %-25s %s\n", + ts.Format(time.RFC3339), e.Type, e.InvolvedObject.Kind, e.InvolvedObject.Name, - e.Type, e.Reason, truncate(e.Message, 120)) + e.Reason, e.Message) } } - -func truncate(s string, max int) string { - if len(s) <= max { - return s - } - - return s[:max-3] + "..." -} - diff --git a/e2e/e2e_test.go b/e2e/e2e_test.go index 8dbbbb203..491176931 100644 --- a/e2e/e2e_test.go +++ b/e2e/e2e_test.go @@ -15,10 +15,14 @@ package e2e import ( + "fmt" + "os" + "path/filepath" "testing" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + "github.com/onsi/ginkgo/v2/reporters" ) func TestAPIs(t *testing.T) { @@ -31,9 +35,47 @@ var _ = BeforeSuite(func() { }) var _ = ReportAfterEach(func(report SpecReport) { - if !report.Failed() { + if report.Failed() { + dumpTrackedResources() + } + + resourcesUnderTest = nil +}) + +// ReportAfterSuite generates a JUnit XML report with tracked resource +// diagnostics appended to the failure description. This replaces the +// --junit-report ginkgo flag so that Spyglass renders diagnostics inline +// with the failure instead of hiding them behind "open stderr". +var _ = ReportAfterSuite("junit with diagnostics", func(report Report) { + defer func() { + if r := recover(); r != nil { + fmt.Fprintf(os.Stderr, "WARNING: ReportAfterSuite panicked: %v\n", r) + } + }() + + artifactDir := os.Getenv("ARTIFACT_DIR") + if artifactDir == "" { return } - dumpClusterState() + // Append GinkgoWriter output (which contains our tracked resource dump) + // to the failure description so it appears in the element of + // the JUnit XML, which is what Spyglass renders by default. + for i := range report.SpecReports { + sr := &report.SpecReports[i] + if !sr.Failed() { + continue + } + + if sr.CapturedGinkgoWriterOutput == "" { + continue + } + + sr.Failure.Message += "\n\n" + sr.CapturedGinkgoWriterOutput + } + + dst := filepath.Join(artifactDir, "junit_cluster_capi_operator.xml") + if err := reporters.GenerateJUnitReport(report, dst); err != nil { + fmt.Fprintf(os.Stderr, "WARNING: failed to write JUnit report to %s: %v\n", dst, err) + } }) diff --git a/e2e/machine_migration_helpers.go b/e2e/machine_migration_helpers.go index b6fb25749..f8537ef5a 100644 --- a/e2e/machine_migration_helpers.go +++ b/e2e/machine_migration_helpers.go @@ -108,6 +108,16 @@ func createCAPIMachine(ctx context.Context, cl client.Client, machineName string return cl.Create(ctx, newAWSMachine) }, capiframework.WaitMedium, capiframework.RetryMedium).Should(Succeed(), "Should have successfully created AWSmachine %s/%s", newAWSMachine.Namespace, newAWSMachine.Name) + trackResource(newCapiMachine) + trackResource(newAWSMachine) + // The sync controller will create a mirrored MAPI Machine with the same name. + trackResource(&mapiv1beta1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: machineName, + Namespace: capiframework.MAPINamespace, + }, + }) + verifyMachineRunning(cl, newCapiMachine) return newCapiMachine @@ -156,6 +166,15 @@ func createMAPIMachineWithAuthority(ctx context.Context, cl client.Client, machi return cl.Create(ctx, newMachine) }, capiframework.WaitMedium, capiframework.RetryMedium).Should(Succeed(), "Should have successfully created MAPI machine %s with AuthoritativeAPI: %s", newMachine.Name, authority) + trackResource(newMachine) + // The sync controller will create a mirrored CAPI Machine with the same name. + trackResource(&clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Name: machineName, + Namespace: capiframework.CAPINamespace, + }, + }) + return newMachine } diff --git a/e2e/machineset_migration_helpers.go b/e2e/machineset_migration_helpers.go index 8d7de7c71..6a758cf8e 100644 --- a/e2e/machineset_migration_helpers.go +++ b/e2e/machineset_migration_helpers.go @@ -69,6 +69,16 @@ func createCAPIMachineSet(ctx context.Context, cl client.Client, replicas int32, "worker-user-data", )) + trackResource(awsMachineTemplate) + trackResource(machineSet) + // The sync controller will create a mirrored MAPI MachineSet with the same name. + trackResource(&mapiv1beta1.MachineSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: machineSetName, + Namespace: capiframework.MAPINamespace, + }, + }) + capiframework.WaitForMachineSet(ctx, cl, machineSet.Name, machineSet.Namespace, capiframework.WaitLong) return machineSet @@ -89,6 +99,8 @@ func createMAPIMachineSetWithAuthoritativeAPI(ctx context.Context, cl client.Cli mapiMachineSet, err := mapiframework.CreateMachineSet(cl, machineSetParams) Expect(err).ToNot(HaveOccurred(), "MAPI MachineSet %s creation should succeed", machineSetName) + trackResource(mapiMachineSet) + capiMachineSet := &clusterv1.MachineSet{ ObjectMeta: metav1.ObjectMeta{ Name: machineSetName, @@ -98,6 +110,8 @@ func createMAPIMachineSetWithAuthoritativeAPI(ctx context.Context, cl client.Cli Eventually(komega.Get(capiMachineSet), capiframework.WaitShort, capiframework.RetryShort).Should( Succeed(), "Should have mirror CAPI MachineSet created within 1 minute") + trackResource(capiMachineSet) + switch machineAuthority { case mapiv1beta1.MachineAuthorityMachineAPI: mapiframework.WaitForMachineSet(ctx, cl, machineSetName) @@ -405,6 +419,8 @@ func createAWSMachineTemplate(ctx context.Context, cl client.Client, originalNam Eventually(cl.Create(ctx, newTemplate), capiframework.WaitMedium, capiframework.RetryMedium).Should( Succeed(), "Failed to create a new awsMachineTemplate %s", newTemplate.Name) + trackResource(newTemplate) + return newTemplate } diff --git a/hack/test.sh b/hack/test.sh index d4f7f0a70..711431962 100755 --- a/hack/test.sh +++ b/hack/test.sh @@ -30,7 +30,13 @@ if [ $HOME == "/" ]; then fi if [ "$OPENSHIFT_CI" == "true" ] && [ -n "$ARTIFACT_DIR" ] && [ -d "$ARTIFACT_DIR" ]; then # detect ci environment there - GINKGO_ARGS="${GINKGO_ARGS} --junit-report=junit_cluster_capi_operator.xml --cover --coverprofile=test-unit-coverage.out --output-dir=${ARTIFACT_DIR}" + GINKGO_ARGS="${GINKGO_ARGS} --cover --coverprofile=test-unit-coverage.out --output-dir=${ARTIFACT_DIR}" + # e2e tests use a custom ReportAfterSuite that appends resource diagnostics + # to the JUnit failure message for Spyglass. Other suites use ginkgo's + # built-in JUnit reporter. + if [[ "${TEST_DIRS}" != *"e2e"* ]]; then + GINKGO_ARGS="${GINKGO_ARGS} --junit-report=junit_cluster_capi_operator.xml" + fi fi # Print the command we are going to run as Make would.