openshift · xueqzhan · Jan 27, 2026 · Feb 19, 2026 · Feb 25, 2026 · Feb 26, 2026
diff --git a/config/views.yaml b/config/views.yaml
@@ -87,6 +87,97 @@ component_readiness:
     enabled: true
   automate_jira:
     enabled: true
+- name: 4.22-main-mass-failure
+  base_release:
+    release: "4.21"
+    relative_start: ga-30d
+    relative_end: ga
+  sample_release:
+    release: "4.22"
+    relative_start: now-7d
+    relative_end: now
+  variant_options:
+    column_group_by:
+      Network: { }
+      Platform: { }
+      Topology: { }
+    db_group_by:
+      Architecture: { }
+      FeatureSet: { }
+      Installer: { }
+      Network: { }
+      Platform: { }
+      Suite: { }
+      Topology: { }
+      Upgrade: { }
+      LayeredProduct: { }
+    include_variants:
+      Architecture:
+      - amd64
+      - arm64
+      - multi
+      FeatureSet:
+      - default
+      - techpreview
+      Installer:
+      - ipi
+      - upi
+      - hypershift
+      JobTier:
+      - blocking
+      - informing
+      - standard
+      LayeredProduct:
+      - none
+      - virt
+      Network:
+      - ovn
+      OS:
+      - rhcos9
+      Owner:
+      - eng
+      - service-delivery
+      Platform:
+      - aws
+      - azure
+      - gcp
+      - metal
+      - rosa
+      - vsphere
+      Topology:
+      - ha
+      - microshift
+      - external
+      CGroupMode:
+      - v2
+      ContainerRuntime:
+      - runc
+      - crun
+      Upgrade:
+      - micro
+      - minor
+      - none
+  advanced_options:
+    minimum_failure: 3
+    confidence: 95
+    pity_factor: 5
+    ignore_missing: false
+    ignore_disruption: true
+    flake_as_failure: false
+    pass_rate_required_new_tests: 95
+    include_multi_release_analysis: true
+    key_test_names:
+    - "install should succeed: overall"
+    - "[sig-cluster-lifecycle] Cluster completes upgrade"
+    - "[Jira:\"Test Framework\"] there should not be mass test failures"
+  metrics:
+    enabled: true
+  regression_tracking:
+    enabled: true
+  prime_cache:
+    enabled: true
+  automate_jira:
+    enabled: false
 - name: 4.22-hypershift-candidates
   base_release:
     release: "4.21"

diff --git a/pkg/api/componentreadiness/middleware/releasefallback/releasefallback.go b/pkg/api/componentreadiness/middleware/releasefallback/releasefallback.go
@@ -358,6 +358,8 @@ type fallbackTestQueryReleasesGeneratorCacheKey struct {
 	VariantDBGroupBy sets.String
 	// CRTimeRoundingFactor is used by GetReleaseDatesFromBigQuery
 	CRTimeRoundingFactor time.Duration
+	// KeyTestNames affects the BuildComponentReportQuery results via filtering logic
+	KeyTestNames []string
 }
 
 // getCacheKey creates a cache key using the generator properties that we want included for uniqueness in what
@@ -370,6 +372,7 @@ func (f *fallbackTestQueryReleasesGenerator) getCacheKey() fallbackTestQueryRele
 		BaseEnd:              f.BaseEnd,
 		VariantDBGroupBy:     f.ReqOptions.VariantOption.DBGroupBy,
 		CRTimeRoundingFactor: f.ReqOptions.CacheOption.CRTimeRoundingFactor,
+		KeyTestNames:         f.ReqOptions.AdvancedOption.KeyTestNames,
 	}
 }
 
@@ -516,8 +519,9 @@ type fallbackTestQueryGeneratorCacheKey struct {
 	BaseRelease string
 	BaseStart   time.Time
 	BaseEnd     time.Time
-	// IgnoreDisruption is the only field within AdvancedOption that is used here
+	// IgnoreDisruption and KeyTestNames are fields within AdvancedOption that affect the query
 	IgnoreDisruption bool
+	KeyTestNames     []string
 	IncludeVariants  map[string][]string
 	VariantDBGroupBy sets.String
 	// if we ever needed fallback on cross-compare views we should include fields for that,
@@ -533,6 +537,7 @@ func (f *fallbackTestQueryGenerator) getCacheKey() fallbackTestQueryGeneratorCac
 		BaseStart:        f.BaseStart,
 		BaseEnd:          f.BaseEnd,
 		IgnoreDisruption: f.ReqOptions.AdvancedOption.IgnoreDisruption,
+		KeyTestNames:     f.ReqOptions.AdvancedOption.KeyTestNames,
 		IncludeVariants:  f.ReqOptions.VariantOption.IncludeVariants,
 		VariantDBGroupBy: f.ReqOptions.VariantOption.DBGroupBy,
 	}
@@ -541,7 +546,7 @@ func (f *fallbackTestQueryGenerator) getCacheKey() fallbackTestQueryGeneratorCac
 func (f *fallbackTestQueryGenerator) getTestFallbackRelease(ctx context.Context) (bq.ReportTestStatus, []error) {
 	commonQuery, groupByQuery, queryParameters := query.BuildComponentReportQuery(
 		f.client, f.ReqOptions, f.allVariants, f.ReqOptions.VariantOption.IncludeVariants,
-		query.DefaultJunitTable, false)
+		query.DefaultJunitTable, false, f.ReqOptions.AdvancedOption.KeyTestNames...)
 	before := time.Now()
 	log.Infof("Starting Fallback (%s) QueryTestStatus", f.BaseRelease)
 	errs := []error{}

diff --git a/pkg/api/componentreadiness/query/querygenerators.go b/pkg/api/componentreadiness/query/querygenerators.go
@@ -135,9 +135,8 @@ func NewBaseQueryGenerator(
 
 func (b *baseQueryGenerator) QueryTestStatus(ctx context.Context) (bq.ReportTestStatus, []error) {
 
-	commonQuery, groupByQuery, queryParameters := BuildComponentReportQuery(b.client, b.ReqOptions, b.allVariants, b.ReqOptions.VariantOption.IncludeVariants, DefaultJunitTable, false)
+	commonQuery, groupByQuery, queryParameters := BuildComponentReportQuery(b.client, b.ReqOptions, b.allVariants, b.ReqOptions.VariantOption.IncludeVariants, DefaultJunitTable, false, b.ReqOptions.AdvancedOption.KeyTestNames...)
 
-	before := time.Now()
 	errs := []error{}
 	baseString := commonQuery + ` AND jv_Release.variant_value = @BaseRelease`
 	baseQuery := b.client.Query(ctx, bqlabel.CRJunitBase, baseString+groupByQuery)
@@ -164,8 +163,6 @@ func (b *baseQueryGenerator) QueryTestStatus(ctx context.Context) (bq.ReportTest
 		errs = append(errs, baseErrs...)
 	}
 
-	log.Infof("Base QueryTestStatus completed in %s with %d base results from db", time.Since(before), len(baseStatus))
-
 	return bq.ReportTestStatus{BaseStatus: baseStatus}, errs
 }
 
@@ -207,9 +204,8 @@ func NewSampleQueryGenerator(
 }
 
 func (s *sampleQueryGenerator) QueryTestStatus(ctx context.Context) (bq.ReportTestStatus, []error) {
-	commonQuery, groupByQuery, queryParameters := BuildComponentReportQuery(s.client, s.ReqOptions, s.allVariants, s.IncludeVariants, s.JunitTable, true)
+	commonQuery, groupByQuery, queryParameters := BuildComponentReportQuery(s.client, s.ReqOptions, s.allVariants, s.IncludeVariants, s.JunitTable, true, s.ReqOptions.AdvancedOption.KeyTestNames...)
 
-	before := time.Now()
 	errs := []error{}
 	sampleString := commonQuery
 	// Only set sample release when PR and payload options are not set
@@ -273,19 +269,36 @@ func (s *sampleQueryGenerator) QueryTestStatus(ctx context.Context) (bq.ReportTe
 		errs = append(errs, sampleErrs...)
 	}
 
-	log.Infof("Sample QueryTestStatus completed in %s with %d sample results db", time.Since(before), len(sampleStatus))
-
 	return bq.ReportTestStatus{SampleStatus: sampleStatus}, errs
 }
 
+// buildPriorityCaseStatement generates a SQL CASE statement that assigns priority based on test position in the list.
+// Lower index = higher priority. This is used to ensure when multiple key tests appear in the same job,
+// only the highest priority one is counted.
+func buildPriorityCaseStatement(keyTestNames []string) string {
+	var caseStatements []string
+	for i, testName := range keyTestNames {
+		// Escape single quotes in test name for SQL
+		escapedTestName := strings.ReplaceAll(testName, "'", "''")
+		caseStatements = append(caseStatements, fmt.Sprintf("WHEN test_name = '%s' THEN %d", escapedTestName, i))
+	}
+	// Add a default case to handle any unexpected tests (should not happen due to IN UNNEST filter)
+	caseStatements = append(caseStatements, fmt.Sprintf("ELSE %d", len(keyTestNames)))
+	return strings.Join(caseStatements, "\n\t\t\t\t\t\t\t")
+}
+
 // BuildComponentReportQuery returns the common query for the higher level summary component summary.
+// If keyTestNames is provided, when any of these tests fail in a job, all other test failures
+// in that job are excluded from regression analysis. Only the highest priority (earliest in the list)
+// key test will be included for each affected job.
 func BuildComponentReportQuery(
 	client *bqcachedclient.Client,
 	reqOptions reqopts.RequestOptions,
 	allJobVariants crtest.JobVariants,
 	includeVariants map[string][]string,
 	junitTable string,
 	isSample bool,
+	keyTestNames ...string,
 ) (string, string, []bigquery.QueryParameter) {
 	// Parts of the query, including the columns returned, are dynamic, based on the list of variants we're told to work with.
 	// Variants will be returned as columns with names like: variant_[VariantName]
@@ -294,7 +307,7 @@ func BuildComponentReportQuery(
 	joinVariants := ""
 	groupByVariants := ""
 	for _, v := range sortedKeys(allJobVariants.Variants) {
-		joinVariants += fmt.Sprintf("LEFT JOIN %s.job_variants jv_%s ON variant_registry_job_name = jv_%s.job_name AND jv_%s.variant_name = '%s'\n",
+		joinVariants += fmt.Sprintf("LEFT JOIN %s.job_variants jv_%s ON junit_data.variant_registry_job_name = jv_%s.job_name AND jv_%s.variant_name = '%s'\n",
 			client.Dataset, v, v, v, v)
 	}
 	for _, v := range reqOptions.VariantOption.DBGroupBy.List() {
@@ -313,33 +326,95 @@ func BuildComponentReportQuery(
 	// TODO: last_failure here explicitly uses success_val not adjusted_success_val, this ensures we
 	// show the last time the test failed, not flaked. if you enable the flakes as failures feature (which is
 	// non default today), the last failure time will be wrong which can impact things like failed fix detection.
-	queryString := fmt.Sprintf(`WITH latest_component_mapping AS (
-						SELECT *
-						FROM %s.component_mapping cm
-						WHERE created_at = (
-								SELECT MAX(created_at)
-								FROM %s.component_mapping))
+	// Build the WITH clause - add jobs_with_failed_key_tests CTE if keyTestNames is provided
+	withClause := ""
+	if len(keyTestNames) > 0 {
+		// Create a CTE that identifies the highest priority (lowest index) key test in each job
+		// This ensures when multiple key tests appear in the same job, only the highest priority one is used
+		withClause = fmt.Sprintf(`WITH key_test_priorities AS (
+							SELECT
+								prowjob_build_id,
+								test_name,
+								-- Find the index/priority of each test (lower index = higher priority)
+								CASE
+								%s
+								END AS test_priority
+							FROM %s.%s AS junit
+							WHERE modified_time >= DATETIME(@From)
+							AND modified_time < DATETIME(@To)
+							AND test_name IN UNNEST(@KeyTestNames)
+							AND success_val = 0
+						),
+						jobs_with_highest_priority_test AS (
+							SELECT
+								prowjob_build_id,
+								test_name
+							FROM key_test_priorities
+							WHERE test_priority = (
+								SELECT MIN(test_priority)
+								FROM key_test_priorities ep2
+								WHERE ep2.prowjob_build_id = key_test_priorities.prowjob_build_id
+							)
+						),
+						latest_component_mapping AS (
+							SELECT *
+							FROM %s.component_mapping cm
+							WHERE created_at = (
+									SELECT MAX(created_at)
+									FROM %s.component_mapping))`,
+			buildPriorityCaseStatement(keyTestNames), client.Dataset, junitTable, client.Dataset, client.Dataset)
+	} else {
+		withClause = fmt.Sprintf(`WITH latest_component_mapping AS (
+							SELECT *
+							FROM %s.component_mapping cm
+							WHERE created_at = (
+									SELECT MAX(created_at)
+									FROM %s.component_mapping))`,
+			client.Dataset, client.Dataset)
+	}
+
+	queryString := fmt.Sprintf(`%s
 					SELECT
-						ANY_VALUE(test_name HAVING MAX prowjob_start) AS test_name,
-						ANY_VALUE(testsuite HAVING MAX prowjob_start) AS test_suite,
+						ANY_VALUE(junit_data.test_name HAVING MAX junit_data.prowjob_start) AS test_name,
+						ANY_VALUE(junit_data.testsuite HAVING MAX junit_data.prowjob_start) AS test_suite,
 						cm.id as test_id,
 						%s
 						COUNT(cm.id) AS total_count,
-						SUM(adjusted_success_val) AS success_count,
-						SUM(adjusted_flake_count) AS flake_count,
-						MAX(CASE WHEN success_val = 0 THEN prowjob_start ELSE NULL END) AS last_failure,
+						SUM(junit_data.adjusted_success_val) AS success_count,
+						SUM(junit_data.adjusted_flake_count) AS flake_count,
+						MAX(CASE WHEN junit_data.success_val = 0 THEN junit_data.prowjob_start ELSE NULL END) AS last_failure,
 						ANY_VALUE(cm.component) AS component,
 						ANY_VALUE(cm.capabilities) AS capabilities,
-					FROM (%s)
-					INNER JOIN latest_component_mapping cm ON testsuite = cm.suite AND test_name = cm.name
+					FROM (%s) AS junit_data
+					INNER JOIN latest_component_mapping cm ON junit_data.testsuite = cm.suite AND junit_data.test_name = cm.name
 `,
-		client.Dataset, client.Dataset, selectVariants, fmt.Sprintf(dedupedJunitTable, jobNameQueryPortion, client.Dataset, junitTable, client.Dataset, client.Dataset, jobRunAnnotationToIgnore))
+		withClause, selectVariants, fmt.Sprintf(dedupedJunitTable, jobNameQueryPortion, client.Dataset, junitTable, client.Dataset, client.Dataset, jobRunAnnotationToIgnore))
 
 	queryString += joinVariants
 
 	queryString += `WHERE cm.staff_approved_obsolete = false AND
-						(variant_registry_job_name LIKE 'periodic-%%' OR variant_registry_job_name LIKE 'release-%%' OR variant_registry_job_name LIKE 'aggregator-%%')`
+						(junit_data.variant_registry_job_name LIKE 'periodic-%%' OR junit_data.variant_registry_job_name LIKE 'release-%%' OR junit_data.variant_registry_job_name LIKE 'aggregator-%%')`
 	commonParams := []bigquery.QueryParameter{}
+
+	// Add filtering logic for key tests with priority
+	// Only include the highest priority test from each job, and exclude all other tests from those jobs
+	if len(keyTestNames) > 0 {
+		queryString += `
+						AND (
+							-- Include tests from jobs that don't have any failed key tests
+							junit_data.prowjob_build_id NOT IN (SELECT prowjob_build_id FROM jobs_with_highest_priority_test)
+							-- Or include only the highest priority key test from jobs that have them
+							OR EXISTS (
+								SELECT 1 FROM jobs_with_highest_priority_test j
+								WHERE j.prowjob_build_id = junit_data.prowjob_build_id
+								AND j.test_name = junit_data.test_name
+							)
+						)`
+		commonParams = append(commonParams, bigquery.QueryParameter{
+			Name:  "KeyTestNames",
+			Value: keyTestNames,
+		})
+	}
 	if reqOptions.AdvancedOption.IgnoreDisruption {
 		queryString += ` AND NOT 'Disruption' in UNNEST(capabilities)`
 	}
@@ -627,6 +702,7 @@ func FetchTestStatusResults(ctx context.Context, query *bigquery.Query) (map[str
 
 		status[testIDStr] = testStatus
 	}
+
 	return status, errs
 }