-
Notifications
You must be signed in to change notification settings - Fork 101
TRT-2471: Consider mass failures during CR query #3285
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -135,9 +135,8 @@ func NewBaseQueryGenerator( | |
|
|
||
| func (b *baseQueryGenerator) QueryTestStatus(ctx context.Context) (bq.ReportTestStatus, []error) { | ||
|
|
||
| commonQuery, groupByQuery, queryParameters := BuildComponentReportQuery(b.client, b.ReqOptions, b.allVariants, b.ReqOptions.VariantOption.IncludeVariants, DefaultJunitTable, false) | ||
| commonQuery, groupByQuery, queryParameters := BuildComponentReportQuery(b.client, b.ReqOptions, b.allVariants, b.ReqOptions.VariantOption.IncludeVariants, DefaultJunitTable, false, b.ReqOptions.AdvancedOption.KeyTestNames...) | ||
|
|
||
| before := time.Now() | ||
| errs := []error{} | ||
| baseString := commonQuery + ` AND jv_Release.variant_value = @BaseRelease` | ||
| baseQuery := b.client.Query(ctx, bqlabel.CRJunitBase, baseString+groupByQuery) | ||
|
|
@@ -164,8 +163,6 @@ func (b *baseQueryGenerator) QueryTestStatus(ctx context.Context) (bq.ReportTest | |
| errs = append(errs, baseErrs...) | ||
| } | ||
|
|
||
| log.Infof("Base QueryTestStatus completed in %s with %d base results from db", time.Since(before), len(baseStatus)) | ||
|
|
||
| return bq.ReportTestStatus{BaseStatus: baseStatus}, errs | ||
| } | ||
|
|
||
|
|
@@ -207,9 +204,8 @@ func NewSampleQueryGenerator( | |
| } | ||
|
|
||
| func (s *sampleQueryGenerator) QueryTestStatus(ctx context.Context) (bq.ReportTestStatus, []error) { | ||
| commonQuery, groupByQuery, queryParameters := BuildComponentReportQuery(s.client, s.ReqOptions, s.allVariants, s.IncludeVariants, s.JunitTable, true) | ||
| commonQuery, groupByQuery, queryParameters := BuildComponentReportQuery(s.client, s.ReqOptions, s.allVariants, s.IncludeVariants, s.JunitTable, true, s.ReqOptions.AdvancedOption.KeyTestNames...) | ||
|
|
||
| before := time.Now() | ||
| errs := []error{} | ||
| sampleString := commonQuery | ||
| // Only set sample release when PR and payload options are not set | ||
|
|
@@ -273,19 +269,36 @@ func (s *sampleQueryGenerator) QueryTestStatus(ctx context.Context) (bq.ReportTe | |
| errs = append(errs, sampleErrs...) | ||
| } | ||
|
|
||
| log.Infof("Sample QueryTestStatus completed in %s with %d sample results db", time.Since(before), len(sampleStatus)) | ||
|
|
||
| return bq.ReportTestStatus{SampleStatus: sampleStatus}, errs | ||
| } | ||
|
|
||
| // buildPriorityCaseStatement generates a SQL CASE statement that assigns priority based on test position in the list. | ||
| // Lower index = higher priority. This is used to ensure when multiple key tests appear in the same job, | ||
| // only the highest priority one is counted. | ||
| func buildPriorityCaseStatement(keyTestNames []string) string { | ||
| var caseStatements []string | ||
| for i, testName := range keyTestNames { | ||
| // Escape single quotes in test name for SQL | ||
| escapedTestName := strings.ReplaceAll(testName, "'", "''") | ||
| caseStatements = append(caseStatements, fmt.Sprintf("WHEN test_name = '%s' THEN %d", escapedTestName, i)) | ||
| } | ||
| // Add a default case to handle any unexpected tests (should not happen due to IN UNNEST filter) | ||
| caseStatements = append(caseStatements, fmt.Sprintf("ELSE %d", len(keyTestNames))) | ||
| return strings.Join(caseStatements, "\n\t\t\t\t\t\t\t") | ||
| } | ||
|
|
||
| // BuildComponentReportQuery returns the common query for the higher level summary component summary. | ||
| // If keyTestNames is provided, when any of these tests fail in a job, all other test failures | ||
| // in that job are excluded from regression analysis. Only the highest priority (earliest in the list) | ||
| // key test will be included for each affected job. | ||
| func BuildComponentReportQuery( | ||
| client *bqcachedclient.Client, | ||
| reqOptions reqopts.RequestOptions, | ||
| allJobVariants crtest.JobVariants, | ||
| includeVariants map[string][]string, | ||
| junitTable string, | ||
| isSample bool, | ||
| keyTestNames ...string, | ||
| ) (string, string, []bigquery.QueryParameter) { | ||
| // Parts of the query, including the columns returned, are dynamic, based on the list of variants we're told to work with. | ||
| // Variants will be returned as columns with names like: variant_[VariantName] | ||
|
|
@@ -294,7 +307,7 @@ func BuildComponentReportQuery( | |
| joinVariants := "" | ||
| groupByVariants := "" | ||
| for _, v := range sortedKeys(allJobVariants.Variants) { | ||
| joinVariants += fmt.Sprintf("LEFT JOIN %s.job_variants jv_%s ON variant_registry_job_name = jv_%s.job_name AND jv_%s.variant_name = '%s'\n", | ||
| joinVariants += fmt.Sprintf("LEFT JOIN %s.job_variants jv_%s ON junit_data.variant_registry_job_name = jv_%s.job_name AND jv_%s.variant_name = '%s'\n", | ||
| client.Dataset, v, v, v, v) | ||
| } | ||
| for _, v := range reqOptions.VariantOption.DBGroupBy.List() { | ||
|
|
@@ -313,33 +326,95 @@ func BuildComponentReportQuery( | |
| // TODO: last_failure here explicitly uses success_val not adjusted_success_val, this ensures we | ||
| // show the last time the test failed, not flaked. if you enable the flakes as failures feature (which is | ||
| // non default today), the last failure time will be wrong which can impact things like failed fix detection. | ||
| queryString := fmt.Sprintf(`WITH latest_component_mapping AS ( | ||
| SELECT * | ||
| FROM %s.component_mapping cm | ||
| WHERE created_at = ( | ||
| SELECT MAX(created_at) | ||
| FROM %s.component_mapping)) | ||
| // Build the WITH clause - add jobs_with_failed_key_tests CTE if keyTestNames is provided | ||
| withClause := "" | ||
| if len(keyTestNames) > 0 { | ||
| // Create a CTE that identifies the highest priority (lowest index) key test in each job | ||
| // This ensures when multiple key tests appear in the same job, only the highest priority one is used | ||
| withClause = fmt.Sprintf(`WITH key_test_priorities AS ( | ||
| SELECT | ||
| prowjob_build_id, | ||
| test_name, | ||
| -- Find the index/priority of each test (lower index = higher priority) | ||
| CASE | ||
| %s | ||
| END AS test_priority | ||
| FROM %s.%s AS junit | ||
| WHERE modified_time >= DATETIME(@From) | ||
| AND modified_time < DATETIME(@To) | ||
| AND test_name IN UNNEST(@KeyTestNames) | ||
| AND success_val = 0 | ||
| ), | ||
|
Comment on lines
+331
to
+347
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The CTE filters If a key test flakes, should the remaining tests in that job be suppressed from regression analysis? If not, add 💡 Suggested fix FROM %s.%s AS junit
WHERE modified_time >= DATETIME(`@From`)
AND modified_time < DATETIME(`@To`)
AND test_name IN UNNEST(`@KeyTestNames`)
- AND success_val = 0
+ AND success_val = 0
+ AND flake_count = 0🤖 Prompt for AI Agents |
||
| jobs_with_highest_priority_test AS ( | ||
| SELECT | ||
| prowjob_build_id, | ||
| test_name | ||
| FROM key_test_priorities | ||
| WHERE test_priority = ( | ||
| SELECT MIN(test_priority) | ||
| FROM key_test_priorities ep2 | ||
| WHERE ep2.prowjob_build_id = key_test_priorities.prowjob_build_id | ||
| ) | ||
| ), | ||
| latest_component_mapping AS ( | ||
| SELECT * | ||
| FROM %s.component_mapping cm | ||
| WHERE created_at = ( | ||
| SELECT MAX(created_at) | ||
| FROM %s.component_mapping))`, | ||
| buildPriorityCaseStatement(keyTestNames), client.Dataset, junitTable, client.Dataset, client.Dataset) | ||
| } else { | ||
| withClause = fmt.Sprintf(`WITH latest_component_mapping AS ( | ||
| SELECT * | ||
| FROM %s.component_mapping cm | ||
| WHERE created_at = ( | ||
| SELECT MAX(created_at) | ||
| FROM %s.component_mapping))`, | ||
| client.Dataset, client.Dataset) | ||
| } | ||
|
|
||
| queryString := fmt.Sprintf(`%s | ||
| SELECT | ||
| ANY_VALUE(test_name HAVING MAX prowjob_start) AS test_name, | ||
| ANY_VALUE(testsuite HAVING MAX prowjob_start) AS test_suite, | ||
| ANY_VALUE(junit_data.test_name HAVING MAX junit_data.prowjob_start) AS test_name, | ||
| ANY_VALUE(junit_data.testsuite HAVING MAX junit_data.prowjob_start) AS test_suite, | ||
| cm.id as test_id, | ||
| %s | ||
| COUNT(cm.id) AS total_count, | ||
| SUM(adjusted_success_val) AS success_count, | ||
| SUM(adjusted_flake_count) AS flake_count, | ||
| MAX(CASE WHEN success_val = 0 THEN prowjob_start ELSE NULL END) AS last_failure, | ||
| SUM(junit_data.adjusted_success_val) AS success_count, | ||
| SUM(junit_data.adjusted_flake_count) AS flake_count, | ||
| MAX(CASE WHEN junit_data.success_val = 0 THEN junit_data.prowjob_start ELSE NULL END) AS last_failure, | ||
| ANY_VALUE(cm.component) AS component, | ||
| ANY_VALUE(cm.capabilities) AS capabilities, | ||
| FROM (%s) | ||
| INNER JOIN latest_component_mapping cm ON testsuite = cm.suite AND test_name = cm.name | ||
| FROM (%s) AS junit_data | ||
| INNER JOIN latest_component_mapping cm ON junit_data.testsuite = cm.suite AND junit_data.test_name = cm.name | ||
| `, | ||
| client.Dataset, client.Dataset, selectVariants, fmt.Sprintf(dedupedJunitTable, jobNameQueryPortion, client.Dataset, junitTable, client.Dataset, client.Dataset, jobRunAnnotationToIgnore)) | ||
| withClause, selectVariants, fmt.Sprintf(dedupedJunitTable, jobNameQueryPortion, client.Dataset, junitTable, client.Dataset, client.Dataset, jobRunAnnotationToIgnore)) | ||
|
|
||
| queryString += joinVariants | ||
|
|
||
| queryString += `WHERE cm.staff_approved_obsolete = false AND | ||
| (variant_registry_job_name LIKE 'periodic-%%' OR variant_registry_job_name LIKE 'release-%%' OR variant_registry_job_name LIKE 'aggregator-%%')` | ||
| (junit_data.variant_registry_job_name LIKE 'periodic-%%' OR junit_data.variant_registry_job_name LIKE 'release-%%' OR junit_data.variant_registry_job_name LIKE 'aggregator-%%')` | ||
| commonParams := []bigquery.QueryParameter{} | ||
|
|
||
| // Add filtering logic for key tests with priority | ||
| // Only include the highest priority test from each job, and exclude all other tests from those jobs | ||
| if len(keyTestNames) > 0 { | ||
| queryString += ` | ||
| AND ( | ||
| -- Include tests from jobs that don't have any failed key tests | ||
| junit_data.prowjob_build_id NOT IN (SELECT prowjob_build_id FROM jobs_with_highest_priority_test) | ||
| -- Or include only the highest priority key test from jobs that have them | ||
| OR EXISTS ( | ||
| SELECT 1 FROM jobs_with_highest_priority_test j | ||
| WHERE j.prowjob_build_id = junit_data.prowjob_build_id | ||
| AND j.test_name = junit_data.test_name | ||
| ) | ||
| )` | ||
| commonParams = append(commonParams, bigquery.QueryParameter{ | ||
| Name: "KeyTestNames", | ||
| Value: keyTestNames, | ||
| }) | ||
| } | ||
| if reqOptions.AdvancedOption.IgnoreDisruption { | ||
| queryString += ` AND NOT 'Disruption' in UNNEST(capabilities)` | ||
| } | ||
|
|
@@ -627,6 +702,7 @@ func FetchTestStatusResults(ctx context.Context, query *bigquery.Query) (map[str | |
|
|
||
| status[testIDStr] = testStatus | ||
| } | ||
|
|
||
| return status, errs | ||
| } | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.