diff --git a/datafusion/expr/src/utils.rs b/datafusion/expr/src/utils.rs index 81a6fd393a989..94807d7c8bdaa 100644 --- a/datafusion/expr/src/utils.rs +++ b/datafusion/expr/src/utils.rs @@ -386,12 +386,17 @@ fn get_exprs_except_skipped( /// (once for each join side), but an unqualified wildcard should include it only once. /// This function returns the columns that should be excluded. fn exclude_using_columns(plan: &LogicalPlan) -> Result> { + let output_columns: HashSet<_> = plan.schema().columns().iter().cloned().collect(); let using_columns = plan.using_columns()?; let excluded = using_columns .into_iter() // For each USING JOIN condition, only expand to one of each join column in projection .flat_map(|cols| { - let mut cols = cols.into_iter().collect::>(); + // Only consider columns that survive in the output schema. + let mut cols = cols + .into_iter() + .filter(|c| output_columns.contains(c)) + .collect::>(); // sort join columns to make sure we consistently keep the same // qualified column cols.sort(); diff --git a/datafusion/sql/tests/sql_integration.rs b/datafusion/sql/tests/sql_integration.rs index 29c17be69ce5f..731a06b48e125 100644 --- a/datafusion/sql/tests/sql_integration.rs +++ b/datafusion/sql/tests/sql_integration.rs @@ -4847,6 +4847,40 @@ fn test_using_join_wildcard_schema() { ); } +#[test] +fn test_using_join_wildcard_schema_semi_anti() { + let left_expected = vec!["s.x1".to_string(), "s.x2".to_string(), "s.x3".to_string()]; + let right_expected = vec!["t.x1".to_string(), "t.x2".to_string(), "t.x3".to_string()]; + + let sql = "WITH + s AS (SELECT 1 AS x1, 2 AS x2, 3 AS x3), + t AS (SELECT 1 AS x1, 4 AS x2, 5 AS x3) + SELECT * FROM s LEFT SEMI JOIN t USING (x1)"; + let plan = logical_plan(sql).unwrap(); + assert_eq!(plan.schema().field_names(), left_expected); + + let sql = "WITH + s AS (SELECT 1 AS x1, 2 AS x2, 3 AS x3), + t AS (SELECT 1 AS x1, 4 AS x2, 5 AS x3) + SELECT * FROM s RIGHT SEMI JOIN t USING (x1)"; + let plan = logical_plan(sql).unwrap(); + assert_eq!(plan.schema().field_names(), right_expected); + + let sql = "WITH + s AS (SELECT 1 AS x1, 2 AS x2, 3 AS x3), + t AS (SELECT 1 AS x1, 4 AS x2, 5 AS x3) + SELECT * FROM s LEFT ANTI JOIN t USING (x1)"; + let plan = logical_plan(sql).unwrap(); + assert_eq!(plan.schema().field_names(), left_expected); + + let sql = "WITH + s AS (SELECT 1 AS x1, 2 AS x2, 3 AS x3), + t AS (SELECT 1 AS x1, 4 AS x2, 5 AS x3) + SELECT * FROM s RIGHT ANTI JOIN t USING (x1)"; + let plan = logical_plan(sql).unwrap(); + assert_eq!(plan.schema().field_names(), right_expected); +} + #[test] fn test_2_nested_lateral_join_with_the_deepest_join_referencing_the_outer_most_relation() {